* add tests that cover same behaviors as Convert and ConvertString * align read and write behaviors with bufio to play nice * add methods that allow to customize buffer size * add methods to reset, allowing reusereader-writer-rework
@@ -6,6 +6,7 @@ package iconv | |||||
#cgo windows LDFLAGS: -liconv | #cgo windows LDFLAGS: -liconv | ||||
#include <stdlib.h> | #include <stdlib.h> | ||||
#include <iconv.h> | #include <iconv.h> | ||||
#include <locale.h> | |||||
// As of GO 1.6 passing a pointer to Go pointer, will lead to panic | // As of GO 1.6 passing a pointer to Go pointer, will lead to panic | ||||
// Therofore we use this wrapper function, to avoid passing **char directly from go | // Therofore we use this wrapper function, to avoid passing **char directly from go | ||||
@@ -20,47 +21,40 @@ import "unsafe" | |||||
type Converter struct { | type Converter struct { | ||||
context C.iconv_t | context C.iconv_t | ||||
open bool | |||||
} | } | ||||
// Initialize a new Converter. If fromEncoding or toEncoding are not supported by | // Initialize a new Converter. If fromEncoding or toEncoding are not supported by | ||||
// iconv then an EINVAL error will be returned. An ENOMEM error maybe returned if | // iconv then an EINVAL error will be returned. An ENOMEM error maybe returned if | ||||
// there is not enough memory to initialize an iconv descriptor | // there is not enough memory to initialize an iconv descriptor | ||||
func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err error) { | |||||
converter = new(Converter) | |||||
func NewConverter(fromEncoding string, toEncoding string) (*Converter, error) { | |||||
// convert to C strings | // convert to C strings | ||||
toEncodingC := C.CString(toEncoding) | toEncodingC := C.CString(toEncoding) | ||||
fromEncodingC := C.CString(fromEncoding) | fromEncodingC := C.CString(fromEncoding) | ||||
// open an iconv descriptor | // open an iconv descriptor | ||||
converter.context, err = C.iconv_open(toEncodingC, fromEncodingC) | |||||
context, err := C.iconv_open(toEncodingC, fromEncodingC) | |||||
// free the C Strings | // free the C Strings | ||||
C.free(unsafe.Pointer(toEncodingC)) | C.free(unsafe.Pointer(toEncodingC)) | ||||
C.free(unsafe.Pointer(fromEncodingC)) | C.free(unsafe.Pointer(fromEncodingC)) | ||||
// check err | |||||
if err == nil { | |||||
// no error, mark the context as open | |||||
converter.open = true | |||||
if err != nil { | |||||
return nil, err | |||||
} | } | ||||
return | |||||
return &Converter{context}, nil | |||||
} | } | ||||
// destroy is called during garbage collection | |||||
func (this *Converter) destroy() { | |||||
this.Close() | |||||
// Close a Converter's iconv descriptor explicitly | |||||
func (converter *Converter) Close() error { | |||||
_, err := C.iconv_close(converter.context) | |||||
return err | |||||
} | } | ||||
// Close a Converter's iconv description explicitly | |||||
func (this *Converter) Close() (err error) { | |||||
if this.open { | |||||
_, err = C.iconv_close(this.context) | |||||
} | |||||
return | |||||
// Reset state of iconv context | |||||
func (converter *Converter) Reset() error { | |||||
_, _, err := converter.Convert(nil, nil) | |||||
return err | |||||
} | } | ||||
// Convert bytes from an input byte slice into a give output byte slice | // Convert bytes from an input byte slice into a give output byte slice | ||||
@@ -74,95 +68,82 @@ func (this *Converter) Close() (err error) { | |||||
// For shift based output encodings, any end shift byte sequences can be generated by | // For shift based output encodings, any end shift byte sequences can be generated by | ||||
// passing a 0 length byte slice as input. Also passing a 0 length byte slice for output | // passing a 0 length byte slice as input. Also passing a 0 length byte slice for output | ||||
// will simply reset the iconv descriptor shift state without writing any bytes. | // will simply reset the iconv descriptor shift state without writing any bytes. | ||||
func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err error) { | |||||
// make sure we are still open | |||||
if this.open { | |||||
inputLeft := C.size_t(len(input)) | |||||
outputLeft := C.size_t(len(output)) | |||||
if inputLeft > 0 && outputLeft > 0 { | |||||
// we have to give iconv a pointer to a pointer of the underlying | |||||
// storage of each byte slice - so far this is the simplest | |||||
// way i've found to do that in Go, but it seems ugly | |||||
inputPointer := (*C.char)(unsafe.Pointer(&input[0])) | |||||
outputPointer := (*C.char)(unsafe.Pointer(&output[0])) | |||||
_, err = C.call_iconv(this.context, inputPointer, &inputLeft, outputPointer, &outputLeft) | |||||
// update byte counters | |||||
bytesRead = len(input) - int(inputLeft) | |||||
bytesWritten = len(output) - int(outputLeft) | |||||
} else if inputLeft == 0 && outputLeft > 0 { | |||||
// inputPointer will be nil, outputPointer is generated as above | |||||
outputPointer := (*C.char)(unsafe.Pointer(&output[0])) | |||||
_, err = C.call_iconv(this.context, nil, &inputLeft, outputPointer, &outputLeft) | |||||
// update write byte counter | |||||
bytesWritten = len(output) - int(outputLeft) | |||||
} else { | |||||
// both input and output are zero length, do a shift state reset | |||||
_, err = C.call_iconv(this.context, nil, &inputLeft, nil, &outputLeft) | |||||
} | |||||
} else { | |||||
err = syscall.EBADF | |||||
func (converter *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err error) { | |||||
inputLeft := C.size_t(len(input)) | |||||
outputLeft := C.size_t(len(output)) | |||||
var inputPointer, outputPointer *C.char | |||||
if inputLeft > 0 { | |||||
inputPointer = (*C.char)(unsafe.Pointer(&input[0])) | |||||
} | |||||
if outputLeft > 0 { | |||||
outputPointer = (*C.char)(unsafe.Pointer(&output[0])) | |||||
} | } | ||||
_, err = C.call_iconv(converter.context, inputPointer, &inputLeft, outputPointer, &outputLeft) | |||||
bytesRead = len(input) - int(inputLeft) | |||||
bytesWritten = len(output) - int(outputLeft) | |||||
return bytesRead, bytesWritten, err | return bytesRead, bytesWritten, err | ||||
} | } | ||||
// Convert an input string | // Convert an input string | ||||
// | // | ||||
// EILSEQ error may be returned if input contains invalid bytes for the | |||||
// Converter's fromEncoding. | |||||
func (this *Converter) ConvertString(input string) (output string, err error) { | |||||
// make sure we are still open | |||||
if this.open { | |||||
// construct the buffers | |||||
inputBuffer := []byte(input) | |||||
outputBuffer := make([]byte, len(inputBuffer)*2) // we use a larger buffer to help avoid resizing later | |||||
// call Convert until all input bytes are read or an error occurs | |||||
var bytesRead, totalBytesRead, bytesWritten, totalBytesWritten int | |||||
for totalBytesRead < len(inputBuffer) && err == nil { | |||||
// use the totals to create buffer slices | |||||
bytesRead, bytesWritten, err = this.Convert(inputBuffer[totalBytesRead:], outputBuffer[totalBytesWritten:]) | |||||
totalBytesRead += bytesRead | |||||
totalBytesWritten += bytesWritten | |||||
// check for the E2BIG error specifically, we can add to the output | |||||
// buffer to correct for it and then continue | |||||
if err == syscall.E2BIG { | |||||
// increase the size of the output buffer by another input length | |||||
// first, create a new buffer | |||||
tempBuffer := make([]byte, len(outputBuffer)+len(inputBuffer)) | |||||
// copy the existing data | |||||
copy(tempBuffer, outputBuffer) | |||||
// switch the buffers | |||||
outputBuffer = tempBuffer | |||||
// forget the error | |||||
// EILSEQ error may be returned if input contains invalid bytes for the Converter's fromEncoding | |||||
func (converter *Converter) ConvertString(input string) (output string, err error) { | |||||
// construct the buffers | |||||
inputBuffer := []byte(input) | |||||
outputBuffer := make([]byte, len(inputBuffer)*2) // we use a larger buffer to help avoid resizing later | |||||
// call Convert until all input bytes are read or an error occurs | |||||
var bytesRead, totalBytesRead, bytesWritten, totalBytesWritten int | |||||
for totalBytesRead < len(inputBuffer) && err == nil { | |||||
// use the totals to create buffer slices | |||||
bytesRead, bytesWritten, err = converter.Convert(inputBuffer[totalBytesRead:], outputBuffer[totalBytesWritten:]) | |||||
totalBytesRead += bytesRead | |||||
totalBytesWritten += bytesWritten | |||||
switch err { | |||||
case syscall.E2BIG: | |||||
// increase the size of the output buffer by another input length | |||||
// first, create a new buffer | |||||
tempBuffer := make([]byte, len(outputBuffer)+len(inputBuffer)) | |||||
// copy the existing data | |||||
copy(tempBuffer, outputBuffer) | |||||
// switch the buffers | |||||
outputBuffer = tempBuffer | |||||
// forget the error | |||||
err = nil | |||||
case syscall.EILSEQ, syscall.EINVAL: | |||||
// iconv can still return these in cases where it still can proceed such as //IGNORE | |||||
if bytesRead > 0 || bytesWritten > 0 { | |||||
err = nil | err = nil | ||||
} | } | ||||
} | } | ||||
} | |||||
if err == nil { | |||||
// perform a final shift state reset | |||||
_, bytesWritten, err = this.Convert([]byte{}, outputBuffer[totalBytesWritten:]) | |||||
// update total count | |||||
totalBytesWritten += bytesWritten | |||||
} | |||||
if err == nil { | |||||
// perform a final shift state reset | |||||
_, bytesWritten, err = converter.Convert(nil, outputBuffer[totalBytesWritten:]) | |||||
// construct the final output string | |||||
output = string(outputBuffer[:totalBytesWritten]) | |||||
} else { | |||||
err = syscall.EBADF | |||||
// update total count | |||||
totalBytesWritten += bytesWritten | |||||
} | } | ||||
// construct the final output string | |||||
output = string(outputBuffer[:totalBytesWritten]) | |||||
return output, err | return output, err | ||||
} | } | ||||
func finalizeConverter(converter *Converter) { | |||||
converter.Close() | |||||
} |
@@ -1,6 +1,9 @@ | |||||
package iconv | package iconv | ||||
import ( | import ( | ||||
"bytes" | |||||
"io" | |||||
"strings" | |||||
"syscall" | "syscall" | ||||
"testing" | "testing" | ||||
) | ) | ||||
@@ -13,105 +16,486 @@ type iconvTest struct { | |||||
outputEncoding string | outputEncoding string | ||||
bytesRead int | bytesRead int | ||||
bytesWritten int | bytesWritten int | ||||
err error | |||||
convertErr error // err from Convert (raw iconv) | |||||
err error // err from CovertString, Reader, Writer | |||||
} | } | ||||
var iconvTests = []iconvTest{ | |||||
iconvTest{ | |||||
"simple utf-8 to latin1 conversion success", | |||||
"Hello World!", "utf-8", | |||||
"Hello World!", "latin1", | |||||
12, 12, nil, | |||||
}, | |||||
iconvTest{ | |||||
"invalid source encoding causes EINVAL", | |||||
"", "doesnotexist", | |||||
"", "utf-8", | |||||
0, 0, syscall.EINVAL, | |||||
}, | |||||
iconvTest{ | |||||
"invalid destination encoding causes EINVAL", | |||||
"", "utf-8", | |||||
"", "doesnotexist", | |||||
0, 0, syscall.EINVAL, | |||||
}, | |||||
iconvTest{ | |||||
"invalid input sequence causes EILSEQ", | |||||
"\xFF", "utf-8", | |||||
"", "latin1", | |||||
0, 0, syscall.EILSEQ, | |||||
}, | |||||
iconvTest{ | |||||
"invalid input causes partial output and EILSEQ", | |||||
"Hello\xFF", "utf-8", | |||||
"Hello", "latin1", | |||||
5, 5, syscall.EILSEQ, | |||||
}, | |||||
var ( | |||||
iconvTests = []iconvTest{ | |||||
iconvTest{ | |||||
"simple utf-8 to latin1 conversion success", | |||||
"Hello World!", "utf-8", | |||||
"Hello World!", "latin1", | |||||
12, 12, nil, nil, | |||||
}, | |||||
iconvTest{ | |||||
"invalid source encoding causes EINVAL", | |||||
"", "doesnotexist", | |||||
"", "utf-8", | |||||
0, 0, syscall.EINVAL, syscall.EINVAL, | |||||
}, | |||||
iconvTest{ | |||||
"invalid destination encoding causes EINVAL", | |||||
"", "utf-8", | |||||
"", "doesnotexist", | |||||
0, 0, syscall.EINVAL, syscall.EINVAL, | |||||
}, | |||||
iconvTest{ | |||||
"utf-8 to utf-8 passthrough", | |||||
"Hello world!", "utf-8", | |||||
"Hello world!", "utf-8", | |||||
12, 12, nil, nil, | |||||
}, | |||||
iconvTest{ | |||||
"utf-8 to utf-8 partial", | |||||
"Hello\xFFWorld!", "utf-8", | |||||
"Hello", "utf-8", | |||||
5, 5, syscall.EILSEQ, syscall.EILSEQ, | |||||
}, | |||||
iconvTest{ | |||||
"utf-8 to utf-8 ignored", | |||||
"Hello \xFFWorld!", "utf-8", | |||||
"Hello World!", "utf-8//IGNORE", | |||||
13, 12, syscall.EILSEQ, nil, | |||||
}, | |||||
iconvTest{ | |||||
"invalid input sequence causes EILSEQ", | |||||
"\xFF", "utf-8", | |||||
"", "latin1", | |||||
0, 0, syscall.EILSEQ, syscall.EILSEQ, | |||||
}, | |||||
iconvTest{ | |||||
"incomplete input sequence causes EINVAL", | |||||
"\xC2", "utf-8", | |||||
"", "latin1", | |||||
0, 0, syscall.EINVAL, syscall.EINVAL, | |||||
}, | |||||
iconvTest{ | |||||
"invalid input causes partial output and EILSEQ", | |||||
"Hello\xFF", "utf-8", | |||||
"Hello", "latin1", | |||||
5, 5, syscall.EILSEQ, syscall.EILSEQ, | |||||
}, | |||||
iconvTest{ | |||||
"incomplete input causes partial output and EILSEQ", | |||||
"Hello\xC2", "utf-8", | |||||
"Hello", "latin1", | |||||
5, 5, syscall.EINVAL, syscall.EINVAL, | |||||
}, | |||||
/* this is only true for glibc / iconv | |||||
iconvTest{ | |||||
"valid input but no conversion causes EILSEQ", | |||||
"你好世界 Hello World", "utf-8", | |||||
"", "latin1", | |||||
0, 0, syscall.EILSEQ, syscall.EILSEQ, | |||||
},*/ | |||||
iconvTest{ | |||||
"invalid input with ignore", | |||||
"Hello\xFF World!", "utf-8", | |||||
"Hello World!", "latin1//IGNORE", | |||||
13, 12, syscall.EILSEQ, nil, | |||||
}, | |||||
iconvTest{ | |||||
"valid input but no conversion with IGNORE", | |||||
"你好世界 Hello World", "utf-8", | |||||
" Hello World", "latin1//IGNORE", | |||||
24, 12, syscall.EILSEQ, nil, | |||||
}, | |||||
iconvTest{ | |||||
"valid input but no conversion with TRANSLIT", | |||||
"你好世界 Hello World", "utf-8", | |||||
"???? Hello World", "latin1//TRANSLIT", | |||||
24, 16, nil, nil, | |||||
}, | |||||
} | |||||
ignoreDetected, translitDetected bool | |||||
) | |||||
func init() { | |||||
// detect if IGNORE / TRANSLIT is supported (glic / libiconv) | |||||
conv, err := NewConverter("utf-8", "ascii//IGNORE") | |||||
if err == nil { | |||||
ignoreDetected = true | |||||
conv.Close() | |||||
} | |||||
conv, err = NewConverter("utf-8", "ascii//TRANSLIT") | |||||
if err == nil { | |||||
translitDetected = true | |||||
conv.Close() | |||||
} | |||||
} | } | ||||
func TestConvertString(t *testing.T) { | |||||
func runTests(t *testing.T, f func(iconvTest, *testing.T) (int, int, string, error)) { | |||||
for _, test := range iconvTests { | for _, test := range iconvTests { | ||||
// perform the conversion | |||||
output, err := ConvertString(test.input, test.inputEncoding, test.outputEncoding) | |||||
t.Run(test.description, func(t *testing.T) { | |||||
if !ignoreDetected && strings.HasSuffix(test.outputEncoding, "//IGNORE") { | |||||
t.Skip("//IGNORE not supported") | |||||
} | |||||
// check that output and err match | |||||
if output != test.output { | |||||
t.Errorf("test \"%s\" failed, output did not match expected", test.description) | |||||
} | |||||
if !translitDetected && strings.HasSuffix(test.outputEncoding, "//TRANSLIT") { | |||||
t.Skip("//TRANSLIT not supported") | |||||
} | |||||
// check that err is same as expected | |||||
if err != test.err { | |||||
if test.err != nil { | |||||
if err != nil { | |||||
t.Errorf("test \"%s\" failed, got %s when expecting %s", test.description, err, test.err) | |||||
bytesRead, bytesWritten, output, err := f(test, t) | |||||
// check that bytesRead is same as expected | |||||
if bytesRead != test.bytesRead { | |||||
t.Errorf("bytesRead: %d expected: %d", bytesRead, test.bytesRead) | |||||
} | |||||
// check that bytesWritten is same as expected | |||||
if bytesWritten != test.bytesWritten { | |||||
t.Errorf("bytesWritten: %d expected: %d", bytesWritten, test.bytesWritten) | |||||
} | |||||
// check output bytes against expected | |||||
if output != test.output { | |||||
t.Errorf("output: %x expected: %x", output, test.output) | |||||
} | |||||
// check that err is same as expected | |||||
if err != test.err { | |||||
if test.err != nil { | |||||
if err != nil { | |||||
t.Errorf("err: %q expected: %q", err, test.err) | |||||
} else { | |||||
t.Errorf("err: nil expected %q", test.err) | |||||
} | |||||
} else { | } else { | ||||
t.Errorf("test \"%s\" failed, got nil when expecting %s", test.description, test.err) | |||||
t.Errorf("unexpected error: %q", err) | |||||
} | } | ||||
} else { | |||||
t.Errorf("test \"%s\" failed, got unexpected error: %s", test.description, err) | |||||
} | } | ||||
} | |||||
}) | |||||
} | } | ||||
} | } | ||||
func TestConvert(t *testing.T) { | func TestConvert(t *testing.T) { | ||||
for _, test := range iconvTests { | |||||
// setup input buffer | |||||
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) { | |||||
input := []byte(test.input) | input := []byte(test.input) | ||||
// setup a buffer as large as the expected bytesWritten | |||||
output := make([]byte, 50) | output := make([]byte, 50) | ||||
// peform the conversion | // peform the conversion | ||||
bytesRead, bytesWritten, err := Convert(input, output, test.inputEncoding, test.outputEncoding) | bytesRead, bytesWritten, err := Convert(input, output, test.inputEncoding, test.outputEncoding) | ||||
// check that bytesRead is same as expected | |||||
if bytesRead != test.bytesRead { | |||||
t.Errorf("test \"%s\" failed, bytesRead did not match expected", test.description) | |||||
// HACK Convert has different erorrs, so check ourselves, and then fake out later check | |||||
if err != test.convertErr { | |||||
if test.convertErr != nil { | |||||
if err != nil { | |||||
t.Errorf("err: %q expected: %q", err, test.convertErr) | |||||
} else { | |||||
t.Errorf("err: nil expected %q", test.convertErr) | |||||
} | |||||
} else { | |||||
t.Errorf("unexpected error: %q", err) | |||||
} | |||||
} | } | ||||
err = test.err | |||||
// check that bytesWritten is same as expected | |||||
if bytesWritten != test.bytesWritten { | |||||
t.Errorf("test \"%s\" failed, bytesWritten did not match expected", test.description) | |||||
} | |||||
return bytesRead, bytesWritten, string(output[:bytesWritten]), err | |||||
}) | |||||
} | |||||
// check output bytes against expected - simplest to convert output to | |||||
// string and then do an equality check which is actually a byte wise operation | |||||
if string(output[:bytesWritten]) != test.output { | |||||
t.Errorf("test \"%s\" failed, output did not match expected", test.description) | |||||
} | |||||
func TestConvertString(t *testing.T) { | |||||
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) { | |||||
// perform the conversion | |||||
output, err := ConvertString(test.input, test.inputEncoding, test.outputEncoding) | |||||
// check that err is same as expected | |||||
if err != test.err { | |||||
if test.err != nil { | |||||
if err != nil { | |||||
t.Errorf("test \"%s\" failed, got %s when expecting %s", test.description, err, test.err) | |||||
} else { | |||||
t.Errorf("test \"%s\" failed, got nil when expecting %s", test.description, test.err) | |||||
// bytesRead and bytesWritten are spoofed a little | |||||
return test.bytesRead, len(output), output, err | |||||
}) | |||||
} | |||||
func TestReader(t *testing.T) { | |||||
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) { | |||||
var bytesRead, bytesWritten, finalBytesWritten int | |||||
var err error | |||||
input := bytes.NewBufferString(test.input) | |||||
output := make([]byte, 50) | |||||
reader, err := NewReader(input, test.inputEncoding, test.outputEncoding) | |||||
if err == nil { | |||||
bytesWritten, err = reader.Read(output) | |||||
// we can compute how many bytes iconv read by inspecting the reader state | |||||
bytesRead = len([]byte(test.input)) - input.Len() - (reader.writePos - reader.readPos) | |||||
// with current tests and buffer sizes, we'd expect all input to be buffered if we called read | |||||
if input.Len() != 0 { | |||||
t.Error("not all bytes from input were buffered") | |||||
} | |||||
// do final read test if we can - either get EOF or same test error | |||||
if err == nil { | |||||
finalBytesWritten, err = reader.Read(output[bytesWritten:]) | |||||
if finalBytesWritten != 0 { | |||||
t.Errorf("finalBytesWritten: %d expected: 0", finalBytesWritten) | |||||
} | |||||
if err == io.EOF { | |||||
err = nil | |||||
} | } | ||||
} else { | |||||
t.Errorf("test \"%s\" failed, got unexpected error: %s", test.description, err) | |||||
} | } | ||||
} | } | ||||
return bytesRead, bytesWritten, string(output[:bytesWritten]), err | |||||
}) | |||||
} | |||||
func TestWriter(t *testing.T) { | |||||
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) { | |||||
var bytesRead, bytesWritten int | |||||
var err error | |||||
input := []byte(test.input) | |||||
output := new(bytes.Buffer) | |||||
writer, err := NewWriter(output, test.inputEncoding, test.outputEncoding) | |||||
if err == nil { | |||||
bytesRead, err = writer.Write(input) | |||||
bytesRead -= writer.readPos | |||||
writer.Close() | |||||
bytesWritten = output.Len() | |||||
} | |||||
return bytesRead, bytesWritten, output.String(), err | |||||
}) | |||||
} | |||||
func TestReaderWithCopy(t *testing.T) { | |||||
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) { | |||||
input := bytes.NewBufferString(test.input) | |||||
output := new(bytes.Buffer) | |||||
reader, err := NewReader(input, test.inputEncoding, test.outputEncoding) | |||||
if err == nil { | |||||
_, err := io.Copy(output, reader) | |||||
bytesRead := len(test.input) - input.Len() - reader.writePos | |||||
bytesWritten := output.Len() | |||||
return bytesRead, bytesWritten, output.String(), err | |||||
} | |||||
return 0, 0, output.String(), err | |||||
}) | |||||
} | |||||
func TestWriterWithCopy(t *testing.T) { | |||||
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) { | |||||
input := bytes.NewBufferString(test.input) | |||||
output := new(bytes.Buffer) | |||||
writer, err := NewWriter(output, test.inputEncoding, test.outputEncoding) | |||||
if err == nil { | |||||
bytesCopied, err := io.Copy(writer, input) | |||||
bytesRead := int(bytesCopied) - writer.readPos | |||||
writer.Close() | |||||
bytesWritten := output.Len() | |||||
return bytesRead, bytesWritten, output.String(), err | |||||
} | |||||
return 0, 0, output.String(), err | |||||
}) | |||||
} | |||||
func TestReaderMultipleReads(t *testing.T) { | |||||
// setup a source reader and our expected output string | |||||
source := bytes.NewBufferString("\x80\x8A\x99\x95\x8B\x86\x87") | |||||
expected := "€Š™•‹†‡" | |||||
// setup reader - use our minimum buffer size so we can force it to shuffle the buffer around | |||||
reader, err := NewReaderSized(source, "cp1252", "utf-8", minReadBufferSize) | |||||
if err != nil { | |||||
if err == syscall.EINVAL { | |||||
t.Skip("Either cp1252 or utf-8 isn't supported by iconv on your system") | |||||
} else { | |||||
t.Fatalf("Unexpected error when creating reader: %s", err) | |||||
} | |||||
} | |||||
// setup a read buffer - we'll slice it to different sizes in our tests | |||||
buffer := make([]byte, 64) | |||||
// first read should fill internal buffer, but we'll only read part of it | |||||
bytesRead, err := reader.Read(buffer[:5]) | |||||
if bytesRead != 5 || err != nil { | |||||
t.Fatalf("first read did not give expected 5, nil: %d, %s", bytesRead, err) | |||||
} | |||||
// because of how small teh source is and our minimum buffer size, source shoudl be fully read | |||||
if source.Len() != 0 { | |||||
t.Fatalf("first read did not buffer all of source like expected: %d bytes remain", source.Len()) | |||||
} | |||||
// Buffer doesn't return EOF with last bytes, reader shouldn't know its EOF yet | |||||
if reader.eof { | |||||
t.Fatalf("first read was not expected to receive EOF") | |||||
} | |||||
// second read should shift internal buffer, and fill again - make buffer too small for last utf-8 character | |||||
// E2BIG from iconv should be ignored because we wrote at least 1 byte | |||||
bytesRead, err = reader.Read(buffer[5:18]) | |||||
if bytesRead != 12 || err != nil { | |||||
t.Fatalf("second read did not give expected 15, nil: %d, %s", bytesRead, err) | |||||
} | |||||
if !reader.eof { | |||||
t.Fatalf("second read did not put reader into eof state") | |||||
} | |||||
// try to read the last 3 byte character with only a buffer of 2 bytes - this time we should see the E2BIG | |||||
bytesRead, err = reader.Read(buffer[17:19]) | |||||
if bytesRead != 0 || err != syscall.E2BIG { | |||||
t.Fatalf("third read did not give expected 0, E2BIG: %d, %s", bytesRead, err) | |||||
} | |||||
// fourth read should finish last character | |||||
bytesRead, err = reader.Read(buffer[17:]) | |||||
if bytesRead != 3 || err != nil { | |||||
t.Fatalf("fourth read did not give expected 3, nil: %d, %s", bytesRead, err) | |||||
} | |||||
// last read should be EOF | |||||
bytesRead, err = reader.Read(buffer[20:]) | |||||
if bytesRead != 0 || err != io.EOF { | |||||
t.Fatalf("final read did not give expected 0, EOF: %d, %s", bytesRead, err) | |||||
} | |||||
// check full utf-8 output | |||||
if string(buffer[:20]) != expected { | |||||
t.Fatalf("output did not match expected %q: %q", expected, string(buffer[:20])) | |||||
} | |||||
} | |||||
func TestWriteWithIncompleteSequence(t *testing.T) { | |||||
expected := "\x80\x8A\x99\x95\x8B\x86\x87" | |||||
input := []byte("€Š™•‹†‡") | |||||
output := new(bytes.Buffer) | |||||
writer, err := NewWriter(output, "utf-8", "cp1252") | |||||
if err != nil { | |||||
t.Fatalf("unexpected error while creating writer %q", err) | |||||
} | |||||
// the input string is made of 3 byte characters, for the test we want to only write part of the last character | |||||
bytesFromBuffer := len(input) - 2 | |||||
bytesRead, err := writer.Write(input[:bytesFromBuffer]) | |||||
if bytesRead != bytesFromBuffer { | |||||
t.Fatalf("did a short write on first write: %d, %s", bytesRead, err) | |||||
} | |||||
// finish the rest | |||||
bytesRead, err = writer.Write(input[bytesFromBuffer:]) | |||||
if bytesRead != 2 { | |||||
t.Fatalf("did a short write on second write: %d, %s", bytesRead, err) | |||||
} | |||||
err = writer.Close() | |||||
actual := output.String() | |||||
if err != nil { | |||||
t.Errorf("got an error on close: %s", err) | |||||
} | |||||
if actual != expected { | |||||
t.Errorf("output %x did not match expected %x", actual, expected) | |||||
} | |||||
} | |||||
func TestWriteWithIncompleteSequenceAndIgnore(t *testing.T) { | |||||
if !ignoreDetected { | |||||
t.Skip("//IGNORE not supported") | |||||
} | |||||
expected := "\x80\x8A\x99\x95\x8B\x86\x87" | |||||
input := []byte("€Š™•‹†‡") | |||||
output := new(bytes.Buffer) | |||||
writer, err := NewWriter(output, "utf-8", "cp1252//IGNORE") | |||||
if err != nil { | |||||
t.Fatalf("unexpected error while creating writer %q", err) | |||||
} | |||||
// the input string is made of 3 byte characters, for the test we want to only write part of the last character | |||||
bytesFromBuffer := len(input) - 2 | |||||
bytesRead, err := writer.Write(input[:bytesFromBuffer]) | |||||
if bytesRead != bytesFromBuffer { | |||||
t.Fatalf("did a short write on first write: %d, %s", bytesRead, err) | |||||
} | |||||
// finish the rest | |||||
bytesRead, err = writer.Write(input[bytesFromBuffer:]) | |||||
if bytesRead != 2 { | |||||
t.Fatalf("did a short write on second write: %d, %s", bytesRead, err) | |||||
} | |||||
err = writer.Close() | |||||
actual := output.String() | |||||
if err != nil { | |||||
t.Errorf("got an error on close: %s", err) | |||||
} | |||||
if actual != expected { | |||||
t.Errorf("output %x did not match expected %x", actual, expected) | |||||
} | |||||
} | |||||
func TestWriteWithIncompleteSequenceAtEOF(t *testing.T) { | |||||
expected := "\x80\x8A\x99\x95\x8B\x86" | |||||
input := []byte("€Š™•‹†‡") | |||||
output := new(bytes.Buffer) | |||||
writer, err := NewWriter(output, "utf-8", "cp1252") | |||||
if err != nil { | |||||
t.Fatalf("unexpected error while creating writer %q", err) | |||||
} | |||||
// the input string is made of 3 byte characters, for the test we want to only write part of the last character | |||||
bytesFromBuffer := len(input) - 2 | |||||
bytesRead, err := writer.Write(input[:bytesFromBuffer]) | |||||
if bytesRead != bytesFromBuffer { | |||||
t.Fatalf("did a short write on first write: %d, %s", bytesRead, err) | |||||
} | |||||
err = writer.Close() | |||||
actual := output.String() | |||||
if err != nil { | |||||
t.Errorf("got an error on close: %s", err) | |||||
} | |||||
if actual != expected { | |||||
t.Errorf("output %x did not match expected %x", actual, expected) | |||||
} | } | ||||
} | } |
@@ -2,7 +2,12 @@ package iconv | |||||
import ( | import ( | ||||
"io" | "io" | ||||
"syscall" | |||||
"runtime" | |||||
) | |||||
const ( | |||||
defaultReadBufferSize = 8 * 1024 | |||||
minReadBufferSize = 16 | |||||
) | ) | ||||
type Reader struct { | type Reader struct { | ||||
@@ -10,91 +15,100 @@ type Reader struct { | |||||
converter *Converter | converter *Converter | ||||
buffer []byte | buffer []byte | ||||
readPos, writePos int | readPos, writePos int | ||||
err error | |||||
eof bool | |||||
} | } | ||||
func NewReader(source io.Reader, fromEncoding string, toEncoding string) (*Reader, error) { | |||||
// create a converter | |||||
converter, err := NewConverter(fromEncoding, toEncoding) | |||||
if err == nil { | |||||
return NewReaderFromConverter(source, converter), err | |||||
} | |||||
func NewReader(source io.Reader, fromEncoding, toEncoding string) (*Reader, error) { | |||||
return NewReaderSized(source, fromEncoding, toEncoding, defaultReadBufferSize) | |||||
} | |||||
// return the error | |||||
return nil, err | |||||
func NewReaderFromConverter(source io.Reader, converter *Converter) *Reader { | |||||
return NewReaderFromConverterSized(source, converter, defaultReadBufferSize) | |||||
} | } | ||||
func NewReaderFromConverter(source io.Reader, converter *Converter) (reader *Reader) { | |||||
reader = new(Reader) | |||||
func NewReaderSized(source io.Reader, fromEncoding, toEncoding string, size int) (*Reader, error) { | |||||
converter, err := NewConverter(fromEncoding, toEncoding) | |||||
// copy elements | |||||
reader.source = source | |||||
reader.converter = converter | |||||
if err != nil { | |||||
return nil, err | |||||
} | |||||
// create 8K buffers | |||||
reader.buffer = make([]byte, 8*1024) | |||||
// add a finalizer for the converter we created | |||||
runtime.SetFinalizer(converter, finalizeConverter) | |||||
return reader | |||||
return NewReaderFromConverterSized(source, converter, size), nil | |||||
} | } | ||||
func (this *Reader) fillBuffer() { | |||||
// slide existing data to beginning | |||||
if this.readPos > 0 { | |||||
// copy current bytes - is this guaranteed safe? | |||||
copy(this.buffer, this.buffer[this.readPos:this.writePos]) | |||||
// adjust positions | |||||
this.writePos -= this.readPos | |||||
this.readPos = 0 | |||||
func NewReaderFromConverterSized(source io.Reader, converter *Converter, size int) *Reader { | |||||
if size < minReadBufferSize { | |||||
size = minReadBufferSize | |||||
} | } | ||||
// read new data into buffer at write position | |||||
bytesRead, err := this.source.Read(this.buffer[this.writePos:]) | |||||
// adjust write position | |||||
this.writePos += bytesRead | |||||
// track any reader error / EOF | |||||
if err != nil { | |||||
this.err = err | |||||
return &Reader{ | |||||
source: source, | |||||
converter: converter, | |||||
buffer: make([]byte, size), | |||||
} | } | ||||
} | } | ||||
// implement the io.Reader interface | |||||
func (this *Reader) Read(p []byte) (n int, err error) { | |||||
// checks for when we have no data | |||||
for this.writePos == 0 || this.readPos == this.writePos { | |||||
// if we have an error / EOF, just return it | |||||
if this.err != nil { | |||||
return n, this.err | |||||
func (r *Reader) Read(p []byte) (int, error) { | |||||
if len(p) == 0 { | |||||
return 0, nil | |||||
} | |||||
var bytesRead, bytesWritten int | |||||
var err error | |||||
// setup for a single read into buffer if possible | |||||
if !r.eof { | |||||
if r.readPos > 0 { | |||||
// slide data to front of buffer | |||||
r.readPos, r.writePos = 0, copy(r.buffer, r.buffer[r.readPos:r.writePos]) | |||||
} | } | ||||
// else, fill our buffer | |||||
this.fillBuffer() | |||||
} | |||||
if r.writePos < len(r.buffer) { | |||||
// do the single read | |||||
bytesRead, err = r.source.Read(r.buffer[r.writePos:]) | |||||
// TODO: checks for when we have less data than len(p) | |||||
if bytesRead < 0 { | |||||
panic("iconv: source reader returned negative count from Read") | |||||
} | |||||
// we should have an appropriate amount of data, convert it into the given buffer | |||||
bytesRead, bytesWritten, err := this.converter.Convert(this.buffer[this.readPos:this.writePos], p) | |||||
r.writePos += bytesRead | |||||
r.eof = err == io.EOF | |||||
} | |||||
} | |||||
// adjust byte counters | |||||
this.readPos += bytesRead | |||||
n += bytesWritten | |||||
if r.readPos < r.writePos || r.eof { | |||||
// convert any buffered data we have, or do a final reset (for shift based conversions) | |||||
bytesRead, bytesWritten, err = r.converter.Convert(r.buffer[r.readPos:r.writePos], p) | |||||
r.readPos += bytesRead | |||||
// if we experienced an iconv error, check it | |||||
if err != nil { | |||||
// E2BIG errors can be ignored (we'll get them often) as long | |||||
// as at least 1 byte was written. If we experienced an E2BIG | |||||
// and no bytes were written then the buffer is too small for | |||||
// even the next character | |||||
if err != syscall.E2BIG || bytesWritten == 0 { | |||||
// track anything else | |||||
this.err = err | |||||
// if we experienced an iconv error and didn't make progress, report it. | |||||
// if we did make progress, it may be informational only (i.e. reporting | |||||
// an EILSEQ even when using //ignore to skip them) | |||||
if err != nil && bytesWritten == 0 { | |||||
return bytesWritten, err | |||||
} | } | ||||
// signal an EOF only if we didn't write anything - accomodates premature | |||||
// errror checking in user code | |||||
if bytesWritten == 0 && r.eof { | |||||
return 0, io.EOF | |||||
} | |||||
return bytesWritten, nil | |||||
} | } | ||||
// return our results | |||||
return n, this.err | |||||
return 0, err | |||||
} | |||||
func (r *Reader) Reset(source io.Reader) { | |||||
r.converter.Reset() | |||||
*r = Reader{ | |||||
source: source, | |||||
converter: r.converter, | |||||
buffer: r.buffer, | |||||
} | |||||
} | } |
@@ -1,82 +1,181 @@ | |||||
package iconv | package iconv | ||||
import "io" | |||||
import ( | |||||
"io" | |||||
"runtime" | |||||
"syscall" | |||||
) | |||||
const ( | |||||
defaultWriteBufferSize = 8 * 1024 | |||||
minWriteBufferSize = 16 | |||||
) | |||||
type Writer struct { | type Writer struct { | ||||
destination io.Writer | |||||
converter *Converter | |||||
buffer []byte | |||||
readPos, writePos int | |||||
err error | |||||
destination io.Writer | |||||
converter *Converter | |||||
readBuffer, writeBuffer []byte | |||||
readPos, writePos int | |||||
} | } | ||||
func NewWriter(destination io.Writer, fromEncoding string, toEncoding string) (*Writer, error) { | func NewWriter(destination io.Writer, fromEncoding string, toEncoding string) (*Writer, error) { | ||||
// create a converter | |||||
return NewWriterSized(destination, fromEncoding, toEncoding, defaultWriteBufferSize) | |||||
} | |||||
func NewWriterFromConverter(destination io.Writer, converter *Converter) (writer *Writer) { | |||||
return NewWriterFromConverterSized(destination, converter, defaultWriteBufferSize) | |||||
} | |||||
func NewWriterSized(destination io.Writer, fromEncoding, toEncoding string, size int) (*Writer, error) { | |||||
converter, err := NewConverter(fromEncoding, toEncoding) | converter, err := NewConverter(fromEncoding, toEncoding) | ||||
if err == nil { | |||||
return NewWriterFromConverter(destination, converter), err | |||||
if err != nil { | |||||
return nil, err | |||||
} | } | ||||
// return the error | |||||
return nil, err | |||||
// add a finalizer for the converter we created | |||||
runtime.SetFinalizer(converter, finalizeConverter) | |||||
return NewWriterFromConverterSized(destination, converter, size), nil | |||||
} | } | ||||
func NewWriterFromConverter(destination io.Writer, converter *Converter) (writer *Writer) { | |||||
writer = new(Writer) | |||||
func NewWriterFromConverterSized(destination io.Writer, converter *Converter, size int) *Writer { | |||||
if size < minWriteBufferSize { | |||||
size = minWriteBufferSize | |||||
} | |||||
// copy elements | |||||
writer.destination = destination | |||||
writer.converter = converter | |||||
return &Writer{ | |||||
destination: destination, | |||||
converter: converter, | |||||
readBuffer: make([]byte, size), | |||||
writeBuffer: make([]byte, size), | |||||
} | |||||
} | |||||
// create 8K buffers | |||||
writer.buffer = make([]byte, 8*1024) | |||||
// Implements io.Writer | |||||
// | |||||
// Will attempt to convert all of p into buffer. If there's not enough room in | |||||
// the buffer to hold all converted bytes, the buffer will be flushed and p will | |||||
// continue to be processed. Close should be called on a writer when finished | |||||
// with all writes, to ensure final shift sequences are written and buffer is | |||||
// flushed to underlying io.Writer. | |||||
// | |||||
// Can return all errors that Convert can, as well as any errors from Flush. Note | |||||
// that some errors from Convert are suppressed if we continue making progress | |||||
// on p. | |||||
func (w *Writer) Write(p []byte) (int, error) { | |||||
var totalBytesRead, bytesRead, bytesWritten int | |||||
var err error | |||||
if w.readPos == 0 || len(p) == 0 { | |||||
bytesRead, bytesWritten, err = w.converter.Convert(p, w.writeBuffer[w.writePos:]) | |||||
totalBytesRead += bytesRead | |||||
w.writePos += bytesWritten | |||||
w.readPos = 0 | |||||
} else { | |||||
// we have left over bytes from previous write that weren't complete and there's at least | |||||
// one byte being written, fill read buffer with p and try to convert, if we make progress | |||||
// we can continue conversion from p itself | |||||
bytesCopied := copy(w.readBuffer[w.readPos:], p) | |||||
bytesRead, bytesWritten, err = w.converter.Convert(w.readBuffer[:w.readPos+bytesCopied], w.writeBuffer[w.writePos:]) | |||||
// if we made no progress, give up | |||||
if bytesRead <= w.readPos { | |||||
return 0, err | |||||
} | |||||
return writer | |||||
} | |||||
bytesRead -= w.readPos | |||||
totalBytesRead += bytesRead | |||||
func (this *Writer) emptyBuffer() { | |||||
// write new data out of buffer | |||||
bytesWritten, err := this.destination.Write(this.buffer[this.readPos:this.writePos]) | |||||
w.readPos = 0 | |||||
w.writePos += bytesWritten | |||||
} | |||||
// update read position | |||||
this.readPos += bytesWritten | |||||
// try to process all of p - lots of io functions don't like short writes. | |||||
// | |||||
// There are a few error cases we need to treat specially, as long as we've | |||||
// made progress on p, E2BIG and EILSEQ should not be fatal. EINVAL isn't | |||||
// fatal as long as the rest of p fits in our buffers. | |||||
for err != nil && bytesRead > 0 { | |||||
switch err { | |||||
case syscall.E2BIG: | |||||
err = w.Flush() | |||||
case syscall.EILSEQ: | |||||
// IGNORE suffix still reports the error on convert | |||||
err = nil | |||||
// if no more bytes, don't do an empty convert (resets state) | |||||
if totalBytesRead == len(p) { | |||||
break | |||||
} | |||||
case syscall.EINVAL: | |||||
// if the rest of p fits in read buffer copy it there | |||||
if len(p[totalBytesRead:]) <= len(w.readBuffer) { | |||||
w.readPos = copy(w.readBuffer, p[totalBytesRead:]) | |||||
totalBytesRead += w.readPos | |||||
break | |||||
} | |||||
} | |||||
// slide existing data to beginning | |||||
if this.readPos > 0 { | |||||
// copy current bytes - is this guaranteed safe? | |||||
copy(this.buffer, this.buffer[this.readPos:this.writePos]) | |||||
// if not an ignoreable err or Flush err | |||||
if err != nil { | |||||
break | |||||
} | |||||
// adjust positions | |||||
this.writePos -= this.readPos | |||||
this.readPos = 0 | |||||
bytesRead, bytesWritten, err = w.converter.Convert(p[totalBytesRead:], w.writeBuffer[w.writePos:]) | |||||
totalBytesRead += bytesRead | |||||
w.writePos += bytesWritten | |||||
} | } | ||||
// track any reader error / EOF | |||||
if err != nil { | |||||
this.err = err | |||||
} | |||||
return totalBytesRead, err | |||||
} | } | ||||
// implement the io.Writer interface | |||||
func (this *Writer) Write(p []byte) (n int, err error) { | |||||
// write data into our internal buffer | |||||
bytesRead, bytesWritten, err := this.converter.Convert(p, this.buffer[this.writePos:]) | |||||
// Attempt to write any buffered data to destination writer. Returns error from | |||||
// Write call or io.ErrShortWrite if Write didn't report an error but also didn't | |||||
// accept all bytes given. | |||||
func (w *Writer) Flush() error { | |||||
if w.readPos < w.writePos { | |||||
bytesWritten, err := w.destination.Write(w.writeBuffer[:w.writePos]) | |||||
// update bytes written for return | |||||
n += bytesRead | |||||
this.writePos += bytesWritten | |||||
if bytesWritten < 0 { | |||||
panic("iconv: writer returned negative count from Write") | |||||
} | |||||
if bytesWritten > 0 { | |||||
w.writePos = copy(w.writeBuffer, w.writeBuffer[bytesWritten:w.writePos]) | |||||
} | |||||
// checks for when we have a full buffer | |||||
for this.writePos > 0 { | |||||
// if we have an error, just return it | |||||
if this.err != nil { | |||||
return | |||||
if err == nil && w.writePos > 0 { | |||||
err = io.ErrShortWrite | |||||
} | } | ||||
// else empty the buffer | |||||
this.emptyBuffer() | |||||
return err | |||||
} | } | ||||
return n, err | |||||
return nil | |||||
} | |||||
// Perform a final write with empty buffer, which allows iconv to close any shift | |||||
// sequences. A Flush is performed if needed. | |||||
func (w *Writer) Close() error { | |||||
_, err := w.Write(nil) | |||||
if err != nil { | |||||
return err | |||||
} | |||||
return w.Flush() | |||||
} | |||||
// Reset state and direct writes to a new destination writer | |||||
func (w *Writer) Reset(destination io.Writer) { | |||||
w.converter.Reset() | |||||
*w = Writer{ | |||||
destination: destination, | |||||
converter: w.converter, | |||||
readBuffer: w.readBuffer, | |||||
writeBuffer: w.writeBuffer, | |||||
} | |||||
} | } |