* add tests that cover same behaviors as Convert and ConvertString * align read and write behaviors with bufio to play nice * add methods that allow to customize buffer size * add methods to reset, allowing reusereader-writer-rework
@@ -6,6 +6,7 @@ package iconv | |||
#cgo windows LDFLAGS: -liconv | |||
#include <stdlib.h> | |||
#include <iconv.h> | |||
#include <locale.h> | |||
// As of GO 1.6 passing a pointer to Go pointer, will lead to panic | |||
// Therofore we use this wrapper function, to avoid passing **char directly from go | |||
@@ -20,47 +21,40 @@ import "unsafe" | |||
type Converter struct { | |||
context C.iconv_t | |||
open bool | |||
} | |||
// Initialize a new Converter. If fromEncoding or toEncoding are not supported by | |||
// iconv then an EINVAL error will be returned. An ENOMEM error maybe returned if | |||
// there is not enough memory to initialize an iconv descriptor | |||
func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err error) { | |||
converter = new(Converter) | |||
func NewConverter(fromEncoding string, toEncoding string) (*Converter, error) { | |||
// convert to C strings | |||
toEncodingC := C.CString(toEncoding) | |||
fromEncodingC := C.CString(fromEncoding) | |||
// open an iconv descriptor | |||
converter.context, err = C.iconv_open(toEncodingC, fromEncodingC) | |||
context, err := C.iconv_open(toEncodingC, fromEncodingC) | |||
// free the C Strings | |||
C.free(unsafe.Pointer(toEncodingC)) | |||
C.free(unsafe.Pointer(fromEncodingC)) | |||
// check err | |||
if err == nil { | |||
// no error, mark the context as open | |||
converter.open = true | |||
if err != nil { | |||
return nil, err | |||
} | |||
return | |||
return &Converter{context}, nil | |||
} | |||
// destroy is called during garbage collection | |||
func (this *Converter) destroy() { | |||
this.Close() | |||
// Close a Converter's iconv descriptor explicitly | |||
func (converter *Converter) Close() error { | |||
_, err := C.iconv_close(converter.context) | |||
return err | |||
} | |||
// Close a Converter's iconv description explicitly | |||
func (this *Converter) Close() (err error) { | |||
if this.open { | |||
_, err = C.iconv_close(this.context) | |||
} | |||
return | |||
// Reset state of iconv context | |||
func (converter *Converter) Reset() error { | |||
_, _, err := converter.Convert(nil, nil) | |||
return err | |||
} | |||
// Convert bytes from an input byte slice into a give output byte slice | |||
@@ -74,95 +68,82 @@ func (this *Converter) Close() (err error) { | |||
// For shift based output encodings, any end shift byte sequences can be generated by | |||
// passing a 0 length byte slice as input. Also passing a 0 length byte slice for output | |||
// will simply reset the iconv descriptor shift state without writing any bytes. | |||
func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err error) { | |||
// make sure we are still open | |||
if this.open { | |||
inputLeft := C.size_t(len(input)) | |||
outputLeft := C.size_t(len(output)) | |||
if inputLeft > 0 && outputLeft > 0 { | |||
// we have to give iconv a pointer to a pointer of the underlying | |||
// storage of each byte slice - so far this is the simplest | |||
// way i've found to do that in Go, but it seems ugly | |||
inputPointer := (*C.char)(unsafe.Pointer(&input[0])) | |||
outputPointer := (*C.char)(unsafe.Pointer(&output[0])) | |||
_, err = C.call_iconv(this.context, inputPointer, &inputLeft, outputPointer, &outputLeft) | |||
// update byte counters | |||
bytesRead = len(input) - int(inputLeft) | |||
bytesWritten = len(output) - int(outputLeft) | |||
} else if inputLeft == 0 && outputLeft > 0 { | |||
// inputPointer will be nil, outputPointer is generated as above | |||
outputPointer := (*C.char)(unsafe.Pointer(&output[0])) | |||
_, err = C.call_iconv(this.context, nil, &inputLeft, outputPointer, &outputLeft) | |||
// update write byte counter | |||
bytesWritten = len(output) - int(outputLeft) | |||
} else { | |||
// both input and output are zero length, do a shift state reset | |||
_, err = C.call_iconv(this.context, nil, &inputLeft, nil, &outputLeft) | |||
} | |||
} else { | |||
err = syscall.EBADF | |||
func (converter *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err error) { | |||
inputLeft := C.size_t(len(input)) | |||
outputLeft := C.size_t(len(output)) | |||
var inputPointer, outputPointer *C.char | |||
if inputLeft > 0 { | |||
inputPointer = (*C.char)(unsafe.Pointer(&input[0])) | |||
} | |||
if outputLeft > 0 { | |||
outputPointer = (*C.char)(unsafe.Pointer(&output[0])) | |||
} | |||
_, err = C.call_iconv(converter.context, inputPointer, &inputLeft, outputPointer, &outputLeft) | |||
bytesRead = len(input) - int(inputLeft) | |||
bytesWritten = len(output) - int(outputLeft) | |||
return bytesRead, bytesWritten, err | |||
} | |||
// Convert an input string | |||
// | |||
// EILSEQ error may be returned if input contains invalid bytes for the | |||
// Converter's fromEncoding. | |||
func (this *Converter) ConvertString(input string) (output string, err error) { | |||
// make sure we are still open | |||
if this.open { | |||
// construct the buffers | |||
inputBuffer := []byte(input) | |||
outputBuffer := make([]byte, len(inputBuffer)*2) // we use a larger buffer to help avoid resizing later | |||
// call Convert until all input bytes are read or an error occurs | |||
var bytesRead, totalBytesRead, bytesWritten, totalBytesWritten int | |||
for totalBytesRead < len(inputBuffer) && err == nil { | |||
// use the totals to create buffer slices | |||
bytesRead, bytesWritten, err = this.Convert(inputBuffer[totalBytesRead:], outputBuffer[totalBytesWritten:]) | |||
totalBytesRead += bytesRead | |||
totalBytesWritten += bytesWritten | |||
// check for the E2BIG error specifically, we can add to the output | |||
// buffer to correct for it and then continue | |||
if err == syscall.E2BIG { | |||
// increase the size of the output buffer by another input length | |||
// first, create a new buffer | |||
tempBuffer := make([]byte, len(outputBuffer)+len(inputBuffer)) | |||
// copy the existing data | |||
copy(tempBuffer, outputBuffer) | |||
// switch the buffers | |||
outputBuffer = tempBuffer | |||
// forget the error | |||
// EILSEQ error may be returned if input contains invalid bytes for the Converter's fromEncoding | |||
func (converter *Converter) ConvertString(input string) (output string, err error) { | |||
// construct the buffers | |||
inputBuffer := []byte(input) | |||
outputBuffer := make([]byte, len(inputBuffer)*2) // we use a larger buffer to help avoid resizing later | |||
// call Convert until all input bytes are read or an error occurs | |||
var bytesRead, totalBytesRead, bytesWritten, totalBytesWritten int | |||
for totalBytesRead < len(inputBuffer) && err == nil { | |||
// use the totals to create buffer slices | |||
bytesRead, bytesWritten, err = converter.Convert(inputBuffer[totalBytesRead:], outputBuffer[totalBytesWritten:]) | |||
totalBytesRead += bytesRead | |||
totalBytesWritten += bytesWritten | |||
switch err { | |||
case syscall.E2BIG: | |||
// increase the size of the output buffer by another input length | |||
// first, create a new buffer | |||
tempBuffer := make([]byte, len(outputBuffer)+len(inputBuffer)) | |||
// copy the existing data | |||
copy(tempBuffer, outputBuffer) | |||
// switch the buffers | |||
outputBuffer = tempBuffer | |||
// forget the error | |||
err = nil | |||
case syscall.EILSEQ, syscall.EINVAL: | |||
// iconv can still return these in cases where it still can proceed such as //IGNORE | |||
if bytesRead > 0 || bytesWritten > 0 { | |||
err = nil | |||
} | |||
} | |||
} | |||
if err == nil { | |||
// perform a final shift state reset | |||
_, bytesWritten, err = this.Convert([]byte{}, outputBuffer[totalBytesWritten:]) | |||
// update total count | |||
totalBytesWritten += bytesWritten | |||
} | |||
if err == nil { | |||
// perform a final shift state reset | |||
_, bytesWritten, err = converter.Convert(nil, outputBuffer[totalBytesWritten:]) | |||
// construct the final output string | |||
output = string(outputBuffer[:totalBytesWritten]) | |||
} else { | |||
err = syscall.EBADF | |||
// update total count | |||
totalBytesWritten += bytesWritten | |||
} | |||
// construct the final output string | |||
output = string(outputBuffer[:totalBytesWritten]) | |||
return output, err | |||
} | |||
func finalizeConverter(converter *Converter) { | |||
converter.Close() | |||
} |
@@ -1,6 +1,9 @@ | |||
package iconv | |||
import ( | |||
"bytes" | |||
"io" | |||
"strings" | |||
"syscall" | |||
"testing" | |||
) | |||
@@ -13,105 +16,486 @@ type iconvTest struct { | |||
outputEncoding string | |||
bytesRead int | |||
bytesWritten int | |||
err error | |||
convertErr error // err from Convert (raw iconv) | |||
err error // err from CovertString, Reader, Writer | |||
} | |||
var iconvTests = []iconvTest{ | |||
iconvTest{ | |||
"simple utf-8 to latin1 conversion success", | |||
"Hello World!", "utf-8", | |||
"Hello World!", "latin1", | |||
12, 12, nil, | |||
}, | |||
iconvTest{ | |||
"invalid source encoding causes EINVAL", | |||
"", "doesnotexist", | |||
"", "utf-8", | |||
0, 0, syscall.EINVAL, | |||
}, | |||
iconvTest{ | |||
"invalid destination encoding causes EINVAL", | |||
"", "utf-8", | |||
"", "doesnotexist", | |||
0, 0, syscall.EINVAL, | |||
}, | |||
iconvTest{ | |||
"invalid input sequence causes EILSEQ", | |||
"\xFF", "utf-8", | |||
"", "latin1", | |||
0, 0, syscall.EILSEQ, | |||
}, | |||
iconvTest{ | |||
"invalid input causes partial output and EILSEQ", | |||
"Hello\xFF", "utf-8", | |||
"Hello", "latin1", | |||
5, 5, syscall.EILSEQ, | |||
}, | |||
var ( | |||
iconvTests = []iconvTest{ | |||
iconvTest{ | |||
"simple utf-8 to latin1 conversion success", | |||
"Hello World!", "utf-8", | |||
"Hello World!", "latin1", | |||
12, 12, nil, nil, | |||
}, | |||
iconvTest{ | |||
"invalid source encoding causes EINVAL", | |||
"", "doesnotexist", | |||
"", "utf-8", | |||
0, 0, syscall.EINVAL, syscall.EINVAL, | |||
}, | |||
iconvTest{ | |||
"invalid destination encoding causes EINVAL", | |||
"", "utf-8", | |||
"", "doesnotexist", | |||
0, 0, syscall.EINVAL, syscall.EINVAL, | |||
}, | |||
iconvTest{ | |||
"utf-8 to utf-8 passthrough", | |||
"Hello world!", "utf-8", | |||
"Hello world!", "utf-8", | |||
12, 12, nil, nil, | |||
}, | |||
iconvTest{ | |||
"utf-8 to utf-8 partial", | |||
"Hello\xFFWorld!", "utf-8", | |||
"Hello", "utf-8", | |||
5, 5, syscall.EILSEQ, syscall.EILSEQ, | |||
}, | |||
iconvTest{ | |||
"utf-8 to utf-8 ignored", | |||
"Hello \xFFWorld!", "utf-8", | |||
"Hello World!", "utf-8//IGNORE", | |||
13, 12, syscall.EILSEQ, nil, | |||
}, | |||
iconvTest{ | |||
"invalid input sequence causes EILSEQ", | |||
"\xFF", "utf-8", | |||
"", "latin1", | |||
0, 0, syscall.EILSEQ, syscall.EILSEQ, | |||
}, | |||
iconvTest{ | |||
"incomplete input sequence causes EINVAL", | |||
"\xC2", "utf-8", | |||
"", "latin1", | |||
0, 0, syscall.EINVAL, syscall.EINVAL, | |||
}, | |||
iconvTest{ | |||
"invalid input causes partial output and EILSEQ", | |||
"Hello\xFF", "utf-8", | |||
"Hello", "latin1", | |||
5, 5, syscall.EILSEQ, syscall.EILSEQ, | |||
}, | |||
iconvTest{ | |||
"incomplete input causes partial output and EILSEQ", | |||
"Hello\xC2", "utf-8", | |||
"Hello", "latin1", | |||
5, 5, syscall.EINVAL, syscall.EINVAL, | |||
}, | |||
/* this is only true for glibc / iconv | |||
iconvTest{ | |||
"valid input but no conversion causes EILSEQ", | |||
"你好世界 Hello World", "utf-8", | |||
"", "latin1", | |||
0, 0, syscall.EILSEQ, syscall.EILSEQ, | |||
},*/ | |||
iconvTest{ | |||
"invalid input with ignore", | |||
"Hello\xFF World!", "utf-8", | |||
"Hello World!", "latin1//IGNORE", | |||
13, 12, syscall.EILSEQ, nil, | |||
}, | |||
iconvTest{ | |||
"valid input but no conversion with IGNORE", | |||
"你好世界 Hello World", "utf-8", | |||
" Hello World", "latin1//IGNORE", | |||
24, 12, syscall.EILSEQ, nil, | |||
}, | |||
iconvTest{ | |||
"valid input but no conversion with TRANSLIT", | |||
"你好世界 Hello World", "utf-8", | |||
"???? Hello World", "latin1//TRANSLIT", | |||
24, 16, nil, nil, | |||
}, | |||
} | |||
ignoreDetected, translitDetected bool | |||
) | |||
func init() { | |||
// detect if IGNORE / TRANSLIT is supported (glic / libiconv) | |||
conv, err := NewConverter("utf-8", "ascii//IGNORE") | |||
if err == nil { | |||
ignoreDetected = true | |||
conv.Close() | |||
} | |||
conv, err = NewConverter("utf-8", "ascii//TRANSLIT") | |||
if err == nil { | |||
translitDetected = true | |||
conv.Close() | |||
} | |||
} | |||
func TestConvertString(t *testing.T) { | |||
func runTests(t *testing.T, f func(iconvTest, *testing.T) (int, int, string, error)) { | |||
for _, test := range iconvTests { | |||
// perform the conversion | |||
output, err := ConvertString(test.input, test.inputEncoding, test.outputEncoding) | |||
t.Run(test.description, func(t *testing.T) { | |||
if !ignoreDetected && strings.HasSuffix(test.outputEncoding, "//IGNORE") { | |||
t.Skip("//IGNORE not supported") | |||
} | |||
// check that output and err match | |||
if output != test.output { | |||
t.Errorf("test \"%s\" failed, output did not match expected", test.description) | |||
} | |||
if !translitDetected && strings.HasSuffix(test.outputEncoding, "//TRANSLIT") { | |||
t.Skip("//TRANSLIT not supported") | |||
} | |||
// check that err is same as expected | |||
if err != test.err { | |||
if test.err != nil { | |||
if err != nil { | |||
t.Errorf("test \"%s\" failed, got %s when expecting %s", test.description, err, test.err) | |||
bytesRead, bytesWritten, output, err := f(test, t) | |||
// check that bytesRead is same as expected | |||
if bytesRead != test.bytesRead { | |||
t.Errorf("bytesRead: %d expected: %d", bytesRead, test.bytesRead) | |||
} | |||
// check that bytesWritten is same as expected | |||
if bytesWritten != test.bytesWritten { | |||
t.Errorf("bytesWritten: %d expected: %d", bytesWritten, test.bytesWritten) | |||
} | |||
// check output bytes against expected | |||
if output != test.output { | |||
t.Errorf("output: %x expected: %x", output, test.output) | |||
} | |||
// check that err is same as expected | |||
if err != test.err { | |||
if test.err != nil { | |||
if err != nil { | |||
t.Errorf("err: %q expected: %q", err, test.err) | |||
} else { | |||
t.Errorf("err: nil expected %q", test.err) | |||
} | |||
} else { | |||
t.Errorf("test \"%s\" failed, got nil when expecting %s", test.description, test.err) | |||
t.Errorf("unexpected error: %q", err) | |||
} | |||
} else { | |||
t.Errorf("test \"%s\" failed, got unexpected error: %s", test.description, err) | |||
} | |||
} | |||
}) | |||
} | |||
} | |||
func TestConvert(t *testing.T) { | |||
for _, test := range iconvTests { | |||
// setup input buffer | |||
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) { | |||
input := []byte(test.input) | |||
// setup a buffer as large as the expected bytesWritten | |||
output := make([]byte, 50) | |||
// peform the conversion | |||
bytesRead, bytesWritten, err := Convert(input, output, test.inputEncoding, test.outputEncoding) | |||
// check that bytesRead is same as expected | |||
if bytesRead != test.bytesRead { | |||
t.Errorf("test \"%s\" failed, bytesRead did not match expected", test.description) | |||
// HACK Convert has different erorrs, so check ourselves, and then fake out later check | |||
if err != test.convertErr { | |||
if test.convertErr != nil { | |||
if err != nil { | |||
t.Errorf("err: %q expected: %q", err, test.convertErr) | |||
} else { | |||
t.Errorf("err: nil expected %q", test.convertErr) | |||
} | |||
} else { | |||
t.Errorf("unexpected error: %q", err) | |||
} | |||
} | |||
err = test.err | |||
// check that bytesWritten is same as expected | |||
if bytesWritten != test.bytesWritten { | |||
t.Errorf("test \"%s\" failed, bytesWritten did not match expected", test.description) | |||
} | |||
return bytesRead, bytesWritten, string(output[:bytesWritten]), err | |||
}) | |||
} | |||
// check output bytes against expected - simplest to convert output to | |||
// string and then do an equality check which is actually a byte wise operation | |||
if string(output[:bytesWritten]) != test.output { | |||
t.Errorf("test \"%s\" failed, output did not match expected", test.description) | |||
} | |||
func TestConvertString(t *testing.T) { | |||
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) { | |||
// perform the conversion | |||
output, err := ConvertString(test.input, test.inputEncoding, test.outputEncoding) | |||
// check that err is same as expected | |||
if err != test.err { | |||
if test.err != nil { | |||
if err != nil { | |||
t.Errorf("test \"%s\" failed, got %s when expecting %s", test.description, err, test.err) | |||
} else { | |||
t.Errorf("test \"%s\" failed, got nil when expecting %s", test.description, test.err) | |||
// bytesRead and bytesWritten are spoofed a little | |||
return test.bytesRead, len(output), output, err | |||
}) | |||
} | |||
func TestReader(t *testing.T) { | |||
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) { | |||
var bytesRead, bytesWritten, finalBytesWritten int | |||
var err error | |||
input := bytes.NewBufferString(test.input) | |||
output := make([]byte, 50) | |||
reader, err := NewReader(input, test.inputEncoding, test.outputEncoding) | |||
if err == nil { | |||
bytesWritten, err = reader.Read(output) | |||
// we can compute how many bytes iconv read by inspecting the reader state | |||
bytesRead = len([]byte(test.input)) - input.Len() - (reader.writePos - reader.readPos) | |||
// with current tests and buffer sizes, we'd expect all input to be buffered if we called read | |||
if input.Len() != 0 { | |||
t.Error("not all bytes from input were buffered") | |||
} | |||
// do final read test if we can - either get EOF or same test error | |||
if err == nil { | |||
finalBytesWritten, err = reader.Read(output[bytesWritten:]) | |||
if finalBytesWritten != 0 { | |||
t.Errorf("finalBytesWritten: %d expected: 0", finalBytesWritten) | |||
} | |||
if err == io.EOF { | |||
err = nil | |||
} | |||
} else { | |||
t.Errorf("test \"%s\" failed, got unexpected error: %s", test.description, err) | |||
} | |||
} | |||
return bytesRead, bytesWritten, string(output[:bytesWritten]), err | |||
}) | |||
} | |||
func TestWriter(t *testing.T) { | |||
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) { | |||
var bytesRead, bytesWritten int | |||
var err error | |||
input := []byte(test.input) | |||
output := new(bytes.Buffer) | |||
writer, err := NewWriter(output, test.inputEncoding, test.outputEncoding) | |||
if err == nil { | |||
bytesRead, err = writer.Write(input) | |||
bytesRead -= writer.readPos | |||
writer.Close() | |||
bytesWritten = output.Len() | |||
} | |||
return bytesRead, bytesWritten, output.String(), err | |||
}) | |||
} | |||
func TestReaderWithCopy(t *testing.T) { | |||
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) { | |||
input := bytes.NewBufferString(test.input) | |||
output := new(bytes.Buffer) | |||
reader, err := NewReader(input, test.inputEncoding, test.outputEncoding) | |||
if err == nil { | |||
_, err := io.Copy(output, reader) | |||
bytesRead := len(test.input) - input.Len() - reader.writePos | |||
bytesWritten := output.Len() | |||
return bytesRead, bytesWritten, output.String(), err | |||
} | |||
return 0, 0, output.String(), err | |||
}) | |||
} | |||
func TestWriterWithCopy(t *testing.T) { | |||
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) { | |||
input := bytes.NewBufferString(test.input) | |||
output := new(bytes.Buffer) | |||
writer, err := NewWriter(output, test.inputEncoding, test.outputEncoding) | |||
if err == nil { | |||
bytesCopied, err := io.Copy(writer, input) | |||
bytesRead := int(bytesCopied) - writer.readPos | |||
writer.Close() | |||
bytesWritten := output.Len() | |||
return bytesRead, bytesWritten, output.String(), err | |||
} | |||
return 0, 0, output.String(), err | |||
}) | |||
} | |||
func TestReaderMultipleReads(t *testing.T) { | |||
// setup a source reader and our expected output string | |||
source := bytes.NewBufferString("\x80\x8A\x99\x95\x8B\x86\x87") | |||
expected := "€Š™•‹†‡" | |||
// setup reader - use our minimum buffer size so we can force it to shuffle the buffer around | |||
reader, err := NewReaderSized(source, "cp1252", "utf-8", minReadBufferSize) | |||
if err != nil { | |||
if err == syscall.EINVAL { | |||
t.Skip("Either cp1252 or utf-8 isn't supported by iconv on your system") | |||
} else { | |||
t.Fatalf("Unexpected error when creating reader: %s", err) | |||
} | |||
} | |||
// setup a read buffer - we'll slice it to different sizes in our tests | |||
buffer := make([]byte, 64) | |||
// first read should fill internal buffer, but we'll only read part of it | |||
bytesRead, err := reader.Read(buffer[:5]) | |||
if bytesRead != 5 || err != nil { | |||
t.Fatalf("first read did not give expected 5, nil: %d, %s", bytesRead, err) | |||
} | |||
// because of how small teh source is and our minimum buffer size, source shoudl be fully read | |||
if source.Len() != 0 { | |||
t.Fatalf("first read did not buffer all of source like expected: %d bytes remain", source.Len()) | |||
} | |||
// Buffer doesn't return EOF with last bytes, reader shouldn't know its EOF yet | |||
if reader.eof { | |||
t.Fatalf("first read was not expected to receive EOF") | |||
} | |||
// second read should shift internal buffer, and fill again - make buffer too small for last utf-8 character | |||
// E2BIG from iconv should be ignored because we wrote at least 1 byte | |||
bytesRead, err = reader.Read(buffer[5:18]) | |||
if bytesRead != 12 || err != nil { | |||
t.Fatalf("second read did not give expected 15, nil: %d, %s", bytesRead, err) | |||
} | |||
if !reader.eof { | |||
t.Fatalf("second read did not put reader into eof state") | |||
} | |||
// try to read the last 3 byte character with only a buffer of 2 bytes - this time we should see the E2BIG | |||
bytesRead, err = reader.Read(buffer[17:19]) | |||
if bytesRead != 0 || err != syscall.E2BIG { | |||
t.Fatalf("third read did not give expected 0, E2BIG: %d, %s", bytesRead, err) | |||
} | |||
// fourth read should finish last character | |||
bytesRead, err = reader.Read(buffer[17:]) | |||
if bytesRead != 3 || err != nil { | |||
t.Fatalf("fourth read did not give expected 3, nil: %d, %s", bytesRead, err) | |||
} | |||
// last read should be EOF | |||
bytesRead, err = reader.Read(buffer[20:]) | |||
if bytesRead != 0 || err != io.EOF { | |||
t.Fatalf("final read did not give expected 0, EOF: %d, %s", bytesRead, err) | |||
} | |||
// check full utf-8 output | |||
if string(buffer[:20]) != expected { | |||
t.Fatalf("output did not match expected %q: %q", expected, string(buffer[:20])) | |||
} | |||
} | |||
func TestWriteWithIncompleteSequence(t *testing.T) { | |||
expected := "\x80\x8A\x99\x95\x8B\x86\x87" | |||
input := []byte("€Š™•‹†‡") | |||
output := new(bytes.Buffer) | |||
writer, err := NewWriter(output, "utf-8", "cp1252") | |||
if err != nil { | |||
t.Fatalf("unexpected error while creating writer %q", err) | |||
} | |||
// the input string is made of 3 byte characters, for the test we want to only write part of the last character | |||
bytesFromBuffer := len(input) - 2 | |||
bytesRead, err := writer.Write(input[:bytesFromBuffer]) | |||
if bytesRead != bytesFromBuffer { | |||
t.Fatalf("did a short write on first write: %d, %s", bytesRead, err) | |||
} | |||
// finish the rest | |||
bytesRead, err = writer.Write(input[bytesFromBuffer:]) | |||
if bytesRead != 2 { | |||
t.Fatalf("did a short write on second write: %d, %s", bytesRead, err) | |||
} | |||
err = writer.Close() | |||
actual := output.String() | |||
if err != nil { | |||
t.Errorf("got an error on close: %s", err) | |||
} | |||
if actual != expected { | |||
t.Errorf("output %x did not match expected %x", actual, expected) | |||
} | |||
} | |||
func TestWriteWithIncompleteSequenceAndIgnore(t *testing.T) { | |||
if !ignoreDetected { | |||
t.Skip("//IGNORE not supported") | |||
} | |||
expected := "\x80\x8A\x99\x95\x8B\x86\x87" | |||
input := []byte("€Š™•‹†‡") | |||
output := new(bytes.Buffer) | |||
writer, err := NewWriter(output, "utf-8", "cp1252//IGNORE") | |||
if err != nil { | |||
t.Fatalf("unexpected error while creating writer %q", err) | |||
} | |||
// the input string is made of 3 byte characters, for the test we want to only write part of the last character | |||
bytesFromBuffer := len(input) - 2 | |||
bytesRead, err := writer.Write(input[:bytesFromBuffer]) | |||
if bytesRead != bytesFromBuffer { | |||
t.Fatalf("did a short write on first write: %d, %s", bytesRead, err) | |||
} | |||
// finish the rest | |||
bytesRead, err = writer.Write(input[bytesFromBuffer:]) | |||
if bytesRead != 2 { | |||
t.Fatalf("did a short write on second write: %d, %s", bytesRead, err) | |||
} | |||
err = writer.Close() | |||
actual := output.String() | |||
if err != nil { | |||
t.Errorf("got an error on close: %s", err) | |||
} | |||
if actual != expected { | |||
t.Errorf("output %x did not match expected %x", actual, expected) | |||
} | |||
} | |||
func TestWriteWithIncompleteSequenceAtEOF(t *testing.T) { | |||
expected := "\x80\x8A\x99\x95\x8B\x86" | |||
input := []byte("€Š™•‹†‡") | |||
output := new(bytes.Buffer) | |||
writer, err := NewWriter(output, "utf-8", "cp1252") | |||
if err != nil { | |||
t.Fatalf("unexpected error while creating writer %q", err) | |||
} | |||
// the input string is made of 3 byte characters, for the test we want to only write part of the last character | |||
bytesFromBuffer := len(input) - 2 | |||
bytesRead, err := writer.Write(input[:bytesFromBuffer]) | |||
if bytesRead != bytesFromBuffer { | |||
t.Fatalf("did a short write on first write: %d, %s", bytesRead, err) | |||
} | |||
err = writer.Close() | |||
actual := output.String() | |||
if err != nil { | |||
t.Errorf("got an error on close: %s", err) | |||
} | |||
if actual != expected { | |||
t.Errorf("output %x did not match expected %x", actual, expected) | |||
} | |||
} |
@@ -2,7 +2,12 @@ package iconv | |||
import ( | |||
"io" | |||
"syscall" | |||
"runtime" | |||
) | |||
const ( | |||
defaultReadBufferSize = 8 * 1024 | |||
minReadBufferSize = 16 | |||
) | |||
type Reader struct { | |||
@@ -10,91 +15,100 @@ type Reader struct { | |||
converter *Converter | |||
buffer []byte | |||
readPos, writePos int | |||
err error | |||
eof bool | |||
} | |||
func NewReader(source io.Reader, fromEncoding string, toEncoding string) (*Reader, error) { | |||
// create a converter | |||
converter, err := NewConverter(fromEncoding, toEncoding) | |||
if err == nil { | |||
return NewReaderFromConverter(source, converter), err | |||
} | |||
func NewReader(source io.Reader, fromEncoding, toEncoding string) (*Reader, error) { | |||
return NewReaderSized(source, fromEncoding, toEncoding, defaultReadBufferSize) | |||
} | |||
// return the error | |||
return nil, err | |||
func NewReaderFromConverter(source io.Reader, converter *Converter) *Reader { | |||
return NewReaderFromConverterSized(source, converter, defaultReadBufferSize) | |||
} | |||
func NewReaderFromConverter(source io.Reader, converter *Converter) (reader *Reader) { | |||
reader = new(Reader) | |||
func NewReaderSized(source io.Reader, fromEncoding, toEncoding string, size int) (*Reader, error) { | |||
converter, err := NewConverter(fromEncoding, toEncoding) | |||
// copy elements | |||
reader.source = source | |||
reader.converter = converter | |||
if err != nil { | |||
return nil, err | |||
} | |||
// create 8K buffers | |||
reader.buffer = make([]byte, 8*1024) | |||
// add a finalizer for the converter we created | |||
runtime.SetFinalizer(converter, finalizeConverter) | |||
return reader | |||
return NewReaderFromConverterSized(source, converter, size), nil | |||
} | |||
func (this *Reader) fillBuffer() { | |||
// slide existing data to beginning | |||
if this.readPos > 0 { | |||
// copy current bytes - is this guaranteed safe? | |||
copy(this.buffer, this.buffer[this.readPos:this.writePos]) | |||
// adjust positions | |||
this.writePos -= this.readPos | |||
this.readPos = 0 | |||
func NewReaderFromConverterSized(source io.Reader, converter *Converter, size int) *Reader { | |||
if size < minReadBufferSize { | |||
size = minReadBufferSize | |||
} | |||
// read new data into buffer at write position | |||
bytesRead, err := this.source.Read(this.buffer[this.writePos:]) | |||
// adjust write position | |||
this.writePos += bytesRead | |||
// track any reader error / EOF | |||
if err != nil { | |||
this.err = err | |||
return &Reader{ | |||
source: source, | |||
converter: converter, | |||
buffer: make([]byte, size), | |||
} | |||
} | |||
// implement the io.Reader interface | |||
func (this *Reader) Read(p []byte) (n int, err error) { | |||
// checks for when we have no data | |||
for this.writePos == 0 || this.readPos == this.writePos { | |||
// if we have an error / EOF, just return it | |||
if this.err != nil { | |||
return n, this.err | |||
func (r *Reader) Read(p []byte) (int, error) { | |||
if len(p) == 0 { | |||
return 0, nil | |||
} | |||
var bytesRead, bytesWritten int | |||
var err error | |||
// setup for a single read into buffer if possible | |||
if !r.eof { | |||
if r.readPos > 0 { | |||
// slide data to front of buffer | |||
r.readPos, r.writePos = 0, copy(r.buffer, r.buffer[r.readPos:r.writePos]) | |||
} | |||
// else, fill our buffer | |||
this.fillBuffer() | |||
} | |||
if r.writePos < len(r.buffer) { | |||
// do the single read | |||
bytesRead, err = r.source.Read(r.buffer[r.writePos:]) | |||
// TODO: checks for when we have less data than len(p) | |||
if bytesRead < 0 { | |||
panic("iconv: source reader returned negative count from Read") | |||
} | |||
// we should have an appropriate amount of data, convert it into the given buffer | |||
bytesRead, bytesWritten, err := this.converter.Convert(this.buffer[this.readPos:this.writePos], p) | |||
r.writePos += bytesRead | |||
r.eof = err == io.EOF | |||
} | |||
} | |||
// adjust byte counters | |||
this.readPos += bytesRead | |||
n += bytesWritten | |||
if r.readPos < r.writePos || r.eof { | |||
// convert any buffered data we have, or do a final reset (for shift based conversions) | |||
bytesRead, bytesWritten, err = r.converter.Convert(r.buffer[r.readPos:r.writePos], p) | |||
r.readPos += bytesRead | |||
// if we experienced an iconv error, check it | |||
if err != nil { | |||
// E2BIG errors can be ignored (we'll get them often) as long | |||
// as at least 1 byte was written. If we experienced an E2BIG | |||
// and no bytes were written then the buffer is too small for | |||
// even the next character | |||
if err != syscall.E2BIG || bytesWritten == 0 { | |||
// track anything else | |||
this.err = err | |||
// if we experienced an iconv error and didn't make progress, report it. | |||
// if we did make progress, it may be informational only (i.e. reporting | |||
// an EILSEQ even when using //ignore to skip them) | |||
if err != nil && bytesWritten == 0 { | |||
return bytesWritten, err | |||
} | |||
// signal an EOF only if we didn't write anything - accomodates premature | |||
// errror checking in user code | |||
if bytesWritten == 0 && r.eof { | |||
return 0, io.EOF | |||
} | |||
return bytesWritten, nil | |||
} | |||
// return our results | |||
return n, this.err | |||
return 0, err | |||
} | |||
func (r *Reader) Reset(source io.Reader) { | |||
r.converter.Reset() | |||
*r = Reader{ | |||
source: source, | |||
converter: r.converter, | |||
buffer: r.buffer, | |||
} | |||
} |
@@ -1,82 +1,181 @@ | |||
package iconv | |||
import "io" | |||
import ( | |||
"io" | |||
"runtime" | |||
"syscall" | |||
) | |||
const ( | |||
defaultWriteBufferSize = 8 * 1024 | |||
minWriteBufferSize = 16 | |||
) | |||
type Writer struct { | |||
destination io.Writer | |||
converter *Converter | |||
buffer []byte | |||
readPos, writePos int | |||
err error | |||
destination io.Writer | |||
converter *Converter | |||
readBuffer, writeBuffer []byte | |||
readPos, writePos int | |||
} | |||
func NewWriter(destination io.Writer, fromEncoding string, toEncoding string) (*Writer, error) { | |||
// create a converter | |||
return NewWriterSized(destination, fromEncoding, toEncoding, defaultWriteBufferSize) | |||
} | |||
func NewWriterFromConverter(destination io.Writer, converter *Converter) (writer *Writer) { | |||
return NewWriterFromConverterSized(destination, converter, defaultWriteBufferSize) | |||
} | |||
func NewWriterSized(destination io.Writer, fromEncoding, toEncoding string, size int) (*Writer, error) { | |||
converter, err := NewConverter(fromEncoding, toEncoding) | |||
if err == nil { | |||
return NewWriterFromConverter(destination, converter), err | |||
if err != nil { | |||
return nil, err | |||
} | |||
// return the error | |||
return nil, err | |||
// add a finalizer for the converter we created | |||
runtime.SetFinalizer(converter, finalizeConverter) | |||
return NewWriterFromConverterSized(destination, converter, size), nil | |||
} | |||
func NewWriterFromConverter(destination io.Writer, converter *Converter) (writer *Writer) { | |||
writer = new(Writer) | |||
func NewWriterFromConverterSized(destination io.Writer, converter *Converter, size int) *Writer { | |||
if size < minWriteBufferSize { | |||
size = minWriteBufferSize | |||
} | |||
// copy elements | |||
writer.destination = destination | |||
writer.converter = converter | |||
return &Writer{ | |||
destination: destination, | |||
converter: converter, | |||
readBuffer: make([]byte, size), | |||
writeBuffer: make([]byte, size), | |||
} | |||
} | |||
// create 8K buffers | |||
writer.buffer = make([]byte, 8*1024) | |||
// Implements io.Writer | |||
// | |||
// Will attempt to convert all of p into buffer. If there's not enough room in | |||
// the buffer to hold all converted bytes, the buffer will be flushed and p will | |||
// continue to be processed. Close should be called on a writer when finished | |||
// with all writes, to ensure final shift sequences are written and buffer is | |||
// flushed to underlying io.Writer. | |||
// | |||
// Can return all errors that Convert can, as well as any errors from Flush. Note | |||
// that some errors from Convert are suppressed if we continue making progress | |||
// on p. | |||
func (w *Writer) Write(p []byte) (int, error) { | |||
var totalBytesRead, bytesRead, bytesWritten int | |||
var err error | |||
if w.readPos == 0 || len(p) == 0 { | |||
bytesRead, bytesWritten, err = w.converter.Convert(p, w.writeBuffer[w.writePos:]) | |||
totalBytesRead += bytesRead | |||
w.writePos += bytesWritten | |||
w.readPos = 0 | |||
} else { | |||
// we have left over bytes from previous write that weren't complete and there's at least | |||
// one byte being written, fill read buffer with p and try to convert, if we make progress | |||
// we can continue conversion from p itself | |||
bytesCopied := copy(w.readBuffer[w.readPos:], p) | |||
bytesRead, bytesWritten, err = w.converter.Convert(w.readBuffer[:w.readPos+bytesCopied], w.writeBuffer[w.writePos:]) | |||
// if we made no progress, give up | |||
if bytesRead <= w.readPos { | |||
return 0, err | |||
} | |||
return writer | |||
} | |||
bytesRead -= w.readPos | |||
totalBytesRead += bytesRead | |||
func (this *Writer) emptyBuffer() { | |||
// write new data out of buffer | |||
bytesWritten, err := this.destination.Write(this.buffer[this.readPos:this.writePos]) | |||
w.readPos = 0 | |||
w.writePos += bytesWritten | |||
} | |||
// update read position | |||
this.readPos += bytesWritten | |||
// try to process all of p - lots of io functions don't like short writes. | |||
// | |||
// There are a few error cases we need to treat specially, as long as we've | |||
// made progress on p, E2BIG and EILSEQ should not be fatal. EINVAL isn't | |||
// fatal as long as the rest of p fits in our buffers. | |||
for err != nil && bytesRead > 0 { | |||
switch err { | |||
case syscall.E2BIG: | |||
err = w.Flush() | |||
case syscall.EILSEQ: | |||
// IGNORE suffix still reports the error on convert | |||
err = nil | |||
// if no more bytes, don't do an empty convert (resets state) | |||
if totalBytesRead == len(p) { | |||
break | |||
} | |||
case syscall.EINVAL: | |||
// if the rest of p fits in read buffer copy it there | |||
if len(p[totalBytesRead:]) <= len(w.readBuffer) { | |||
w.readPos = copy(w.readBuffer, p[totalBytesRead:]) | |||
totalBytesRead += w.readPos | |||
break | |||
} | |||
} | |||
// slide existing data to beginning | |||
if this.readPos > 0 { | |||
// copy current bytes - is this guaranteed safe? | |||
copy(this.buffer, this.buffer[this.readPos:this.writePos]) | |||
// if not an ignoreable err or Flush err | |||
if err != nil { | |||
break | |||
} | |||
// adjust positions | |||
this.writePos -= this.readPos | |||
this.readPos = 0 | |||
bytesRead, bytesWritten, err = w.converter.Convert(p[totalBytesRead:], w.writeBuffer[w.writePos:]) | |||
totalBytesRead += bytesRead | |||
w.writePos += bytesWritten | |||
} | |||
// track any reader error / EOF | |||
if err != nil { | |||
this.err = err | |||
} | |||
return totalBytesRead, err | |||
} | |||
// implement the io.Writer interface | |||
func (this *Writer) Write(p []byte) (n int, err error) { | |||
// write data into our internal buffer | |||
bytesRead, bytesWritten, err := this.converter.Convert(p, this.buffer[this.writePos:]) | |||
// Attempt to write any buffered data to destination writer. Returns error from | |||
// Write call or io.ErrShortWrite if Write didn't report an error but also didn't | |||
// accept all bytes given. | |||
func (w *Writer) Flush() error { | |||
if w.readPos < w.writePos { | |||
bytesWritten, err := w.destination.Write(w.writeBuffer[:w.writePos]) | |||
// update bytes written for return | |||
n += bytesRead | |||
this.writePos += bytesWritten | |||
if bytesWritten < 0 { | |||
panic("iconv: writer returned negative count from Write") | |||
} | |||
if bytesWritten > 0 { | |||
w.writePos = copy(w.writeBuffer, w.writeBuffer[bytesWritten:w.writePos]) | |||
} | |||
// checks for when we have a full buffer | |||
for this.writePos > 0 { | |||
// if we have an error, just return it | |||
if this.err != nil { | |||
return | |||
if err == nil && w.writePos > 0 { | |||
err = io.ErrShortWrite | |||
} | |||
// else empty the buffer | |||
this.emptyBuffer() | |||
return err | |||
} | |||
return n, err | |||
return nil | |||
} | |||
// Perform a final write with empty buffer, which allows iconv to close any shift | |||
// sequences. A Flush is performed if needed. | |||
func (w *Writer) Close() error { | |||
_, err := w.Write(nil) | |||
if err != nil { | |||
return err | |||
} | |||
return w.Flush() | |||
} | |||
// Reset state and direct writes to a new destination writer | |||
func (w *Writer) Reset(destination io.Writer) { | |||
w.converter.Reset() | |||
*w = Writer{ | |||
destination: destination, | |||
converter: w.converter, | |||
readBuffer: w.readBuffer, | |||
writeBuffer: w.writeBuffer, | |||
} | |||
} |