Compare commits
1 Commits
master
...
reader-wri
Author | SHA1 | Date | |
---|---|---|---|
|
a84994e6e9 |
99
converter.go
99
converter.go
@ -6,6 +6,7 @@ package iconv
|
|||||||
#cgo windows LDFLAGS: -liconv
|
#cgo windows LDFLAGS: -liconv
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <iconv.h>
|
#include <iconv.h>
|
||||||
|
#include <locale.h>
|
||||||
|
|
||||||
// As of GO 1.6 passing a pointer to Go pointer, will lead to panic
|
// As of GO 1.6 passing a pointer to Go pointer, will lead to panic
|
||||||
// Therefore we use this wrapper function, to avoid passing **char directly from go
|
// Therefore we use this wrapper function, to avoid passing **char directly from go
|
||||||
@ -20,47 +21,40 @@ import "unsafe"
|
|||||||
|
|
||||||
type Converter struct {
|
type Converter struct {
|
||||||
context C.iconv_t
|
context C.iconv_t
|
||||||
open bool
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize a new Converter. If fromEncoding or toEncoding are not supported by
|
// Initialize a new Converter. If fromEncoding or toEncoding are not supported by
|
||||||
// iconv then an EINVAL error will be returned. An ENOMEM error may be returned if
|
// iconv then an EINVAL error will be returned. An ENOMEM error may be returned if
|
||||||
// there is not enough memory to initialize an iconv descriptor
|
// there is not enough memory to initialize an iconv descriptor
|
||||||
func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err error) {
|
func NewConverter(fromEncoding string, toEncoding string) (*Converter, error) {
|
||||||
converter = new(Converter)
|
|
||||||
|
|
||||||
// convert to C strings
|
// convert to C strings
|
||||||
toEncodingC := C.CString(toEncoding)
|
toEncodingC := C.CString(toEncoding)
|
||||||
fromEncodingC := C.CString(fromEncoding)
|
fromEncodingC := C.CString(fromEncoding)
|
||||||
|
|
||||||
// open an iconv descriptor
|
// open an iconv descriptor
|
||||||
converter.context, err = C.iconv_open(toEncodingC, fromEncodingC)
|
context, err := C.iconv_open(toEncodingC, fromEncodingC)
|
||||||
|
|
||||||
// free the C Strings
|
// free the C Strings
|
||||||
C.free(unsafe.Pointer(toEncodingC))
|
C.free(unsafe.Pointer(toEncodingC))
|
||||||
C.free(unsafe.Pointer(fromEncodingC))
|
C.free(unsafe.Pointer(fromEncodingC))
|
||||||
|
|
||||||
// check err
|
if err != nil {
|
||||||
if err == nil {
|
return nil, err
|
||||||
// no error, mark the context as open
|
|
||||||
converter.open = true
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return &Converter{context}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// destroy is called during garbage collection
|
// Close a Converter's iconv descriptor explicitly
|
||||||
func (this *Converter) destroy() {
|
func (converter *Converter) Close() error {
|
||||||
this.Close()
|
_, err := C.iconv_close(converter.context)
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Close a Converter's iconv description explicitly
|
// Reset state of iconv context
|
||||||
func (this *Converter) Close() (err error) {
|
func (converter *Converter) Reset() error {
|
||||||
if this.open {
|
_, _, err := converter.Convert(nil, nil)
|
||||||
_, err = C.iconv_close(this.context)
|
return err
|
||||||
}
|
|
||||||
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert bytes from an input byte slice into a given output byte slice
|
// Convert bytes from an input byte slice into a given output byte slice
|
||||||
@ -74,50 +68,32 @@ func (this *Converter) Close() (err error) {
|
|||||||
// For shift based output encodings, any end shift byte sequences can be generated by
|
// For shift based output encodings, any end shift byte sequences can be generated by
|
||||||
// passing a 0 length byte slice as input. Also passing a 0 length byte slice for output
|
// passing a 0 length byte slice as input. Also passing a 0 length byte slice for output
|
||||||
// will simply reset the iconv descriptor shift state without writing any bytes.
|
// will simply reset the iconv descriptor shift state without writing any bytes.
|
||||||
func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err error) {
|
func (converter *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err error) {
|
||||||
// make sure we are still open
|
|
||||||
if this.open {
|
|
||||||
inputLeft := C.size_t(len(input))
|
inputLeft := C.size_t(len(input))
|
||||||
outputLeft := C.size_t(len(output))
|
outputLeft := C.size_t(len(output))
|
||||||
|
|
||||||
if inputLeft > 0 && outputLeft > 0 {
|
var inputPointer, outputPointer *C.char
|
||||||
// we have to give iconv a pointer to a pointer of the underlying
|
|
||||||
// storage of each byte slice - so far this is the simplest
|
|
||||||
// way i've found to do that in Go, but it seems ugly
|
|
||||||
inputPointer := (*C.char)(unsafe.Pointer(&input[0]))
|
|
||||||
outputPointer := (*C.char)(unsafe.Pointer(&output[0]))
|
|
||||||
|
|
||||||
_, err = C.call_iconv(this.context, inputPointer, &inputLeft, outputPointer, &outputLeft)
|
if inputLeft > 0 {
|
||||||
|
inputPointer = (*C.char)(unsafe.Pointer(&input[0]))
|
||||||
|
}
|
||||||
|
|
||||||
|
if outputLeft > 0 {
|
||||||
|
outputPointer = (*C.char)(unsafe.Pointer(&output[0]))
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err = C.call_iconv(converter.context, inputPointer, &inputLeft, outputPointer, &outputLeft)
|
||||||
|
|
||||||
// update byte counters
|
|
||||||
bytesRead = len(input) - int(inputLeft)
|
bytesRead = len(input) - int(inputLeft)
|
||||||
bytesWritten = len(output) - int(outputLeft)
|
bytesWritten = len(output) - int(outputLeft)
|
||||||
} else if inputLeft == 0 && outputLeft > 0 {
|
|
||||||
// inputPointer will be nil, outputPointer is generated as above
|
|
||||||
outputPointer := (*C.char)(unsafe.Pointer(&output[0]))
|
|
||||||
|
|
||||||
_, err = C.call_iconv(this.context, nil, &inputLeft, outputPointer, &outputLeft)
|
|
||||||
|
|
||||||
// update write byte counter
|
|
||||||
bytesWritten = len(output) - int(outputLeft)
|
|
||||||
} else {
|
|
||||||
// both input and output are zero length, do a shift state reset
|
|
||||||
_, err = C.call_iconv(this.context, nil, &inputLeft, nil, &outputLeft)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
err = syscall.EBADF
|
|
||||||
}
|
|
||||||
|
|
||||||
return bytesRead, bytesWritten, err
|
return bytesRead, bytesWritten, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert an input string
|
// Convert an input string
|
||||||
//
|
//
|
||||||
// EILSEQ error may be returned if input contains invalid bytes for the
|
// EILSEQ error may be returned if input contains invalid bytes for the Converter's fromEncoding
|
||||||
// Converter's fromEncoding.
|
func (converter *Converter) ConvertString(input string) (output string, err error) {
|
||||||
func (this *Converter) ConvertString(input string) (output string, err error) {
|
|
||||||
// make sure we are still open
|
|
||||||
if this.open {
|
|
||||||
// construct the buffers
|
// construct the buffers
|
||||||
inputBuffer := []byte(input)
|
inputBuffer := []byte(input)
|
||||||
outputBuffer := make([]byte, len(inputBuffer)*2) // we use a larger buffer to help avoid resizing later
|
outputBuffer := make([]byte, len(inputBuffer)*2) // we use a larger buffer to help avoid resizing later
|
||||||
@ -127,14 +103,13 @@ func (this *Converter) ConvertString(input string) (output string, err error) {
|
|||||||
|
|
||||||
for totalBytesRead < len(inputBuffer) && err == nil {
|
for totalBytesRead < len(inputBuffer) && err == nil {
|
||||||
// use the totals to create buffer slices
|
// use the totals to create buffer slices
|
||||||
bytesRead, bytesWritten, err = this.Convert(inputBuffer[totalBytesRead:], outputBuffer[totalBytesWritten:])
|
bytesRead, bytesWritten, err = converter.Convert(inputBuffer[totalBytesRead:], outputBuffer[totalBytesWritten:])
|
||||||
|
|
||||||
totalBytesRead += bytesRead
|
totalBytesRead += bytesRead
|
||||||
totalBytesWritten += bytesWritten
|
totalBytesWritten += bytesWritten
|
||||||
|
|
||||||
// check for the E2BIG error specifically, we can add to the output
|
switch err {
|
||||||
// buffer to correct for it and then continue
|
case syscall.E2BIG:
|
||||||
if err == syscall.E2BIG {
|
|
||||||
// increase the size of the output buffer by another input length
|
// increase the size of the output buffer by another input length
|
||||||
// first, create a new buffer
|
// first, create a new buffer
|
||||||
tempBuffer := make([]byte, len(outputBuffer)+len(inputBuffer))
|
tempBuffer := make([]byte, len(outputBuffer)+len(inputBuffer))
|
||||||
@ -147,12 +122,17 @@ func (this *Converter) ConvertString(input string) (output string, err error) {
|
|||||||
|
|
||||||
// forget the error
|
// forget the error
|
||||||
err = nil
|
err = nil
|
||||||
|
case syscall.EILSEQ, syscall.EINVAL:
|
||||||
|
// iconv can still return these in cases where it still can proceed such as //IGNORE
|
||||||
|
if bytesRead > 0 || bytesWritten > 0 {
|
||||||
|
err = nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err == nil {
|
if err == nil {
|
||||||
// perform a final shift state reset
|
// perform a final shift state reset
|
||||||
_, bytesWritten, err = this.Convert([]byte{}, outputBuffer[totalBytesWritten:])
|
_, bytesWritten, err = converter.Convert(nil, outputBuffer[totalBytesWritten:])
|
||||||
|
|
||||||
// update total count
|
// update total count
|
||||||
totalBytesWritten += bytesWritten
|
totalBytesWritten += bytesWritten
|
||||||
@ -160,9 +140,10 @@ func (this *Converter) ConvertString(input string) (output string, err error) {
|
|||||||
|
|
||||||
// construct the final output string
|
// construct the final output string
|
||||||
output = string(outputBuffer[:totalBytesWritten])
|
output = string(outputBuffer[:totalBytesWritten])
|
||||||
} else {
|
|
||||||
err = syscall.EBADF
|
|
||||||
}
|
|
||||||
|
|
||||||
return output, err
|
return output, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func finalizeConverter(converter *Converter) {
|
||||||
|
converter.Close()
|
||||||
|
}
|
||||||
|
468
iconv_test.go
468
iconv_test.go
@ -1,6 +1,9 @@
|
|||||||
package iconv
|
package iconv
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
|
"io"
|
||||||
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
@ -13,105 +16,486 @@ type iconvTest struct {
|
|||||||
outputEncoding string
|
outputEncoding string
|
||||||
bytesRead int
|
bytesRead int
|
||||||
bytesWritten int
|
bytesWritten int
|
||||||
err error
|
convertErr error // err from Convert (raw iconv)
|
||||||
|
err error // err from ConvertString, Reader, Writer
|
||||||
}
|
}
|
||||||
|
|
||||||
var iconvTests = []iconvTest{
|
var (
|
||||||
|
iconvTests = []iconvTest{
|
||||||
iconvTest{
|
iconvTest{
|
||||||
"simple utf-8 to latin1 conversion success",
|
"simple utf-8 to latin1 conversion success",
|
||||||
"Hello World!", "utf-8",
|
"Hello World!", "utf-8",
|
||||||
"Hello World!", "latin1",
|
"Hello World!", "latin1",
|
||||||
12, 12, nil,
|
12, 12, nil, nil,
|
||||||
},
|
},
|
||||||
iconvTest{
|
iconvTest{
|
||||||
"invalid source encoding causes EINVAL",
|
"invalid source encoding causes EINVAL",
|
||||||
"", "doesnotexist",
|
"", "doesnotexist",
|
||||||
"", "utf-8",
|
"", "utf-8",
|
||||||
0, 0, syscall.EINVAL,
|
0, 0, syscall.EINVAL, syscall.EINVAL,
|
||||||
},
|
},
|
||||||
iconvTest{
|
iconvTest{
|
||||||
"invalid destination encoding causes EINVAL",
|
"invalid destination encoding causes EINVAL",
|
||||||
"", "utf-8",
|
"", "utf-8",
|
||||||
"", "doesnotexist",
|
"", "doesnotexist",
|
||||||
0, 0, syscall.EINVAL,
|
0, 0, syscall.EINVAL, syscall.EINVAL,
|
||||||
|
},
|
||||||
|
iconvTest{
|
||||||
|
"utf-8 to utf-8 passthrough",
|
||||||
|
"Hello world!", "utf-8",
|
||||||
|
"Hello world!", "utf-8",
|
||||||
|
12, 12, nil, nil,
|
||||||
|
},
|
||||||
|
iconvTest{
|
||||||
|
"utf-8 to utf-8 partial",
|
||||||
|
"Hello\xFFWorld!", "utf-8",
|
||||||
|
"Hello", "utf-8",
|
||||||
|
5, 5, syscall.EILSEQ, syscall.EILSEQ,
|
||||||
|
},
|
||||||
|
iconvTest{
|
||||||
|
"utf-8 to utf-8 ignored",
|
||||||
|
"Hello \xFFWorld!", "utf-8",
|
||||||
|
"Hello World!", "utf-8//IGNORE",
|
||||||
|
13, 12, syscall.EILSEQ, nil,
|
||||||
},
|
},
|
||||||
iconvTest{
|
iconvTest{
|
||||||
"invalid input sequence causes EILSEQ",
|
"invalid input sequence causes EILSEQ",
|
||||||
"\xFF", "utf-8",
|
"\xFF", "utf-8",
|
||||||
"", "latin1",
|
"", "latin1",
|
||||||
0, 0, syscall.EILSEQ,
|
0, 0, syscall.EILSEQ, syscall.EILSEQ,
|
||||||
|
},
|
||||||
|
iconvTest{
|
||||||
|
"incomplete input sequence causes EINVAL",
|
||||||
|
"\xC2", "utf-8",
|
||||||
|
"", "latin1",
|
||||||
|
0, 0, syscall.EINVAL, syscall.EINVAL,
|
||||||
},
|
},
|
||||||
iconvTest{
|
iconvTest{
|
||||||
"invalid input causes partial output and EILSEQ",
|
"invalid input causes partial output and EILSEQ",
|
||||||
"Hello\xFF", "utf-8",
|
"Hello\xFF", "utf-8",
|
||||||
"Hello", "latin1",
|
"Hello", "latin1",
|
||||||
5, 5, syscall.EILSEQ,
|
5, 5, syscall.EILSEQ, syscall.EILSEQ,
|
||||||
|
},
|
||||||
|
iconvTest{
|
||||||
|
"incomplete input causes partial output and EILSEQ",
|
||||||
|
"Hello\xC2", "utf-8",
|
||||||
|
"Hello", "latin1",
|
||||||
|
5, 5, syscall.EINVAL, syscall.EINVAL,
|
||||||
|
},
|
||||||
|
/* this is only true for glibc / iconv
|
||||||
|
iconvTest{
|
||||||
|
"valid input but no conversion causes EILSEQ",
|
||||||
|
"你好世界 Hello World", "utf-8",
|
||||||
|
"", "latin1",
|
||||||
|
0, 0, syscall.EILSEQ, syscall.EILSEQ,
|
||||||
|
},*/
|
||||||
|
iconvTest{
|
||||||
|
"invalid input with ignore",
|
||||||
|
"Hello\xFF World!", "utf-8",
|
||||||
|
"Hello World!", "latin1//IGNORE",
|
||||||
|
13, 12, syscall.EILSEQ, nil,
|
||||||
|
},
|
||||||
|
iconvTest{
|
||||||
|
"valid input but no conversion with IGNORE",
|
||||||
|
"你好世界 Hello World", "utf-8",
|
||||||
|
" Hello World", "latin1//IGNORE",
|
||||||
|
24, 12, syscall.EILSEQ, nil,
|
||||||
|
},
|
||||||
|
iconvTest{
|
||||||
|
"valid input but no conversion with TRANSLIT",
|
||||||
|
"你好世界 Hello World", "utf-8",
|
||||||
|
"???? Hello World", "latin1//TRANSLIT",
|
||||||
|
24, 16, nil, nil,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestConvertString(t *testing.T) {
|
ignoreDetected, translitDetected bool
|
||||||
for _, test := range iconvTests {
|
)
|
||||||
// perform the conversion
|
|
||||||
output, err := ConvertString(test.input, test.inputEncoding, test.outputEncoding)
|
|
||||||
|
|
||||||
// check that output and err match
|
func init() {
|
||||||
|
// detect if IGNORE / TRANSLIT is supported (glibc / libiconv)
|
||||||
|
conv, err := NewConverter("utf-8", "ascii//IGNORE")
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
ignoreDetected = true
|
||||||
|
conv.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
conv, err = NewConverter("utf-8", "ascii//TRANSLIT")
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
translitDetected = true
|
||||||
|
conv.Close()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func runTests(t *testing.T, f func(iconvTest, *testing.T) (int, int, string, error)) {
|
||||||
|
for _, test := range iconvTests {
|
||||||
|
t.Run(test.description, func(t *testing.T) {
|
||||||
|
if !ignoreDetected && strings.HasSuffix(test.outputEncoding, "//IGNORE") {
|
||||||
|
t.Skip("//IGNORE not supported")
|
||||||
|
}
|
||||||
|
|
||||||
|
if !translitDetected && strings.HasSuffix(test.outputEncoding, "//TRANSLIT") {
|
||||||
|
t.Skip("//TRANSLIT not supported")
|
||||||
|
}
|
||||||
|
|
||||||
|
bytesRead, bytesWritten, output, err := f(test, t)
|
||||||
|
|
||||||
|
// check that bytesRead is same as expected
|
||||||
|
if bytesRead != test.bytesRead {
|
||||||
|
t.Errorf("bytesRead: %d expected: %d", bytesRead, test.bytesRead)
|
||||||
|
}
|
||||||
|
|
||||||
|
// check that bytesWritten is same as expected
|
||||||
|
if bytesWritten != test.bytesWritten {
|
||||||
|
t.Errorf("bytesWritten: %d expected: %d", bytesWritten, test.bytesWritten)
|
||||||
|
}
|
||||||
|
|
||||||
|
// check output bytes against expected
|
||||||
if output != test.output {
|
if output != test.output {
|
||||||
t.Errorf("test \"%s\" failed, output did not match expected", test.description)
|
t.Errorf("output: %x expected: %x", output, test.output)
|
||||||
}
|
}
|
||||||
|
|
||||||
// check that err is same as expected
|
// check that err is same as expected
|
||||||
if err != test.err {
|
if err != test.err {
|
||||||
if test.err != nil {
|
if test.err != nil {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("test \"%s\" failed, got %s when expecting %s", test.description, err, test.err)
|
t.Errorf("err: %q expected: %q", err, test.err)
|
||||||
} else {
|
} else {
|
||||||
t.Errorf("test \"%s\" failed, got nil when expecting %s", test.description, test.err)
|
t.Errorf("err: nil expected %q", test.err)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
t.Errorf("test \"%s\" failed, got unexpected error: %s", test.description, err)
|
t.Errorf("unexpected error: %q", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestConvert(t *testing.T) {
|
func TestConvert(t *testing.T) {
|
||||||
for _, test := range iconvTests {
|
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
|
||||||
// setup input buffer
|
|
||||||
input := []byte(test.input)
|
input := []byte(test.input)
|
||||||
|
|
||||||
// setup a buffer as large as the expected bytesWritten
|
|
||||||
output := make([]byte, 50)
|
output := make([]byte, 50)
|
||||||
|
|
||||||
// perform the conversion
|
// perform the conversion
|
||||||
bytesRead, bytesWritten, err := Convert(input, output, test.inputEncoding, test.outputEncoding)
|
bytesRead, bytesWritten, err := Convert(input, output, test.inputEncoding, test.outputEncoding)
|
||||||
|
|
||||||
// check that bytesRead is same as expected
|
// HACK Convert has different errors, so check ourselves, and then fake out later check
|
||||||
if bytesRead != test.bytesRead {
|
if err != test.convertErr {
|
||||||
t.Errorf("test \"%s\" failed, bytesRead did not match expected", test.description)
|
if test.convertErr != nil {
|
||||||
}
|
|
||||||
|
|
||||||
// check that bytesWritten is same as expected
|
|
||||||
if bytesWritten != test.bytesWritten {
|
|
||||||
t.Errorf("test \"%s\" failed, bytesWritten did not match expected", test.description)
|
|
||||||
}
|
|
||||||
|
|
||||||
// check output bytes against expected - simplest to convert output to
|
|
||||||
// string and then do an equality check which is actually a byte wise operation
|
|
||||||
if string(output[:bytesWritten]) != test.output {
|
|
||||||
t.Errorf("test \"%s\" failed, output did not match expected", test.description)
|
|
||||||
}
|
|
||||||
|
|
||||||
// check that err is same as expected
|
|
||||||
if err != test.err {
|
|
||||||
if test.err != nil {
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("test \"%s\" failed, got %s when expecting %s", test.description, err, test.err)
|
t.Errorf("err: %q expected: %q", err, test.convertErr)
|
||||||
} else {
|
} else {
|
||||||
t.Errorf("test \"%s\" failed, got nil when expecting %s", test.description, test.err)
|
t.Errorf("err: nil expected %q", test.convertErr)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
t.Errorf("test \"%s\" failed, got unexpected error: %s", test.description, err)
|
t.Errorf("unexpected error: %q", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
err = test.err
|
||||||
|
|
||||||
|
return bytesRead, bytesWritten, string(output[:bytesWritten]), err
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConvertString(t *testing.T) {
|
||||||
|
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
|
||||||
|
// perform the conversion
|
||||||
|
output, err := ConvertString(test.input, test.inputEncoding, test.outputEncoding)
|
||||||
|
|
||||||
|
// bytesRead and bytesWritten are spoofed a little
|
||||||
|
return test.bytesRead, len(output), output, err
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReader(t *testing.T) {
|
||||||
|
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
|
||||||
|
var bytesRead, bytesWritten, finalBytesWritten int
|
||||||
|
var err error
|
||||||
|
|
||||||
|
input := bytes.NewBufferString(test.input)
|
||||||
|
output := make([]byte, 50)
|
||||||
|
|
||||||
|
reader, err := NewReader(input, test.inputEncoding, test.outputEncoding)
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
bytesWritten, err = reader.Read(output)
|
||||||
|
|
||||||
|
// we can compute how many bytes iconv read by inspecting the reader state
|
||||||
|
bytesRead = len([]byte(test.input)) - input.Len() - (reader.writePos - reader.readPos)
|
||||||
|
|
||||||
|
// with current tests and buffer sizes, we'd expect all input to be buffered if we called read
|
||||||
|
if input.Len() != 0 {
|
||||||
|
t.Error("not all bytes from input were buffered")
|
||||||
|
}
|
||||||
|
|
||||||
|
// do final read test if we can - either get EOF or same test error
|
||||||
|
if err == nil {
|
||||||
|
finalBytesWritten, err = reader.Read(output[bytesWritten:])
|
||||||
|
|
||||||
|
if finalBytesWritten != 0 {
|
||||||
|
t.Errorf("finalBytesWritten: %d expected: 0", finalBytesWritten)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err == io.EOF {
|
||||||
|
err = nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return bytesRead, bytesWritten, string(output[:bytesWritten]), err
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWriter(t *testing.T) {
|
||||||
|
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
|
||||||
|
var bytesRead, bytesWritten int
|
||||||
|
var err error
|
||||||
|
|
||||||
|
input := []byte(test.input)
|
||||||
|
output := new(bytes.Buffer)
|
||||||
|
|
||||||
|
writer, err := NewWriter(output, test.inputEncoding, test.outputEncoding)
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
bytesRead, err = writer.Write(input)
|
||||||
|
bytesRead -= writer.readPos
|
||||||
|
writer.Close()
|
||||||
|
|
||||||
|
bytesWritten = output.Len()
|
||||||
|
}
|
||||||
|
|
||||||
|
return bytesRead, bytesWritten, output.String(), err
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReaderWithCopy(t *testing.T) {
|
||||||
|
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
|
||||||
|
input := bytes.NewBufferString(test.input)
|
||||||
|
output := new(bytes.Buffer)
|
||||||
|
|
||||||
|
reader, err := NewReader(input, test.inputEncoding, test.outputEncoding)
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
_, err := io.Copy(output, reader)
|
||||||
|
|
||||||
|
bytesRead := len(test.input) - input.Len() - reader.writePos
|
||||||
|
bytesWritten := output.Len()
|
||||||
|
|
||||||
|
return bytesRead, bytesWritten, output.String(), err
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0, 0, output.String(), err
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWriterWithCopy(t *testing.T) {
|
||||||
|
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
|
||||||
|
input := bytes.NewBufferString(test.input)
|
||||||
|
output := new(bytes.Buffer)
|
||||||
|
|
||||||
|
writer, err := NewWriter(output, test.inputEncoding, test.outputEncoding)
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
bytesCopied, err := io.Copy(writer, input)
|
||||||
|
bytesRead := int(bytesCopied) - writer.readPos
|
||||||
|
writer.Close()
|
||||||
|
|
||||||
|
bytesWritten := output.Len()
|
||||||
|
|
||||||
|
return bytesRead, bytesWritten, output.String(), err
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0, 0, output.String(), err
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReaderMultipleReads(t *testing.T) {
|
||||||
|
// setup a source reader and our expected output string
|
||||||
|
source := bytes.NewBufferString("\x80\x8A\x99\x95\x8B\x86\x87")
|
||||||
|
expected := "€Š™•‹†‡"
|
||||||
|
|
||||||
|
// setup reader - use our minimum buffer size so we can force it to shuffle the buffer around
|
||||||
|
reader, err := NewReaderSized(source, "cp1252", "utf-8", minReadBufferSize)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
if err == syscall.EINVAL {
|
||||||
|
t.Skip("Either cp1252 or utf-8 isn't supported by iconv on your system")
|
||||||
|
} else {
|
||||||
|
t.Fatalf("Unexpected error when creating reader: %s", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// setup a read buffer - we'll slice it to different sizes in our tests
|
||||||
|
buffer := make([]byte, 64)
|
||||||
|
|
||||||
|
// first read should fill internal buffer, but we'll only read part of it
|
||||||
|
bytesRead, err := reader.Read(buffer[:5])
|
||||||
|
|
||||||
|
if bytesRead != 5 || err != nil {
|
||||||
|
t.Fatalf("first read did not give expected 5, nil: %d, %s", bytesRead, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// because of how small the source is and our minimum buffer size, source should be fully read
|
||||||
|
if source.Len() != 0 {
|
||||||
|
t.Fatalf("first read did not buffer all of source like expected: %d bytes remain", source.Len())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Buffer doesn't return EOF with last bytes, reader shouldn't know its EOF yet
|
||||||
|
if reader.eof {
|
||||||
|
t.Fatalf("first read was not expected to receive EOF")
|
||||||
|
}
|
||||||
|
|
||||||
|
// second read should shift internal buffer, and fill again - make buffer too small for last utf-8 character
|
||||||
|
// E2BIG from iconv should be ignored because we wrote at least 1 byte
|
||||||
|
bytesRead, err = reader.Read(buffer[5:18])
|
||||||
|
|
||||||
|
if bytesRead != 12 || err != nil {
|
||||||
|
t.Fatalf("second read did not give expected 15, nil: %d, %s", bytesRead, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !reader.eof {
|
||||||
|
t.Fatalf("second read did not put reader into eof state")
|
||||||
|
}
|
||||||
|
|
||||||
|
// try to read the last 3 byte character with only a buffer of 2 bytes - this time we should see the E2BIG
|
||||||
|
bytesRead, err = reader.Read(buffer[17:19])
|
||||||
|
|
||||||
|
if bytesRead != 0 || err != syscall.E2BIG {
|
||||||
|
t.Fatalf("third read did not give expected 0, E2BIG: %d, %s", bytesRead, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// fourth read should finish last character
|
||||||
|
bytesRead, err = reader.Read(buffer[17:])
|
||||||
|
|
||||||
|
if bytesRead != 3 || err != nil {
|
||||||
|
t.Fatalf("fourth read did not give expected 3, nil: %d, %s", bytesRead, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// last read should be EOF
|
||||||
|
bytesRead, err = reader.Read(buffer[20:])
|
||||||
|
|
||||||
|
if bytesRead != 0 || err != io.EOF {
|
||||||
|
t.Fatalf("final read did not give expected 0, EOF: %d, %s", bytesRead, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// check full utf-8 output
|
||||||
|
if string(buffer[:20]) != expected {
|
||||||
|
t.Fatalf("output did not match expected %q: %q", expected, string(buffer[:20]))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWriteWithIncompleteSequence(t *testing.T) {
|
||||||
|
expected := "\x80\x8A\x99\x95\x8B\x86\x87"
|
||||||
|
input := []byte("€Š™•‹†‡")
|
||||||
|
output := new(bytes.Buffer)
|
||||||
|
|
||||||
|
writer, err := NewWriter(output, "utf-8", "cp1252")
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error while creating writer %q", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// the input string is made of 3 byte characters, for the test we want to only write part of the last character
|
||||||
|
bytesFromBuffer := len(input) - 2
|
||||||
|
|
||||||
|
bytesRead, err := writer.Write(input[:bytesFromBuffer])
|
||||||
|
|
||||||
|
if bytesRead != bytesFromBuffer {
|
||||||
|
t.Fatalf("did a short write on first write: %d, %s", bytesRead, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// finish the rest
|
||||||
|
bytesRead, err = writer.Write(input[bytesFromBuffer:])
|
||||||
|
|
||||||
|
if bytesRead != 2 {
|
||||||
|
t.Fatalf("did a short write on second write: %d, %s", bytesRead, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
err = writer.Close()
|
||||||
|
actual := output.String()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("got an error on close: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if actual != expected {
|
||||||
|
t.Errorf("output %x did not match expected %x", actual, expected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWriteWithIncompleteSequenceAndIgnore(t *testing.T) {
|
||||||
|
if !ignoreDetected {
|
||||||
|
t.Skip("//IGNORE not supported")
|
||||||
|
}
|
||||||
|
|
||||||
|
expected := "\x80\x8A\x99\x95\x8B\x86\x87"
|
||||||
|
input := []byte("€Š™•‹†‡")
|
||||||
|
output := new(bytes.Buffer)
|
||||||
|
|
||||||
|
writer, err := NewWriter(output, "utf-8", "cp1252//IGNORE")
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error while creating writer %q", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// the input string is made of 3 byte characters, for the test we want to only write part of the last character
|
||||||
|
bytesFromBuffer := len(input) - 2
|
||||||
|
|
||||||
|
bytesRead, err := writer.Write(input[:bytesFromBuffer])
|
||||||
|
|
||||||
|
if bytesRead != bytesFromBuffer {
|
||||||
|
t.Fatalf("did a short write on first write: %d, %s", bytesRead, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// finish the rest
|
||||||
|
bytesRead, err = writer.Write(input[bytesFromBuffer:])
|
||||||
|
|
||||||
|
if bytesRead != 2 {
|
||||||
|
t.Fatalf("did a short write on second write: %d, %s", bytesRead, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
err = writer.Close()
|
||||||
|
actual := output.String()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("got an error on close: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if actual != expected {
|
||||||
|
t.Errorf("output %x did not match expected %x", actual, expected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWriteWithIncompleteSequenceAtEOF(t *testing.T) {
|
||||||
|
expected := "\x80\x8A\x99\x95\x8B\x86"
|
||||||
|
input := []byte("€Š™•‹†‡")
|
||||||
|
output := new(bytes.Buffer)
|
||||||
|
|
||||||
|
writer, err := NewWriter(output, "utf-8", "cp1252")
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error while creating writer %q", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// the input string is made of 3 byte characters, for the test we want to only write part of the last character
|
||||||
|
bytesFromBuffer := len(input) - 2
|
||||||
|
|
||||||
|
bytesRead, err := writer.Write(input[:bytesFromBuffer])
|
||||||
|
|
||||||
|
if bytesRead != bytesFromBuffer {
|
||||||
|
t.Fatalf("did a short write on first write: %d, %s", bytesRead, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
err = writer.Close()
|
||||||
|
actual := output.String()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("got an error on close: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if actual != expected {
|
||||||
|
t.Errorf("output %x did not match expected %x", actual, expected)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
142
reader.go
142
reader.go
@ -2,7 +2,12 @@ package iconv
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"io"
|
"io"
|
||||||
"syscall"
|
"runtime"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
defaultReadBufferSize = 8 * 1024
|
||||||
|
minReadBufferSize = 16
|
||||||
)
|
)
|
||||||
|
|
||||||
type Reader struct {
|
type Reader struct {
|
||||||
@ -10,91 +15,100 @@ type Reader struct {
|
|||||||
converter *Converter
|
converter *Converter
|
||||||
buffer []byte
|
buffer []byte
|
||||||
readPos, writePos int
|
readPos, writePos int
|
||||||
err error
|
eof bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewReader(source io.Reader, fromEncoding string, toEncoding string) (*Reader, error) {
|
func NewReader(source io.Reader, fromEncoding, toEncoding string) (*Reader, error) {
|
||||||
// create a converter
|
return NewReaderSized(source, fromEncoding, toEncoding, defaultReadBufferSize)
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewReaderFromConverter(source io.Reader, converter *Converter) *Reader {
|
||||||
|
return NewReaderFromConverterSized(source, converter, defaultReadBufferSize)
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewReaderSized(source io.Reader, fromEncoding, toEncoding string, size int) (*Reader, error) {
|
||||||
converter, err := NewConverter(fromEncoding, toEncoding)
|
converter, err := NewConverter(fromEncoding, toEncoding)
|
||||||
|
|
||||||
if err == nil {
|
if err != nil {
|
||||||
return NewReaderFromConverter(source, converter), err
|
|
||||||
}
|
|
||||||
|
|
||||||
// return the error
|
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewReaderFromConverter(source io.Reader, converter *Converter) (reader *Reader) {
|
// add a finalizer for the converter we created
|
||||||
reader = new(Reader)
|
runtime.SetFinalizer(converter, finalizeConverter)
|
||||||
|
|
||||||
// copy elements
|
return NewReaderFromConverterSized(source, converter, size), nil
|
||||||
reader.source = source
|
|
||||||
reader.converter = converter
|
|
||||||
|
|
||||||
// create 8K buffers
|
|
||||||
reader.buffer = make([]byte, 8*1024)
|
|
||||||
|
|
||||||
return reader
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (this *Reader) fillBuffer() {
|
func NewReaderFromConverterSized(source io.Reader, converter *Converter, size int) *Reader {
|
||||||
// slide existing data to beginning
|
if size < minReadBufferSize {
|
||||||
if this.readPos > 0 {
|
size = minReadBufferSize
|
||||||
// copy current bytes - is this guaranteed safe?
|
|
||||||
copy(this.buffer, this.buffer[this.readPos:this.writePos])
|
|
||||||
|
|
||||||
// adjust positions
|
|
||||||
this.writePos -= this.readPos
|
|
||||||
this.readPos = 0
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// read new data into buffer at write position
|
return &Reader{
|
||||||
bytesRead, err := this.source.Read(this.buffer[this.writePos:])
|
source: source,
|
||||||
|
converter: converter,
|
||||||
// adjust write position
|
buffer: make([]byte, size),
|
||||||
this.writePos += bytesRead
|
|
||||||
|
|
||||||
// track any reader error / EOF
|
|
||||||
if err != nil {
|
|
||||||
this.err = err
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// implement the io.Reader interface
|
func (r *Reader) Read(p []byte) (int, error) {
|
||||||
func (this *Reader) Read(p []byte) (n int, err error) {
|
if len(p) == 0 {
|
||||||
// checks for when we have no data
|
return 0, nil
|
||||||
for this.writePos == 0 || this.readPos == this.writePos {
|
|
||||||
// if we have an error / EOF, just return it
|
|
||||||
if this.err != nil {
|
|
||||||
return n, this.err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// else, fill our buffer
|
var bytesRead, bytesWritten int
|
||||||
this.fillBuffer()
|
var err error
|
||||||
|
|
||||||
|
// setup for a single read into buffer if possible
|
||||||
|
if !r.eof {
|
||||||
|
if r.readPos > 0 {
|
||||||
|
// slide data to front of buffer
|
||||||
|
r.readPos, r.writePos = 0, copy(r.buffer, r.buffer[r.readPos:r.writePos])
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: checks for when we have less data than len(p)
|
if r.writePos < len(r.buffer) {
|
||||||
|
// do the single read
|
||||||
|
bytesRead, err = r.source.Read(r.buffer[r.writePos:])
|
||||||
|
|
||||||
// we should have an appropriate amount of data, convert it into the given buffer
|
if bytesRead < 0 {
|
||||||
bytesRead, bytesWritten, err := this.converter.Convert(this.buffer[this.readPos:this.writePos], p)
|
panic("iconv: source reader returned negative count from Read")
|
||||||
|
}
|
||||||
|
|
||||||
// adjust byte counters
|
r.writePos += bytesRead
|
||||||
this.readPos += bytesRead
|
r.eof = err == io.EOF
|
||||||
n += bytesWritten
|
|
||||||
|
|
||||||
// if we experienced an iconv error, check it
|
|
||||||
if err != nil {
|
|
||||||
// E2BIG errors can be ignored (we'll get them often) as long
|
|
||||||
// as at least 1 byte was written. If we experienced an E2BIG
|
|
||||||
// and no bytes were written then the buffer is too small for
|
|
||||||
// even the next character
|
|
||||||
if err != syscall.E2BIG || bytesWritten == 0 {
|
|
||||||
// track anything else
|
|
||||||
this.err = err
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// return our results
|
if r.readPos < r.writePos || r.eof {
|
||||||
return n, this.err
|
// convert any buffered data we have, or do a final reset (for shift based conversions)
|
||||||
|
bytesRead, bytesWritten, err = r.converter.Convert(r.buffer[r.readPos:r.writePos], p)
|
||||||
|
r.readPos += bytesRead
|
||||||
|
|
||||||
|
// if we experienced an iconv error and didn't make progress, report it.
|
||||||
|
// if we did make progress, it may be informational only (i.e. reporting
|
||||||
|
// an EILSEQ even when using //ignore to skip them)
|
||||||
|
if err != nil && bytesWritten == 0 {
|
||||||
|
return bytesWritten, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// signal an EOF only if we didn't write anything - accommodates premature
|
||||||
|
// error checking in user code
|
||||||
|
if bytesWritten == 0 && r.eof {
|
||||||
|
return 0, io.EOF
|
||||||
|
}
|
||||||
|
|
||||||
|
return bytesWritten, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *Reader) Reset(source io.Reader) {
|
||||||
|
r.converter.Reset()
|
||||||
|
|
||||||
|
*r = Reader{
|
||||||
|
source: source,
|
||||||
|
converter: r.converter,
|
||||||
|
buffer: r.buffer,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
209
writer.go
209
writer.go
@ -1,82 +1,181 @@
|
|||||||
package iconv
|
package iconv
|
||||||
|
|
||||||
import "io"
|
import (
|
||||||
|
"io"
|
||||||
|
"runtime"
|
||||||
|
"syscall"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
defaultWriteBufferSize = 8 * 1024
|
||||||
|
minWriteBufferSize = 16
|
||||||
|
)
|
||||||
|
|
||||||
type Writer struct {
|
type Writer struct {
|
||||||
destination io.Writer
|
destination io.Writer
|
||||||
converter *Converter
|
converter *Converter
|
||||||
buffer []byte
|
readBuffer, writeBuffer []byte
|
||||||
readPos, writePos int
|
readPos, writePos int
|
||||||
err error
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewWriter(destination io.Writer, fromEncoding string, toEncoding string) (*Writer, error) {
|
func NewWriter(destination io.Writer, fromEncoding string, toEncoding string) (*Writer, error) {
|
||||||
// create a converter
|
return NewWriterSized(destination, fromEncoding, toEncoding, defaultWriteBufferSize)
|
||||||
converter, err := NewConverter(fromEncoding, toEncoding)
|
|
||||||
|
|
||||||
if err == nil {
|
|
||||||
return NewWriterFromConverter(destination, converter), err
|
|
||||||
}
|
|
||||||
|
|
||||||
// return the error
|
|
||||||
return nil, err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewWriterFromConverter(destination io.Writer, converter *Converter) (writer *Writer) {
|
func NewWriterFromConverter(destination io.Writer, converter *Converter) (writer *Writer) {
|
||||||
writer = new(Writer)
|
return NewWriterFromConverterSized(destination, converter, defaultWriteBufferSize)
|
||||||
|
|
||||||
// copy elements
|
|
||||||
writer.destination = destination
|
|
||||||
writer.converter = converter
|
|
||||||
|
|
||||||
// create 8K buffers
|
|
||||||
writer.buffer = make([]byte, 8*1024)
|
|
||||||
|
|
||||||
return writer
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (this *Writer) emptyBuffer() {
|
func NewWriterSized(destination io.Writer, fromEncoding, toEncoding string, size int) (*Writer, error) {
|
||||||
// write new data out of buffer
|
converter, err := NewConverter(fromEncoding, toEncoding)
|
||||||
bytesWritten, err := this.destination.Write(this.buffer[this.readPos:this.writePos])
|
|
||||||
|
|
||||||
// update read position
|
|
||||||
this.readPos += bytesWritten
|
|
||||||
|
|
||||||
// slide existing data to beginning
|
|
||||||
if this.readPos > 0 {
|
|
||||||
// copy current bytes - is this guaranteed safe?
|
|
||||||
copy(this.buffer, this.buffer[this.readPos:this.writePos])
|
|
||||||
|
|
||||||
// adjust positions
|
|
||||||
this.writePos -= this.readPos
|
|
||||||
this.readPos = 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// track any reader error / EOF
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
this.err = err
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// add a finalizer for the converter we created
|
||||||
|
runtime.SetFinalizer(converter, finalizeConverter)
|
||||||
|
|
||||||
|
return NewWriterFromConverterSized(destination, converter, size), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewWriterFromConverterSized(destination io.Writer, converter *Converter, size int) *Writer {
|
||||||
|
if size < minWriteBufferSize {
|
||||||
|
size = minWriteBufferSize
|
||||||
|
}
|
||||||
|
|
||||||
|
return &Writer{
|
||||||
|
destination: destination,
|
||||||
|
converter: converter,
|
||||||
|
readBuffer: make([]byte, size),
|
||||||
|
writeBuffer: make([]byte, size),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// implement the io.Writer interface
|
// Implements io.Writer
|
||||||
func (this *Writer) Write(p []byte) (n int, err error) {
|
//
|
||||||
// write data into our internal buffer
|
// Will attempt to convert all of p into buffer. If there's not enough room in
|
||||||
bytesRead, bytesWritten, err := this.converter.Convert(p, this.buffer[this.writePos:])
|
// the buffer to hold all converted bytes, the buffer will be flushed and p will
|
||||||
|
// continue to be processed. Close should be called on a writer when finished
|
||||||
|
// with all writes, to ensure final shift sequences are written and buffer is
|
||||||
|
// flushed to underlying io.Writer.
|
||||||
|
//
|
||||||
|
// Can return all errors that Convert can, as well as any errors from Flush. Note
|
||||||
|
// that some errors from Convert are suppressed if we continue making progress
|
||||||
|
// on p.
|
||||||
|
func (w *Writer) Write(p []byte) (int, error) {
|
||||||
|
var totalBytesRead, bytesRead, bytesWritten int
|
||||||
|
var err error
|
||||||
|
|
||||||
// update bytes written for return
|
if w.readPos == 0 || len(p) == 0 {
|
||||||
n += bytesRead
|
bytesRead, bytesWritten, err = w.converter.Convert(p, w.writeBuffer[w.writePos:])
|
||||||
this.writePos += bytesWritten
|
totalBytesRead += bytesRead
|
||||||
|
w.writePos += bytesWritten
|
||||||
|
w.readPos = 0
|
||||||
|
} else {
|
||||||
|
// we have leftover bytes from a previous write that weren't complete and there's at least
|
||||||
|
// one byte being written, fill read buffer with p and try to convert, if we make progress
|
||||||
|
// we can continue conversion from p itself
|
||||||
|
bytesCopied := copy(w.readBuffer[w.readPos:], p)
|
||||||
|
|
||||||
// checks for when we have a full buffer
|
bytesRead, bytesWritten, err = w.converter.Convert(w.readBuffer[:w.readPos+bytesCopied], w.writeBuffer[w.writePos:])
|
||||||
for this.writePos > 0 {
|
|
||||||
// if we have an error, just return it
|
// if we made no progress, give up
|
||||||
if this.err != nil {
|
if bytesRead <= w.readPos {
|
||||||
return
|
return 0, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// else empty the buffer
|
bytesRead -= w.readPos
|
||||||
this.emptyBuffer()
|
totalBytesRead += bytesRead
|
||||||
|
|
||||||
|
w.readPos = 0
|
||||||
|
w.writePos += bytesWritten
|
||||||
}
|
}
|
||||||
|
|
||||||
return n, err
|
// try to process all of p - lots of io functions don't like short writes.
|
||||||
|
//
|
||||||
|
// There are a few error cases we need to treat specially, as long as we've
|
||||||
|
// made progress on p, E2BIG and EILSEQ should not be fatal. EINVAL isn't
|
||||||
|
// fatal as long as the rest of p fits in our buffers.
|
||||||
|
for err != nil && bytesRead > 0 {
|
||||||
|
switch err {
|
||||||
|
case syscall.E2BIG:
|
||||||
|
err = w.Flush()
|
||||||
|
|
||||||
|
case syscall.EILSEQ:
|
||||||
|
// IGNORE suffix still reports the error on convert
|
||||||
|
err = nil
|
||||||
|
|
||||||
|
// if no more bytes, don't do an empty convert (resets state)
|
||||||
|
if totalBytesRead == len(p) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
case syscall.EINVAL:
|
||||||
|
// if the rest of p fits in read buffer copy it there
|
||||||
|
if len(p[totalBytesRead:]) <= len(w.readBuffer) {
|
||||||
|
w.readPos = copy(w.readBuffer, p[totalBytesRead:])
|
||||||
|
totalBytesRead += w.readPos
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// if not an ignorable error, or Flush failed
|
||||||
|
if err != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
bytesRead, bytesWritten, err = w.converter.Convert(p[totalBytesRead:], w.writeBuffer[w.writePos:])
|
||||||
|
totalBytesRead += bytesRead
|
||||||
|
w.writePos += bytesWritten
|
||||||
|
}
|
||||||
|
|
||||||
|
return totalBytesRead, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Attempt to write any buffered data to destination writer. Returns error from
|
||||||
|
// Write call or io.ErrShortWrite if Write didn't report an error but also didn't
|
||||||
|
// accept all bytes given.
|
||||||
|
func (w *Writer) Flush() error {
|
||||||
|
if w.readPos < w.writePos {
|
||||||
|
bytesWritten, err := w.destination.Write(w.writeBuffer[:w.writePos])
|
||||||
|
|
||||||
|
if bytesWritten < 0 {
|
||||||
|
panic("iconv: writer returned negative count from Write")
|
||||||
|
}
|
||||||
|
|
||||||
|
if bytesWritten > 0 {
|
||||||
|
w.writePos = copy(w.writeBuffer, w.writeBuffer[bytesWritten:w.writePos])
|
||||||
|
}
|
||||||
|
|
||||||
|
if err == nil && w.writePos > 0 {
|
||||||
|
err = io.ErrShortWrite
|
||||||
|
}
|
||||||
|
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Perform a final write with empty buffer, which allows iconv to close any shift
|
||||||
|
// sequences. A Flush is performed if needed.
|
||||||
|
func (w *Writer) Close() error {
|
||||||
|
_, err := w.Write(nil)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return w.Flush()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset state and direct writes to a new destination writer
|
||||||
|
func (w *Writer) Reset(destination io.Writer) {
|
||||||
|
w.converter.Reset()
|
||||||
|
|
||||||
|
*w = Writer{
|
||||||
|
destination: destination,
|
||||||
|
converter: w.converter,
|
||||||
|
readBuffer: w.readBuffer,
|
||||||
|
writeBuffer: w.writeBuffer,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user