Rework Reader and Writer
* add tests that cover same behaviors as Convert and ConvertString * align read and write behaviors with bufio to play nice * add methods that allow to customize buffer size * add methods to reset, allowing reuse
This commit is contained in:
		
							parent
							
								
									8960e66bd3
								
							
						
					
					
						commit
						a84994e6e9
					
				
							
								
								
									
										159
									
								
								converter.go
									
									
									
									
									
								
							
							
						
						
									
										159
									
								
								converter.go
									
									
									
									
									
								
							@ -6,6 +6,7 @@ package iconv
 | 
			
		||||
#cgo windows LDFLAGS: -liconv
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <iconv.h>
 | 
			
		||||
#include <locale.h>
 | 
			
		||||
 | 
			
		||||
// As of GO 1.6 passing a pointer to Go pointer, will lead to panic
 | 
			
		||||
// Therofore we use this wrapper function, to avoid passing **char directly from go
 | 
			
		||||
@ -20,47 +21,40 @@ import "unsafe"
 | 
			
		||||
 | 
			
		||||
type Converter struct {
 | 
			
		||||
	context C.iconv_t
 | 
			
		||||
	open    bool
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Initialize a new Converter. If fromEncoding or toEncoding are not supported by
 | 
			
		||||
// iconv then an EINVAL error will be returned. An ENOMEM error maybe returned if
 | 
			
		||||
// there is not enough memory to initialize an iconv descriptor
 | 
			
		||||
func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err error) {
 | 
			
		||||
	converter = new(Converter)
 | 
			
		||||
 | 
			
		||||
func NewConverter(fromEncoding string, toEncoding string) (*Converter, error) {
 | 
			
		||||
	// convert to C strings
 | 
			
		||||
	toEncodingC := C.CString(toEncoding)
 | 
			
		||||
	fromEncodingC := C.CString(fromEncoding)
 | 
			
		||||
 | 
			
		||||
	// open an iconv descriptor
 | 
			
		||||
	converter.context, err = C.iconv_open(toEncodingC, fromEncodingC)
 | 
			
		||||
	context, err := C.iconv_open(toEncodingC, fromEncodingC)
 | 
			
		||||
 | 
			
		||||
	// free the C Strings
 | 
			
		||||
	C.free(unsafe.Pointer(toEncodingC))
 | 
			
		||||
	C.free(unsafe.Pointer(fromEncodingC))
 | 
			
		||||
 | 
			
		||||
	// check err
 | 
			
		||||
	if err == nil {
 | 
			
		||||
		// no error, mark the context as open
 | 
			
		||||
		converter.open = true
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return
 | 
			
		||||
	return &Converter{context}, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// destroy is called during garbage collection
 | 
			
		||||
func (this *Converter) destroy() {
 | 
			
		||||
	this.Close()
 | 
			
		||||
// Close a Converter's iconv descriptor explicitly
 | 
			
		||||
func (converter *Converter) Close() error {
 | 
			
		||||
	_, err := C.iconv_close(converter.context)
 | 
			
		||||
	return err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Close a Converter's iconv description explicitly
 | 
			
		||||
func (this *Converter) Close() (err error) {
 | 
			
		||||
	if this.open {
 | 
			
		||||
		_, err = C.iconv_close(this.context)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return
 | 
			
		||||
// Reset state of iconv context
 | 
			
		||||
func (converter *Converter) Reset() error {
 | 
			
		||||
	_, _, err := converter.Convert(nil, nil)
 | 
			
		||||
	return err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Convert bytes from an input byte slice into a give output byte slice
 | 
			
		||||
@ -74,95 +68,82 @@ func (this *Converter) Close() (err error) {
 | 
			
		||||
// For shift based output encodings, any end shift byte sequences can be generated by
 | 
			
		||||
// passing a 0 length byte slice as input. Also passing a 0 length byte slice for output
 | 
			
		||||
// will simply reset the iconv descriptor shift state without writing any bytes.
 | 
			
		||||
func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err error) {
 | 
			
		||||
	// make sure we are still open
 | 
			
		||||
	if this.open {
 | 
			
		||||
		inputLeft := C.size_t(len(input))
 | 
			
		||||
		outputLeft := C.size_t(len(output))
 | 
			
		||||
func (converter *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err error) {
 | 
			
		||||
	inputLeft := C.size_t(len(input))
 | 
			
		||||
	outputLeft := C.size_t(len(output))
 | 
			
		||||
 | 
			
		||||
		if inputLeft > 0 && outputLeft > 0 {
 | 
			
		||||
			// we have to give iconv a pointer to a pointer of the underlying
 | 
			
		||||
			// storage of each byte slice - so far this is the simplest
 | 
			
		||||
			// way i've found to do that in Go, but it seems ugly
 | 
			
		||||
			inputPointer := (*C.char)(unsafe.Pointer(&input[0]))
 | 
			
		||||
			outputPointer := (*C.char)(unsafe.Pointer(&output[0]))
 | 
			
		||||
	var inputPointer, outputPointer *C.char
 | 
			
		||||
 | 
			
		||||
			_, err = C.call_iconv(this.context, inputPointer, &inputLeft, outputPointer, &outputLeft)
 | 
			
		||||
 | 
			
		||||
			// update byte counters
 | 
			
		||||
			bytesRead = len(input) - int(inputLeft)
 | 
			
		||||
			bytesWritten = len(output) - int(outputLeft)
 | 
			
		||||
		} else if inputLeft == 0 && outputLeft > 0 {
 | 
			
		||||
			// inputPointer will be nil, outputPointer is generated as above
 | 
			
		||||
			outputPointer := (*C.char)(unsafe.Pointer(&output[0]))
 | 
			
		||||
 | 
			
		||||
			_, err = C.call_iconv(this.context, nil, &inputLeft, outputPointer, &outputLeft)
 | 
			
		||||
 | 
			
		||||
			// update write byte counter
 | 
			
		||||
			bytesWritten = len(output) - int(outputLeft)
 | 
			
		||||
		} else {
 | 
			
		||||
			// both input and output are zero length, do a shift state reset
 | 
			
		||||
			_, err = C.call_iconv(this.context, nil, &inputLeft, nil, &outputLeft)
 | 
			
		||||
		}
 | 
			
		||||
	} else {
 | 
			
		||||
		err = syscall.EBADF
 | 
			
		||||
	if inputLeft > 0 {
 | 
			
		||||
		inputPointer = (*C.char)(unsafe.Pointer(&input[0]))
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if outputLeft > 0 {
 | 
			
		||||
		outputPointer = (*C.char)(unsafe.Pointer(&output[0]))
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	_, err = C.call_iconv(converter.context, inputPointer, &inputLeft, outputPointer, &outputLeft)
 | 
			
		||||
 | 
			
		||||
	bytesRead = len(input) - int(inputLeft)
 | 
			
		||||
	bytesWritten = len(output) - int(outputLeft)
 | 
			
		||||
 | 
			
		||||
	return bytesRead, bytesWritten, err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Convert an input string
 | 
			
		||||
//
 | 
			
		||||
// EILSEQ error may be returned if input contains invalid bytes for the
 | 
			
		||||
// Converter's fromEncoding.
 | 
			
		||||
func (this *Converter) ConvertString(input string) (output string, err error) {
 | 
			
		||||
	// make sure we are still open
 | 
			
		||||
	if this.open {
 | 
			
		||||
		// construct the buffers
 | 
			
		||||
		inputBuffer := []byte(input)
 | 
			
		||||
		outputBuffer := make([]byte, len(inputBuffer)*2) // we use a larger buffer to help avoid resizing later
 | 
			
		||||
// EILSEQ error may be returned if input contains invalid bytes for the Converter's fromEncoding
 | 
			
		||||
func (converter *Converter) ConvertString(input string) (output string, err error) {
 | 
			
		||||
	// construct the buffers
 | 
			
		||||
	inputBuffer := []byte(input)
 | 
			
		||||
	outputBuffer := make([]byte, len(inputBuffer)*2) // we use a larger buffer to help avoid resizing later
 | 
			
		||||
 | 
			
		||||
		// call Convert until all input bytes are read or an error occurs
 | 
			
		||||
		var bytesRead, totalBytesRead, bytesWritten, totalBytesWritten int
 | 
			
		||||
	// call Convert until all input bytes are read or an error occurs
 | 
			
		||||
	var bytesRead, totalBytesRead, bytesWritten, totalBytesWritten int
 | 
			
		||||
 | 
			
		||||
		for totalBytesRead < len(inputBuffer) && err == nil {
 | 
			
		||||
			// use the totals to create buffer slices
 | 
			
		||||
			bytesRead, bytesWritten, err = this.Convert(inputBuffer[totalBytesRead:], outputBuffer[totalBytesWritten:])
 | 
			
		||||
	for totalBytesRead < len(inputBuffer) && err == nil {
 | 
			
		||||
		// use the totals to create buffer slices
 | 
			
		||||
		bytesRead, bytesWritten, err = converter.Convert(inputBuffer[totalBytesRead:], outputBuffer[totalBytesWritten:])
 | 
			
		||||
 | 
			
		||||
			totalBytesRead += bytesRead
 | 
			
		||||
			totalBytesWritten += bytesWritten
 | 
			
		||||
		totalBytesRead += bytesRead
 | 
			
		||||
		totalBytesWritten += bytesWritten
 | 
			
		||||
 | 
			
		||||
			// check for the E2BIG error specifically, we can add to the output
 | 
			
		||||
			// buffer to correct for it and then continue
 | 
			
		||||
			if err == syscall.E2BIG {
 | 
			
		||||
				// increase the size of the output buffer by another input length
 | 
			
		||||
				// first, create a new buffer
 | 
			
		||||
				tempBuffer := make([]byte, len(outputBuffer)+len(inputBuffer))
 | 
			
		||||
		switch err {
 | 
			
		||||
		case syscall.E2BIG:
 | 
			
		||||
			// increase the size of the output buffer by another input length
 | 
			
		||||
			// first, create a new buffer
 | 
			
		||||
			tempBuffer := make([]byte, len(outputBuffer)+len(inputBuffer))
 | 
			
		||||
 | 
			
		||||
				// copy the existing data
 | 
			
		||||
				copy(tempBuffer, outputBuffer)
 | 
			
		||||
			// copy the existing data
 | 
			
		||||
			copy(tempBuffer, outputBuffer)
 | 
			
		||||
 | 
			
		||||
				// switch the buffers
 | 
			
		||||
				outputBuffer = tempBuffer
 | 
			
		||||
			// switch the buffers
 | 
			
		||||
			outputBuffer = tempBuffer
 | 
			
		||||
 | 
			
		||||
				// forget the error
 | 
			
		||||
			// forget the error
 | 
			
		||||
			err = nil
 | 
			
		||||
		case syscall.EILSEQ, syscall.EINVAL:
 | 
			
		||||
			// iconv can still return these in cases where it still can proceed such as //IGNORE
 | 
			
		||||
			if bytesRead > 0 || bytesWritten > 0 {
 | 
			
		||||
				err = nil
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			// perform a final shift state reset
 | 
			
		||||
			_, bytesWritten, err = this.Convert([]byte{}, outputBuffer[totalBytesWritten:])
 | 
			
		||||
 | 
			
		||||
			// update total count
 | 
			
		||||
			totalBytesWritten += bytesWritten
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// construct the final output string
 | 
			
		||||
		output = string(outputBuffer[:totalBytesWritten])
 | 
			
		||||
	} else {
 | 
			
		||||
		err = syscall.EBADF
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if err == nil {
 | 
			
		||||
		// perform a final shift state reset
 | 
			
		||||
		_, bytesWritten, err = converter.Convert(nil, outputBuffer[totalBytesWritten:])
 | 
			
		||||
 | 
			
		||||
		// update total count
 | 
			
		||||
		totalBytesWritten += bytesWritten
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// construct the final output string
 | 
			
		||||
	output = string(outputBuffer[:totalBytesWritten])
 | 
			
		||||
 | 
			
		||||
	return output, err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func finalizeConverter(converter *Converter) {
 | 
			
		||||
	converter.Close()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										540
									
								
								iconv_test.go
									
									
									
									
									
								
							
							
						
						
									
										540
									
								
								iconv_test.go
									
									
									
									
									
								
							@ -1,6 +1,9 @@
 | 
			
		||||
package iconv
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"bytes"
 | 
			
		||||
	"io"
 | 
			
		||||
	"strings"
 | 
			
		||||
	"syscall"
 | 
			
		||||
	"testing"
 | 
			
		||||
)
 | 
			
		||||
@ -13,105 +16,486 @@ type iconvTest struct {
 | 
			
		||||
	outputEncoding string
 | 
			
		||||
	bytesRead      int
 | 
			
		||||
	bytesWritten   int
 | 
			
		||||
	err            error
 | 
			
		||||
	convertErr     error // err from Convert (raw iconv)
 | 
			
		||||
	err            error // err from CovertString, Reader, Writer
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
var iconvTests = []iconvTest{
 | 
			
		||||
	iconvTest{
 | 
			
		||||
		"simple utf-8 to latin1 conversion success",
 | 
			
		||||
		"Hello World!", "utf-8",
 | 
			
		||||
		"Hello World!", "latin1",
 | 
			
		||||
		12, 12, nil,
 | 
			
		||||
	},
 | 
			
		||||
	iconvTest{
 | 
			
		||||
		"invalid source encoding causes EINVAL",
 | 
			
		||||
		"", "doesnotexist",
 | 
			
		||||
		"", "utf-8",
 | 
			
		||||
		0, 0, syscall.EINVAL,
 | 
			
		||||
	},
 | 
			
		||||
	iconvTest{
 | 
			
		||||
		"invalid destination encoding causes EINVAL",
 | 
			
		||||
		"", "utf-8",
 | 
			
		||||
		"", "doesnotexist",
 | 
			
		||||
		0, 0, syscall.EINVAL,
 | 
			
		||||
	},
 | 
			
		||||
	iconvTest{
 | 
			
		||||
		"invalid input sequence causes EILSEQ",
 | 
			
		||||
		"\xFF", "utf-8",
 | 
			
		||||
		"", "latin1",
 | 
			
		||||
		0, 0, syscall.EILSEQ,
 | 
			
		||||
	},
 | 
			
		||||
	iconvTest{
 | 
			
		||||
		"invalid input causes partial output and EILSEQ",
 | 
			
		||||
		"Hello\xFF", "utf-8",
 | 
			
		||||
		"Hello", "latin1",
 | 
			
		||||
		5, 5, syscall.EILSEQ,
 | 
			
		||||
	},
 | 
			
		||||
var (
 | 
			
		||||
	iconvTests = []iconvTest{
 | 
			
		||||
		iconvTest{
 | 
			
		||||
			"simple utf-8 to latin1 conversion success",
 | 
			
		||||
			"Hello World!", "utf-8",
 | 
			
		||||
			"Hello World!", "latin1",
 | 
			
		||||
			12, 12, nil, nil,
 | 
			
		||||
		},
 | 
			
		||||
		iconvTest{
 | 
			
		||||
			"invalid source encoding causes EINVAL",
 | 
			
		||||
			"", "doesnotexist",
 | 
			
		||||
			"", "utf-8",
 | 
			
		||||
			0, 0, syscall.EINVAL, syscall.EINVAL,
 | 
			
		||||
		},
 | 
			
		||||
		iconvTest{
 | 
			
		||||
			"invalid destination encoding causes EINVAL",
 | 
			
		||||
			"", "utf-8",
 | 
			
		||||
			"", "doesnotexist",
 | 
			
		||||
			0, 0, syscall.EINVAL, syscall.EINVAL,
 | 
			
		||||
		},
 | 
			
		||||
		iconvTest{
 | 
			
		||||
			"utf-8 to utf-8 passthrough",
 | 
			
		||||
			"Hello world!", "utf-8",
 | 
			
		||||
			"Hello world!", "utf-8",
 | 
			
		||||
			12, 12, nil, nil,
 | 
			
		||||
		},
 | 
			
		||||
		iconvTest{
 | 
			
		||||
			"utf-8 to utf-8 partial",
 | 
			
		||||
			"Hello\xFFWorld!", "utf-8",
 | 
			
		||||
			"Hello", "utf-8",
 | 
			
		||||
			5, 5, syscall.EILSEQ, syscall.EILSEQ,
 | 
			
		||||
		},
 | 
			
		||||
		iconvTest{
 | 
			
		||||
			"utf-8 to utf-8 ignored",
 | 
			
		||||
			"Hello \xFFWorld!", "utf-8",
 | 
			
		||||
			"Hello World!", "utf-8//IGNORE",
 | 
			
		||||
			13, 12, syscall.EILSEQ, nil,
 | 
			
		||||
		},
 | 
			
		||||
		iconvTest{
 | 
			
		||||
			"invalid input sequence causes EILSEQ",
 | 
			
		||||
			"\xFF", "utf-8",
 | 
			
		||||
			"", "latin1",
 | 
			
		||||
			0, 0, syscall.EILSEQ, syscall.EILSEQ,
 | 
			
		||||
		},
 | 
			
		||||
		iconvTest{
 | 
			
		||||
			"incomplete input sequence causes EINVAL",
 | 
			
		||||
			"\xC2", "utf-8",
 | 
			
		||||
			"", "latin1",
 | 
			
		||||
			0, 0, syscall.EINVAL, syscall.EINVAL,
 | 
			
		||||
		},
 | 
			
		||||
		iconvTest{
 | 
			
		||||
			"invalid input causes partial output and EILSEQ",
 | 
			
		||||
			"Hello\xFF", "utf-8",
 | 
			
		||||
			"Hello", "latin1",
 | 
			
		||||
			5, 5, syscall.EILSEQ, syscall.EILSEQ,
 | 
			
		||||
		},
 | 
			
		||||
		iconvTest{
 | 
			
		||||
			"incomplete input causes partial output and EILSEQ",
 | 
			
		||||
			"Hello\xC2", "utf-8",
 | 
			
		||||
			"Hello", "latin1",
 | 
			
		||||
			5, 5, syscall.EINVAL, syscall.EINVAL,
 | 
			
		||||
		},
 | 
			
		||||
		/* this is only true for glibc / iconv
 | 
			
		||||
		iconvTest{
 | 
			
		||||
			"valid input but no conversion causes EILSEQ",
 | 
			
		||||
			"你好世界 Hello World", "utf-8",
 | 
			
		||||
			"", "latin1",
 | 
			
		||||
			0, 0, syscall.EILSEQ, syscall.EILSEQ,
 | 
			
		||||
		},*/
 | 
			
		||||
		iconvTest{
 | 
			
		||||
			"invalid input with ignore",
 | 
			
		||||
			"Hello\xFF World!", "utf-8",
 | 
			
		||||
			"Hello World!", "latin1//IGNORE",
 | 
			
		||||
			13, 12, syscall.EILSEQ, nil,
 | 
			
		||||
		},
 | 
			
		||||
		iconvTest{
 | 
			
		||||
			"valid input but no conversion with IGNORE",
 | 
			
		||||
			"你好世界 Hello World", "utf-8",
 | 
			
		||||
			" Hello World", "latin1//IGNORE",
 | 
			
		||||
			24, 12, syscall.EILSEQ, nil,
 | 
			
		||||
		},
 | 
			
		||||
		iconvTest{
 | 
			
		||||
			"valid input but no conversion with TRANSLIT",
 | 
			
		||||
			"你好世界 Hello World", "utf-8",
 | 
			
		||||
			"???? Hello World", "latin1//TRANSLIT",
 | 
			
		||||
			24, 16, nil, nil,
 | 
			
		||||
		},
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	ignoreDetected, translitDetected bool
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func init() {
 | 
			
		||||
	// detect if IGNORE / TRANSLIT is supported (glic / libiconv)
 | 
			
		||||
	conv, err := NewConverter("utf-8", "ascii//IGNORE")
 | 
			
		||||
 | 
			
		||||
	if err == nil {
 | 
			
		||||
		ignoreDetected = true
 | 
			
		||||
		conv.Close()
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	conv, err = NewConverter("utf-8", "ascii//TRANSLIT")
 | 
			
		||||
 | 
			
		||||
	if err == nil {
 | 
			
		||||
		translitDetected = true
 | 
			
		||||
		conv.Close()
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestConvertString(t *testing.T) {
 | 
			
		||||
func runTests(t *testing.T, f func(iconvTest, *testing.T) (int, int, string, error)) {
 | 
			
		||||
	for _, test := range iconvTests {
 | 
			
		||||
		// perform the conversion
 | 
			
		||||
		output, err := ConvertString(test.input, test.inputEncoding, test.outputEncoding)
 | 
			
		||||
 | 
			
		||||
		// check that output and err match
 | 
			
		||||
		if output != test.output {
 | 
			
		||||
			t.Errorf("test \"%s\" failed, output did not match expected", test.description)
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// check that err is same as expected
 | 
			
		||||
		if err != test.err {
 | 
			
		||||
			if test.err != nil {
 | 
			
		||||
				if err != nil {
 | 
			
		||||
					t.Errorf("test \"%s\" failed, got %s when expecting %s", test.description, err, test.err)
 | 
			
		||||
				} else {
 | 
			
		||||
					t.Errorf("test \"%s\" failed, got nil when expecting %s", test.description, test.err)
 | 
			
		||||
				}
 | 
			
		||||
			} else {
 | 
			
		||||
				t.Errorf("test \"%s\" failed, got unexpected error: %s", test.description, err)
 | 
			
		||||
		t.Run(test.description, func(t *testing.T) {
 | 
			
		||||
			if !ignoreDetected && strings.HasSuffix(test.outputEncoding, "//IGNORE") {
 | 
			
		||||
				t.Skip("//IGNORE not supported")
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
			if !translitDetected && strings.HasSuffix(test.outputEncoding, "//TRANSLIT") {
 | 
			
		||||
				t.Skip("//TRANSLIT not supported")
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			bytesRead, bytesWritten, output, err := f(test, t)
 | 
			
		||||
 | 
			
		||||
			// check that bytesRead is same as expected
 | 
			
		||||
			if bytesRead != test.bytesRead {
 | 
			
		||||
				t.Errorf("bytesRead: %d expected: %d", bytesRead, test.bytesRead)
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			// check that bytesWritten is same as expected
 | 
			
		||||
			if bytesWritten != test.bytesWritten {
 | 
			
		||||
				t.Errorf("bytesWritten: %d expected: %d", bytesWritten, test.bytesWritten)
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			// check output bytes against expected
 | 
			
		||||
			if output != test.output {
 | 
			
		||||
				t.Errorf("output: %x expected: %x", output, test.output)
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			// check that err is same as expected
 | 
			
		||||
			if err != test.err {
 | 
			
		||||
				if test.err != nil {
 | 
			
		||||
					if err != nil {
 | 
			
		||||
						t.Errorf("err: %q expected: %q", err, test.err)
 | 
			
		||||
					} else {
 | 
			
		||||
						t.Errorf("err: nil expected %q", test.err)
 | 
			
		||||
					}
 | 
			
		||||
				} else {
 | 
			
		||||
					t.Errorf("unexpected error: %q", err)
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		})
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestConvert(t *testing.T) {
 | 
			
		||||
	for _, test := range iconvTests {
 | 
			
		||||
		// setup input buffer
 | 
			
		||||
	runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
 | 
			
		||||
		input := []byte(test.input)
 | 
			
		||||
 | 
			
		||||
		// setup a buffer as large as the expected bytesWritten
 | 
			
		||||
		output := make([]byte, 50)
 | 
			
		||||
 | 
			
		||||
		// peform the conversion
 | 
			
		||||
		bytesRead, bytesWritten, err := Convert(input, output, test.inputEncoding, test.outputEncoding)
 | 
			
		||||
 | 
			
		||||
		// check that bytesRead is same as expected
 | 
			
		||||
		if bytesRead != test.bytesRead {
 | 
			
		||||
			t.Errorf("test \"%s\" failed, bytesRead did not match expected", test.description)
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// check that bytesWritten is same as expected
 | 
			
		||||
		if bytesWritten != test.bytesWritten {
 | 
			
		||||
			t.Errorf("test \"%s\" failed, bytesWritten did not match expected", test.description)
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// check output bytes against expected - simplest to convert output to
 | 
			
		||||
		// string and then do an equality check which is actually a byte wise operation
 | 
			
		||||
		if string(output[:bytesWritten]) != test.output {
 | 
			
		||||
			t.Errorf("test \"%s\" failed, output did not match expected", test.description)
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// check that err is same as expected
 | 
			
		||||
		if err != test.err {
 | 
			
		||||
			if test.err != nil {
 | 
			
		||||
		// HACK Convert has different erorrs, so check ourselves, and then fake out later check
 | 
			
		||||
		if err != test.convertErr {
 | 
			
		||||
			if test.convertErr != nil {
 | 
			
		||||
				if err != nil {
 | 
			
		||||
					t.Errorf("test \"%s\" failed, got %s when expecting %s", test.description, err, test.err)
 | 
			
		||||
					t.Errorf("err: %q expected: %q", err, test.convertErr)
 | 
			
		||||
				} else {
 | 
			
		||||
					t.Errorf("test \"%s\" failed, got nil when expecting %s", test.description, test.err)
 | 
			
		||||
					t.Errorf("err: nil expected %q", test.convertErr)
 | 
			
		||||
				}
 | 
			
		||||
			} else {
 | 
			
		||||
				t.Errorf("test \"%s\" failed, got unexpected error: %s", test.description, err)
 | 
			
		||||
				t.Errorf("unexpected error: %q", err)
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		err = test.err
 | 
			
		||||
 | 
			
		||||
		return bytesRead, bytesWritten, string(output[:bytesWritten]), err
 | 
			
		||||
	})
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestConvertString(t *testing.T) {
 | 
			
		||||
	runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
 | 
			
		||||
		// perform the conversion
 | 
			
		||||
		output, err := ConvertString(test.input, test.inputEncoding, test.outputEncoding)
 | 
			
		||||
 | 
			
		||||
		// bytesRead and bytesWritten are spoofed a little
 | 
			
		||||
		return test.bytesRead, len(output), output, err
 | 
			
		||||
	})
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestReader(t *testing.T) {
 | 
			
		||||
	runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
 | 
			
		||||
		var bytesRead, bytesWritten, finalBytesWritten int
 | 
			
		||||
		var err error
 | 
			
		||||
 | 
			
		||||
		input := bytes.NewBufferString(test.input)
 | 
			
		||||
		output := make([]byte, 50)
 | 
			
		||||
 | 
			
		||||
		reader, err := NewReader(input, test.inputEncoding, test.outputEncoding)
 | 
			
		||||
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			bytesWritten, err = reader.Read(output)
 | 
			
		||||
 | 
			
		||||
			// we can compute how many bytes iconv read by inspecting the reader state
 | 
			
		||||
			bytesRead = len([]byte(test.input)) - input.Len() - (reader.writePos - reader.readPos)
 | 
			
		||||
 | 
			
		||||
			// with current tests and buffer sizes, we'd expect all input to be buffered if we called read
 | 
			
		||||
			if input.Len() != 0 {
 | 
			
		||||
				t.Error("not all bytes from input were buffered")
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			// do final read test if we can - either get EOF or same test error
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				finalBytesWritten, err = reader.Read(output[bytesWritten:])
 | 
			
		||||
 | 
			
		||||
				if finalBytesWritten != 0 {
 | 
			
		||||
					t.Errorf("finalBytesWritten: %d expected: 0", finalBytesWritten)
 | 
			
		||||
				}
 | 
			
		||||
 | 
			
		||||
				if err == io.EOF {
 | 
			
		||||
					err = nil
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		return bytesRead, bytesWritten, string(output[:bytesWritten]), err
 | 
			
		||||
	})
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestWriter(t *testing.T) {
 | 
			
		||||
	runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
 | 
			
		||||
		var bytesRead, bytesWritten int
 | 
			
		||||
		var err error
 | 
			
		||||
 | 
			
		||||
		input := []byte(test.input)
 | 
			
		||||
		output := new(bytes.Buffer)
 | 
			
		||||
 | 
			
		||||
		writer, err := NewWriter(output, test.inputEncoding, test.outputEncoding)
 | 
			
		||||
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			bytesRead, err = writer.Write(input)
 | 
			
		||||
			bytesRead -= writer.readPos
 | 
			
		||||
			writer.Close()
 | 
			
		||||
 | 
			
		||||
			bytesWritten = output.Len()
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		return bytesRead, bytesWritten, output.String(), err
 | 
			
		||||
	})
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestReaderWithCopy(t *testing.T) {
 | 
			
		||||
	runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
 | 
			
		||||
		input := bytes.NewBufferString(test.input)
 | 
			
		||||
		output := new(bytes.Buffer)
 | 
			
		||||
 | 
			
		||||
		reader, err := NewReader(input, test.inputEncoding, test.outputEncoding)
 | 
			
		||||
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			_, err := io.Copy(output, reader)
 | 
			
		||||
 | 
			
		||||
			bytesRead := len(test.input) - input.Len() - reader.writePos
 | 
			
		||||
			bytesWritten := output.Len()
 | 
			
		||||
 | 
			
		||||
			return bytesRead, bytesWritten, output.String(), err
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		return 0, 0, output.String(), err
 | 
			
		||||
	})
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestWriterWithCopy(t *testing.T) {
 | 
			
		||||
	runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
 | 
			
		||||
		input := bytes.NewBufferString(test.input)
 | 
			
		||||
		output := new(bytes.Buffer)
 | 
			
		||||
 | 
			
		||||
		writer, err := NewWriter(output, test.inputEncoding, test.outputEncoding)
 | 
			
		||||
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			bytesCopied, err := io.Copy(writer, input)
 | 
			
		||||
			bytesRead := int(bytesCopied) - writer.readPos
 | 
			
		||||
			writer.Close()
 | 
			
		||||
 | 
			
		||||
			bytesWritten := output.Len()
 | 
			
		||||
 | 
			
		||||
			return bytesRead, bytesWritten, output.String(), err
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		return 0, 0, output.String(), err
 | 
			
		||||
	})
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestReaderMultipleReads(t *testing.T) {
 | 
			
		||||
	// setup a source reader and our expected output string
 | 
			
		||||
	source := bytes.NewBufferString("\x80\x8A\x99\x95\x8B\x86\x87")
 | 
			
		||||
	expected := "€Š™•‹†‡"
 | 
			
		||||
 | 
			
		||||
	// setup reader - use our minimum buffer size so we can force it to shuffle the buffer around
 | 
			
		||||
	reader, err := NewReaderSized(source, "cp1252", "utf-8", minReadBufferSize)
 | 
			
		||||
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		if err == syscall.EINVAL {
 | 
			
		||||
			t.Skip("Either cp1252 or utf-8 isn't supported by iconv on your system")
 | 
			
		||||
		} else {
 | 
			
		||||
			t.Fatalf("Unexpected error when creating reader: %s", err)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// setup a read buffer - we'll slice it to different sizes in our tests
 | 
			
		||||
	buffer := make([]byte, 64)
 | 
			
		||||
 | 
			
		||||
	// first read should fill internal buffer, but we'll only read part of it
 | 
			
		||||
	bytesRead, err := reader.Read(buffer[:5])
 | 
			
		||||
 | 
			
		||||
	if bytesRead != 5 || err != nil {
 | 
			
		||||
		t.Fatalf("first read did not give expected 5, nil: %d, %s", bytesRead, err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// because of how small teh source is and our minimum buffer size, source shoudl be fully read
 | 
			
		||||
	if source.Len() != 0 {
 | 
			
		||||
		t.Fatalf("first read did not buffer all of source like expected: %d bytes remain", source.Len())
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Buffer doesn't return EOF with last bytes, reader shouldn't know its EOF yet
 | 
			
		||||
	if reader.eof {
 | 
			
		||||
		t.Fatalf("first read was not expected to receive EOF")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// second read should shift internal buffer, and fill again - make buffer too small for last utf-8 character
 | 
			
		||||
	// E2BIG from iconv should be ignored because we wrote at least 1 byte
 | 
			
		||||
	bytesRead, err = reader.Read(buffer[5:18])
 | 
			
		||||
 | 
			
		||||
	if bytesRead != 12 || err != nil {
 | 
			
		||||
		t.Fatalf("second read did not give expected 15, nil: %d, %s", bytesRead, err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if !reader.eof {
 | 
			
		||||
		t.Fatalf("second read did not put reader into eof state")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// try to read the last 3 byte character with only a buffer of 2 bytes - this time we should see the E2BIG
 | 
			
		||||
	bytesRead, err = reader.Read(buffer[17:19])
 | 
			
		||||
 | 
			
		||||
	if bytesRead != 0 || err != syscall.E2BIG {
 | 
			
		||||
		t.Fatalf("third read did not give expected 0, E2BIG: %d, %s", bytesRead, err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// fourth read should finish last character
 | 
			
		||||
	bytesRead, err = reader.Read(buffer[17:])
 | 
			
		||||
 | 
			
		||||
	if bytesRead != 3 || err != nil {
 | 
			
		||||
		t.Fatalf("fourth read did not give expected 3, nil: %d, %s", bytesRead, err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// last read should be EOF
 | 
			
		||||
	bytesRead, err = reader.Read(buffer[20:])
 | 
			
		||||
 | 
			
		||||
	if bytesRead != 0 || err != io.EOF {
 | 
			
		||||
		t.Fatalf("final read did not give expected 0, EOF: %d, %s", bytesRead, err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// check full utf-8 output
 | 
			
		||||
	if string(buffer[:20]) != expected {
 | 
			
		||||
		t.Fatalf("output did not match expected %q: %q", expected, string(buffer[:20]))
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestWriteWithIncompleteSequence(t *testing.T) {
 | 
			
		||||
	expected := "\x80\x8A\x99\x95\x8B\x86\x87"
 | 
			
		||||
	input := []byte("€Š™•‹†‡")
 | 
			
		||||
	output := new(bytes.Buffer)
 | 
			
		||||
 | 
			
		||||
	writer, err := NewWriter(output, "utf-8", "cp1252")
 | 
			
		||||
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		t.Fatalf("unexpected error while creating writer %q", err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// the input string is made of 3 byte characters, for the test we want to only write part of the last character
 | 
			
		||||
	bytesFromBuffer := len(input) - 2
 | 
			
		||||
 | 
			
		||||
	bytesRead, err := writer.Write(input[:bytesFromBuffer])
 | 
			
		||||
 | 
			
		||||
	if bytesRead != bytesFromBuffer {
 | 
			
		||||
		t.Fatalf("did a short write on first write: %d, %s", bytesRead, err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// finish the rest
 | 
			
		||||
	bytesRead, err = writer.Write(input[bytesFromBuffer:])
 | 
			
		||||
 | 
			
		||||
	if bytesRead != 2 {
 | 
			
		||||
		t.Fatalf("did a short write on second write: %d, %s", bytesRead, err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	err = writer.Close()
 | 
			
		||||
	actual := output.String()
 | 
			
		||||
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		t.Errorf("got an error on close: %s", err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if actual != expected {
 | 
			
		||||
		t.Errorf("output %x did not match expected %x", actual, expected)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestWriteWithIncompleteSequenceAndIgnore(t *testing.T) {
 | 
			
		||||
	if !ignoreDetected {
 | 
			
		||||
		t.Skip("//IGNORE not supported")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	expected := "\x80\x8A\x99\x95\x8B\x86\x87"
 | 
			
		||||
	input := []byte("€Š™•‹†‡")
 | 
			
		||||
	output := new(bytes.Buffer)
 | 
			
		||||
 | 
			
		||||
	writer, err := NewWriter(output, "utf-8", "cp1252//IGNORE")
 | 
			
		||||
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		t.Fatalf("unexpected error while creating writer %q", err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// the input string is made of 3 byte characters, for the test we want to only write part of the last character
 | 
			
		||||
	bytesFromBuffer := len(input) - 2
 | 
			
		||||
 | 
			
		||||
	bytesRead, err := writer.Write(input[:bytesFromBuffer])
 | 
			
		||||
 | 
			
		||||
	if bytesRead != bytesFromBuffer {
 | 
			
		||||
		t.Fatalf("did a short write on first write: %d, %s", bytesRead, err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// finish the rest
 | 
			
		||||
	bytesRead, err = writer.Write(input[bytesFromBuffer:])
 | 
			
		||||
 | 
			
		||||
	if bytesRead != 2 {
 | 
			
		||||
		t.Fatalf("did a short write on second write: %d, %s", bytesRead, err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	err = writer.Close()
 | 
			
		||||
	actual := output.String()
 | 
			
		||||
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		t.Errorf("got an error on close: %s", err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if actual != expected {
 | 
			
		||||
		t.Errorf("output %x did not match expected %x", actual, expected)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestWriteWithIncompleteSequenceAtEOF(t *testing.T) {
 | 
			
		||||
	expected := "\x80\x8A\x99\x95\x8B\x86"
 | 
			
		||||
	input := []byte("€Š™•‹†‡")
 | 
			
		||||
	output := new(bytes.Buffer)
 | 
			
		||||
 | 
			
		||||
	writer, err := NewWriter(output, "utf-8", "cp1252")
 | 
			
		||||
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		t.Fatalf("unexpected error while creating writer %q", err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// the input string is made of 3 byte characters, for the test we want to only write part of the last character
 | 
			
		||||
	bytesFromBuffer := len(input) - 2
 | 
			
		||||
 | 
			
		||||
	bytesRead, err := writer.Write(input[:bytesFromBuffer])
 | 
			
		||||
 | 
			
		||||
	if bytesRead != bytesFromBuffer {
 | 
			
		||||
		t.Fatalf("did a short write on first write: %d, %s", bytesRead, err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	err = writer.Close()
 | 
			
		||||
	actual := output.String()
 | 
			
		||||
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		t.Errorf("got an error on close: %s", err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if actual != expected {
 | 
			
		||||
		t.Errorf("output %x did not match expected %x", actual, expected)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										160
									
								
								reader.go
									
									
									
									
									
								
							
							
						
						
									
										160
									
								
								reader.go
									
									
									
									
									
								
							@ -2,7 +2,12 @@ package iconv
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"io"
 | 
			
		||||
	"syscall"
 | 
			
		||||
	"runtime"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
const (
 | 
			
		||||
	defaultReadBufferSize = 8 * 1024
 | 
			
		||||
	minReadBufferSize     = 16
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
type Reader struct {
 | 
			
		||||
@ -10,91 +15,100 @@ type Reader struct {
 | 
			
		||||
	converter         *Converter
 | 
			
		||||
	buffer            []byte
 | 
			
		||||
	readPos, writePos int
 | 
			
		||||
	err               error
 | 
			
		||||
	eof               bool
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func NewReader(source io.Reader, fromEncoding string, toEncoding string) (*Reader, error) {
 | 
			
		||||
	// create a converter
 | 
			
		||||
func NewReader(source io.Reader, fromEncoding, toEncoding string) (*Reader, error) {
 | 
			
		||||
	return NewReaderSized(source, fromEncoding, toEncoding, defaultReadBufferSize)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func NewReaderFromConverter(source io.Reader, converter *Converter) *Reader {
 | 
			
		||||
	return NewReaderFromConverterSized(source, converter, defaultReadBufferSize)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func NewReaderSized(source io.Reader, fromEncoding, toEncoding string, size int) (*Reader, error) {
 | 
			
		||||
	converter, err := NewConverter(fromEncoding, toEncoding)
 | 
			
		||||
 | 
			
		||||
	if err == nil {
 | 
			
		||||
		return NewReaderFromConverter(source, converter), err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// return the error
 | 
			
		||||
	return nil, err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func NewReaderFromConverter(source io.Reader, converter *Converter) (reader *Reader) {
 | 
			
		||||
	reader = new(Reader)
 | 
			
		||||
 | 
			
		||||
	// copy elements
 | 
			
		||||
	reader.source = source
 | 
			
		||||
	reader.converter = converter
 | 
			
		||||
 | 
			
		||||
	// create 8K buffers
 | 
			
		||||
	reader.buffer = make([]byte, 8*1024)
 | 
			
		||||
 | 
			
		||||
	return reader
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (this *Reader) fillBuffer() {
 | 
			
		||||
	// slide existing data to beginning
 | 
			
		||||
	if this.readPos > 0 {
 | 
			
		||||
		// copy current bytes - is this guaranteed safe?
 | 
			
		||||
		copy(this.buffer, this.buffer[this.readPos:this.writePos])
 | 
			
		||||
 | 
			
		||||
		// adjust positions
 | 
			
		||||
		this.writePos -= this.readPos
 | 
			
		||||
		this.readPos = 0
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// read new data into buffer at write position
 | 
			
		||||
	bytesRead, err := this.source.Read(this.buffer[this.writePos:])
 | 
			
		||||
 | 
			
		||||
	// adjust write position
 | 
			
		||||
	this.writePos += bytesRead
 | 
			
		||||
 | 
			
		||||
	// track any reader error / EOF
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		this.err = err
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// add a finalizer for the converter we created
 | 
			
		||||
	runtime.SetFinalizer(converter, finalizeConverter)
 | 
			
		||||
 | 
			
		||||
	return NewReaderFromConverterSized(source, converter, size), nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func NewReaderFromConverterSized(source io.Reader, converter *Converter, size int) *Reader {
 | 
			
		||||
	if size < minReadBufferSize {
 | 
			
		||||
		size = minReadBufferSize
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return &Reader{
 | 
			
		||||
		source:    source,
 | 
			
		||||
		converter: converter,
 | 
			
		||||
		buffer:    make([]byte, size),
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// implement the io.Reader interface
 | 
			
		||||
func (this *Reader) Read(p []byte) (n int, err error) {
 | 
			
		||||
	// checks for when we have no data
 | 
			
		||||
	for this.writePos == 0 || this.readPos == this.writePos {
 | 
			
		||||
		// if we have an error / EOF, just return it
 | 
			
		||||
		if this.err != nil {
 | 
			
		||||
			return n, this.err
 | 
			
		||||
func (r *Reader) Read(p []byte) (int, error) {
 | 
			
		||||
	if len(p) == 0 {
 | 
			
		||||
		return 0, nil
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	var bytesRead, bytesWritten int
 | 
			
		||||
	var err error
 | 
			
		||||
 | 
			
		||||
	// setup for a single read into buffer if possible
 | 
			
		||||
	if !r.eof {
 | 
			
		||||
		if r.readPos > 0 {
 | 
			
		||||
			// slide data to front of buffer
 | 
			
		||||
			r.readPos, r.writePos = 0, copy(r.buffer, r.buffer[r.readPos:r.writePos])
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// else, fill our buffer
 | 
			
		||||
		this.fillBuffer()
 | 
			
		||||
	}
 | 
			
		||||
		if r.writePos < len(r.buffer) {
 | 
			
		||||
			// do the single read
 | 
			
		||||
			bytesRead, err = r.source.Read(r.buffer[r.writePos:])
 | 
			
		||||
 | 
			
		||||
	// TODO: checks for when we have less data than len(p)
 | 
			
		||||
			if bytesRead < 0 {
 | 
			
		||||
				panic("iconv: source reader returned negative count from Read")
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
	// we should have an appropriate amount of data, convert it into the given buffer
 | 
			
		||||
	bytesRead, bytesWritten, err := this.converter.Convert(this.buffer[this.readPos:this.writePos], p)
 | 
			
		||||
 | 
			
		||||
	// adjust byte counters
 | 
			
		||||
	this.readPos += bytesRead
 | 
			
		||||
	n += bytesWritten
 | 
			
		||||
 | 
			
		||||
	// if we experienced an iconv error, check it
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		// E2BIG errors can be ignored (we'll get them often) as long
 | 
			
		||||
		// as at least 1 byte was written. If we experienced an E2BIG
 | 
			
		||||
		// and no bytes were written then the buffer is too small for
 | 
			
		||||
		// even the next character
 | 
			
		||||
		if err != syscall.E2BIG || bytesWritten == 0 {
 | 
			
		||||
			// track anything else
 | 
			
		||||
			this.err = err
 | 
			
		||||
			r.writePos += bytesRead
 | 
			
		||||
			r.eof = err == io.EOF
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// return our results
 | 
			
		||||
	return n, this.err
 | 
			
		||||
	if r.readPos < r.writePos || r.eof {
 | 
			
		||||
		// convert any buffered data we have, or do a final reset (for shift based conversions)
 | 
			
		||||
		bytesRead, bytesWritten, err = r.converter.Convert(r.buffer[r.readPos:r.writePos], p)
 | 
			
		||||
		r.readPos += bytesRead
 | 
			
		||||
 | 
			
		||||
		// if we experienced an iconv error and didn't make progress, report it.
 | 
			
		||||
		// if we did make progress, it may be informational only (i.e. reporting
 | 
			
		||||
		// an EILSEQ even when using //ignore to skip them)
 | 
			
		||||
		if err != nil && bytesWritten == 0 {
 | 
			
		||||
			return bytesWritten, err
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// signal an EOF only if we didn't write anything - accomodates premature
 | 
			
		||||
		// errror checking in user code
 | 
			
		||||
		if bytesWritten == 0 && r.eof {
 | 
			
		||||
			return 0, io.EOF
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		return bytesWritten, nil
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return 0, err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (r *Reader) Reset(source io.Reader) {
 | 
			
		||||
	r.converter.Reset()
 | 
			
		||||
 | 
			
		||||
	*r = Reader{
 | 
			
		||||
		source:    source,
 | 
			
		||||
		converter: r.converter,
 | 
			
		||||
		buffer:    r.buffer,
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										215
									
								
								writer.go
									
									
									
									
									
								
							
							
						
						
									
										215
									
								
								writer.go
									
									
									
									
									
								
							@ -1,82 +1,181 @@
 | 
			
		||||
package iconv
 | 
			
		||||
 | 
			
		||||
import "io"
 | 
			
		||||
import (
 | 
			
		||||
	"io"
 | 
			
		||||
	"runtime"
 | 
			
		||||
	"syscall"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
const (
 | 
			
		||||
	defaultWriteBufferSize = 8 * 1024
 | 
			
		||||
	minWriteBufferSize     = 16
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
type Writer struct {
 | 
			
		||||
	destination       io.Writer
 | 
			
		||||
	converter         *Converter
 | 
			
		||||
	buffer            []byte
 | 
			
		||||
	readPos, writePos int
 | 
			
		||||
	err               error
 | 
			
		||||
	destination             io.Writer
 | 
			
		||||
	converter               *Converter
 | 
			
		||||
	readBuffer, writeBuffer []byte
 | 
			
		||||
	readPos, writePos       int
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func NewWriter(destination io.Writer, fromEncoding string, toEncoding string) (*Writer, error) {
 | 
			
		||||
	// create a converter
 | 
			
		||||
	converter, err := NewConverter(fromEncoding, toEncoding)
 | 
			
		||||
 | 
			
		||||
	if err == nil {
 | 
			
		||||
		return NewWriterFromConverter(destination, converter), err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// return the error
 | 
			
		||||
	return nil, err
 | 
			
		||||
	return NewWriterSized(destination, fromEncoding, toEncoding, defaultWriteBufferSize)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func NewWriterFromConverter(destination io.Writer, converter *Converter) (writer *Writer) {
 | 
			
		||||
	writer = new(Writer)
 | 
			
		||||
 | 
			
		||||
	// copy elements
 | 
			
		||||
	writer.destination = destination
 | 
			
		||||
	writer.converter = converter
 | 
			
		||||
 | 
			
		||||
	// create 8K buffers
 | 
			
		||||
	writer.buffer = make([]byte, 8*1024)
 | 
			
		||||
 | 
			
		||||
	return writer
 | 
			
		||||
	return NewWriterFromConverterSized(destination, converter, defaultWriteBufferSize)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (this *Writer) emptyBuffer() {
 | 
			
		||||
	// write new data out of buffer
 | 
			
		||||
	bytesWritten, err := this.destination.Write(this.buffer[this.readPos:this.writePos])
 | 
			
		||||
func NewWriterSized(destination io.Writer, fromEncoding, toEncoding string, size int) (*Writer, error) {
 | 
			
		||||
	converter, err := NewConverter(fromEncoding, toEncoding)
 | 
			
		||||
 | 
			
		||||
	// update read position
 | 
			
		||||
	this.readPos += bytesWritten
 | 
			
		||||
 | 
			
		||||
	// slide existing data to beginning
 | 
			
		||||
	if this.readPos > 0 {
 | 
			
		||||
		// copy current bytes - is this guaranteed safe?
 | 
			
		||||
		copy(this.buffer, this.buffer[this.readPos:this.writePos])
 | 
			
		||||
 | 
			
		||||
		// adjust positions
 | 
			
		||||
		this.writePos -= this.readPos
 | 
			
		||||
		this.readPos = 0
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// track any reader error / EOF
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		this.err = err
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// add a finalizer for the converter we created
 | 
			
		||||
	runtime.SetFinalizer(converter, finalizeConverter)
 | 
			
		||||
 | 
			
		||||
	return NewWriterFromConverterSized(destination, converter, size), nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func NewWriterFromConverterSized(destination io.Writer, converter *Converter, size int) *Writer {
 | 
			
		||||
	if size < minWriteBufferSize {
 | 
			
		||||
		size = minWriteBufferSize
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return &Writer{
 | 
			
		||||
		destination: destination,
 | 
			
		||||
		converter:   converter,
 | 
			
		||||
		readBuffer:  make([]byte, size),
 | 
			
		||||
		writeBuffer: make([]byte, size),
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// implement the io.Writer interface
 | 
			
		||||
func (this *Writer) Write(p []byte) (n int, err error) {
 | 
			
		||||
	// write data into our internal buffer
 | 
			
		||||
	bytesRead, bytesWritten, err := this.converter.Convert(p, this.buffer[this.writePos:])
 | 
			
		||||
// Implements io.Writer
 | 
			
		||||
//
 | 
			
		||||
// Will attempt to convert all of p into buffer. If there's not enough room in
 | 
			
		||||
// the buffer to hold all converted bytes, the buffer will be flushed and p will
 | 
			
		||||
// continue to be processed. Close should be called on a writer when finished
 | 
			
		||||
// with all writes, to ensure final shift sequences are written and buffer is
 | 
			
		||||
// flushed to underlying io.Writer.
 | 
			
		||||
//
 | 
			
		||||
// Can return all errors that Convert can, as well as any errors from Flush. Note
 | 
			
		||||
// that some errors from Convert are suppressed if we continue making progress
 | 
			
		||||
// on p.
 | 
			
		||||
func (w *Writer) Write(p []byte) (int, error) {
 | 
			
		||||
	var totalBytesRead, bytesRead, bytesWritten int
 | 
			
		||||
	var err error
 | 
			
		||||
 | 
			
		||||
	// update bytes written for return
 | 
			
		||||
	n += bytesRead
 | 
			
		||||
	this.writePos += bytesWritten
 | 
			
		||||
	if w.readPos == 0 || len(p) == 0 {
 | 
			
		||||
		bytesRead, bytesWritten, err = w.converter.Convert(p, w.writeBuffer[w.writePos:])
 | 
			
		||||
		totalBytesRead += bytesRead
 | 
			
		||||
		w.writePos += bytesWritten
 | 
			
		||||
		w.readPos = 0
 | 
			
		||||
	} else {
 | 
			
		||||
		// we have left over bytes from previous write that weren't complete and there's at least
 | 
			
		||||
		// one byte being written, fill read buffer with p and try to convert, if we make progress
 | 
			
		||||
		// we can continue conversion from p itself
 | 
			
		||||
		bytesCopied := copy(w.readBuffer[w.readPos:], p)
 | 
			
		||||
 | 
			
		||||
	// checks for when we have a full buffer
 | 
			
		||||
	for this.writePos > 0 {
 | 
			
		||||
		// if we have an error, just return it
 | 
			
		||||
		if this.err != nil {
 | 
			
		||||
			return
 | 
			
		||||
		bytesRead, bytesWritten, err = w.converter.Convert(w.readBuffer[:w.readPos+bytesCopied], w.writeBuffer[w.writePos:])
 | 
			
		||||
 | 
			
		||||
		// if we made no progress, give up
 | 
			
		||||
		if bytesRead <= w.readPos {
 | 
			
		||||
			return 0, err
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// else empty the buffer
 | 
			
		||||
		this.emptyBuffer()
 | 
			
		||||
		bytesRead -= w.readPos
 | 
			
		||||
		totalBytesRead += bytesRead
 | 
			
		||||
 | 
			
		||||
		w.readPos = 0
 | 
			
		||||
		w.writePos += bytesWritten
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return n, err
 | 
			
		||||
	// try to process all of p - lots of io functions don't like short writes.
 | 
			
		||||
	//
 | 
			
		||||
	// There are a few error cases we need to treat specially, as long as we've
 | 
			
		||||
	// made progress on p, E2BIG and EILSEQ should not be fatal. EINVAL isn't
 | 
			
		||||
	// fatal as long as the rest of p fits in our buffers.
 | 
			
		||||
	for err != nil && bytesRead > 0 {
 | 
			
		||||
		switch err {
 | 
			
		||||
		case syscall.E2BIG:
 | 
			
		||||
			err = w.Flush()
 | 
			
		||||
 | 
			
		||||
		case syscall.EILSEQ:
 | 
			
		||||
			// IGNORE suffix still reports the error on convert
 | 
			
		||||
			err = nil
 | 
			
		||||
 | 
			
		||||
			// if no more bytes, don't do an empty convert (resets state)
 | 
			
		||||
			if totalBytesRead == len(p) {
 | 
			
		||||
				break
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
		case syscall.EINVAL:
 | 
			
		||||
			// if the rest of p fits in read buffer copy it there
 | 
			
		||||
			if len(p[totalBytesRead:]) <= len(w.readBuffer) {
 | 
			
		||||
				w.readPos = copy(w.readBuffer, p[totalBytesRead:])
 | 
			
		||||
				totalBytesRead += w.readPos
 | 
			
		||||
				break
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// if not an ignoreable err or Flush err
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			break
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		bytesRead, bytesWritten, err = w.converter.Convert(p[totalBytesRead:], w.writeBuffer[w.writePos:])
 | 
			
		||||
		totalBytesRead += bytesRead
 | 
			
		||||
		w.writePos += bytesWritten
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return totalBytesRead, err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Attempt to write any buffered data to destination writer. Returns error from
 | 
			
		||||
// Write call or io.ErrShortWrite if Write didn't report an error but also didn't
 | 
			
		||||
// accept all bytes given.
 | 
			
		||||
func (w *Writer) Flush() error {
 | 
			
		||||
	if w.readPos < w.writePos {
 | 
			
		||||
		bytesWritten, err := w.destination.Write(w.writeBuffer[:w.writePos])
 | 
			
		||||
 | 
			
		||||
		if bytesWritten < 0 {
 | 
			
		||||
			panic("iconv: writer returned negative count from Write")
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if bytesWritten > 0 {
 | 
			
		||||
			w.writePos = copy(w.writeBuffer, w.writeBuffer[bytesWritten:w.writePos])
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if err == nil && w.writePos > 0 {
 | 
			
		||||
			err = io.ErrShortWrite
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Perform a final write with empty buffer, which allows iconv to close any shift
 | 
			
		||||
// sequences. A Flush is performed if needed.
 | 
			
		||||
func (w *Writer) Close() error {
 | 
			
		||||
	_, err := w.Write(nil)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	return w.Flush()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Reset state and direct writes to a new destination writer
 | 
			
		||||
func (w *Writer) Reset(destination io.Writer) {
 | 
			
		||||
	w.converter.Reset()
 | 
			
		||||
 | 
			
		||||
	*w = Writer{
 | 
			
		||||
		destination: destination,
 | 
			
		||||
		converter:   w.converter,
 | 
			
		||||
		readBuffer:  w.readBuffer,
 | 
			
		||||
		writeBuffer: w.writeBuffer,
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user