a84994e6e9
* add tests that cover same behaviors as Convert and ConvertString * align read and write behaviors with bufio to play nice * add methods that allow to customize buffer size * add methods to reset, allowing reuse
502 lines
13 KiB
Go
502 lines
13 KiB
Go
package iconv
|
|
|
|
import (
|
|
"bytes"
|
|
"io"
|
|
"strings"
|
|
"syscall"
|
|
"testing"
|
|
)
|
|
|
|
// iconvTest is one row of the shared conversion table. The same row is fed
// to Convert, ConvertString, Reader and Writer, so it records both the raw
// Convert error and the error expected from the higher level helpers.
type iconvTest struct {
	// description doubles as the subtest name.
	description string

	// input is converted from inputEncoding; output is the expected result
	// in outputEncoding.
	input, inputEncoding   string
	output, outputEncoding string

	// bytesRead and bytesWritten are the expected input bytes consumed and
	// output bytes produced.
	bytesRead, bytesWritten int

	convertErr error // expected error from Convert (raw iconv)
	err        error // expected error from ConvertString, Reader, Writer
}
|
|
|
|
var (
|
|
iconvTests = []iconvTest{
|
|
iconvTest{
|
|
"simple utf-8 to latin1 conversion success",
|
|
"Hello World!", "utf-8",
|
|
"Hello World!", "latin1",
|
|
12, 12, nil, nil,
|
|
},
|
|
iconvTest{
|
|
"invalid source encoding causes EINVAL",
|
|
"", "doesnotexist",
|
|
"", "utf-8",
|
|
0, 0, syscall.EINVAL, syscall.EINVAL,
|
|
},
|
|
iconvTest{
|
|
"invalid destination encoding causes EINVAL",
|
|
"", "utf-8",
|
|
"", "doesnotexist",
|
|
0, 0, syscall.EINVAL, syscall.EINVAL,
|
|
},
|
|
iconvTest{
|
|
"utf-8 to utf-8 passthrough",
|
|
"Hello world!", "utf-8",
|
|
"Hello world!", "utf-8",
|
|
12, 12, nil, nil,
|
|
},
|
|
iconvTest{
|
|
"utf-8 to utf-8 partial",
|
|
"Hello\xFFWorld!", "utf-8",
|
|
"Hello", "utf-8",
|
|
5, 5, syscall.EILSEQ, syscall.EILSEQ,
|
|
},
|
|
iconvTest{
|
|
"utf-8 to utf-8 ignored",
|
|
"Hello \xFFWorld!", "utf-8",
|
|
"Hello World!", "utf-8//IGNORE",
|
|
13, 12, syscall.EILSEQ, nil,
|
|
},
|
|
iconvTest{
|
|
"invalid input sequence causes EILSEQ",
|
|
"\xFF", "utf-8",
|
|
"", "latin1",
|
|
0, 0, syscall.EILSEQ, syscall.EILSEQ,
|
|
},
|
|
iconvTest{
|
|
"incomplete input sequence causes EINVAL",
|
|
"\xC2", "utf-8",
|
|
"", "latin1",
|
|
0, 0, syscall.EINVAL, syscall.EINVAL,
|
|
},
|
|
iconvTest{
|
|
"invalid input causes partial output and EILSEQ",
|
|
"Hello\xFF", "utf-8",
|
|
"Hello", "latin1",
|
|
5, 5, syscall.EILSEQ, syscall.EILSEQ,
|
|
},
|
|
iconvTest{
|
|
"incomplete input causes partial output and EILSEQ",
|
|
"Hello\xC2", "utf-8",
|
|
"Hello", "latin1",
|
|
5, 5, syscall.EINVAL, syscall.EINVAL,
|
|
},
|
|
/* this is only true for glibc / iconv
|
|
iconvTest{
|
|
"valid input but no conversion causes EILSEQ",
|
|
"你好世界 Hello World", "utf-8",
|
|
"", "latin1",
|
|
0, 0, syscall.EILSEQ, syscall.EILSEQ,
|
|
},*/
|
|
iconvTest{
|
|
"invalid input with ignore",
|
|
"Hello\xFF World!", "utf-8",
|
|
"Hello World!", "latin1//IGNORE",
|
|
13, 12, syscall.EILSEQ, nil,
|
|
},
|
|
iconvTest{
|
|
"valid input but no conversion with IGNORE",
|
|
"你好世界 Hello World", "utf-8",
|
|
" Hello World", "latin1//IGNORE",
|
|
24, 12, syscall.EILSEQ, nil,
|
|
},
|
|
iconvTest{
|
|
"valid input but no conversion with TRANSLIT",
|
|
"你好世界 Hello World", "utf-8",
|
|
"???? Hello World", "latin1//TRANSLIT",
|
|
24, 16, nil, nil,
|
|
},
|
|
}
|
|
|
|
ignoreDetected, translitDetected bool
|
|
)
|
|
|
|
func init() {
|
|
// detect if IGNORE / TRANSLIT is supported (glic / libiconv)
|
|
conv, err := NewConverter("utf-8", "ascii//IGNORE")
|
|
|
|
if err == nil {
|
|
ignoreDetected = true
|
|
conv.Close()
|
|
}
|
|
|
|
conv, err = NewConverter("utf-8", "ascii//TRANSLIT")
|
|
|
|
if err == nil {
|
|
translitDetected = true
|
|
conv.Close()
|
|
}
|
|
}
|
|
|
|
func runTests(t *testing.T, f func(iconvTest, *testing.T) (int, int, string, error)) {
|
|
for _, test := range iconvTests {
|
|
t.Run(test.description, func(t *testing.T) {
|
|
if !ignoreDetected && strings.HasSuffix(test.outputEncoding, "//IGNORE") {
|
|
t.Skip("//IGNORE not supported")
|
|
}
|
|
|
|
if !translitDetected && strings.HasSuffix(test.outputEncoding, "//TRANSLIT") {
|
|
t.Skip("//TRANSLIT not supported")
|
|
}
|
|
|
|
bytesRead, bytesWritten, output, err := f(test, t)
|
|
|
|
// check that bytesRead is same as expected
|
|
if bytesRead != test.bytesRead {
|
|
t.Errorf("bytesRead: %d expected: %d", bytesRead, test.bytesRead)
|
|
}
|
|
|
|
// check that bytesWritten is same as expected
|
|
if bytesWritten != test.bytesWritten {
|
|
t.Errorf("bytesWritten: %d expected: %d", bytesWritten, test.bytesWritten)
|
|
}
|
|
|
|
// check output bytes against expected
|
|
if output != test.output {
|
|
t.Errorf("output: %x expected: %x", output, test.output)
|
|
}
|
|
|
|
// check that err is same as expected
|
|
if err != test.err {
|
|
if test.err != nil {
|
|
if err != nil {
|
|
t.Errorf("err: %q expected: %q", err, test.err)
|
|
} else {
|
|
t.Errorf("err: nil expected %q", test.err)
|
|
}
|
|
} else {
|
|
t.Errorf("unexpected error: %q", err)
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestConvert(t *testing.T) {
|
|
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
|
|
input := []byte(test.input)
|
|
output := make([]byte, 50)
|
|
|
|
// peform the conversion
|
|
bytesRead, bytesWritten, err := Convert(input, output, test.inputEncoding, test.outputEncoding)
|
|
|
|
// HACK Convert has different erorrs, so check ourselves, and then fake out later check
|
|
if err != test.convertErr {
|
|
if test.convertErr != nil {
|
|
if err != nil {
|
|
t.Errorf("err: %q expected: %q", err, test.convertErr)
|
|
} else {
|
|
t.Errorf("err: nil expected %q", test.convertErr)
|
|
}
|
|
} else {
|
|
t.Errorf("unexpected error: %q", err)
|
|
}
|
|
}
|
|
err = test.err
|
|
|
|
return bytesRead, bytesWritten, string(output[:bytesWritten]), err
|
|
})
|
|
}
|
|
|
|
func TestConvertString(t *testing.T) {
|
|
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
|
|
// perform the conversion
|
|
output, err := ConvertString(test.input, test.inputEncoding, test.outputEncoding)
|
|
|
|
// bytesRead and bytesWritten are spoofed a little
|
|
return test.bytesRead, len(output), output, err
|
|
})
|
|
}
|
|
|
|
func TestReader(t *testing.T) {
|
|
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
|
|
var bytesRead, bytesWritten, finalBytesWritten int
|
|
var err error
|
|
|
|
input := bytes.NewBufferString(test.input)
|
|
output := make([]byte, 50)
|
|
|
|
reader, err := NewReader(input, test.inputEncoding, test.outputEncoding)
|
|
|
|
if err == nil {
|
|
bytesWritten, err = reader.Read(output)
|
|
|
|
// we can compute how many bytes iconv read by inspecting the reader state
|
|
bytesRead = len([]byte(test.input)) - input.Len() - (reader.writePos - reader.readPos)
|
|
|
|
// with current tests and buffer sizes, we'd expect all input to be buffered if we called read
|
|
if input.Len() != 0 {
|
|
t.Error("not all bytes from input were buffered")
|
|
}
|
|
|
|
// do final read test if we can - either get EOF or same test error
|
|
if err == nil {
|
|
finalBytesWritten, err = reader.Read(output[bytesWritten:])
|
|
|
|
if finalBytesWritten != 0 {
|
|
t.Errorf("finalBytesWritten: %d expected: 0", finalBytesWritten)
|
|
}
|
|
|
|
if err == io.EOF {
|
|
err = nil
|
|
}
|
|
}
|
|
}
|
|
|
|
return bytesRead, bytesWritten, string(output[:bytesWritten]), err
|
|
})
|
|
}
|
|
|
|
func TestWriter(t *testing.T) {
|
|
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
|
|
var bytesRead, bytesWritten int
|
|
var err error
|
|
|
|
input := []byte(test.input)
|
|
output := new(bytes.Buffer)
|
|
|
|
writer, err := NewWriter(output, test.inputEncoding, test.outputEncoding)
|
|
|
|
if err == nil {
|
|
bytesRead, err = writer.Write(input)
|
|
bytesRead -= writer.readPos
|
|
writer.Close()
|
|
|
|
bytesWritten = output.Len()
|
|
}
|
|
|
|
return bytesRead, bytesWritten, output.String(), err
|
|
})
|
|
}
|
|
|
|
func TestReaderWithCopy(t *testing.T) {
|
|
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
|
|
input := bytes.NewBufferString(test.input)
|
|
output := new(bytes.Buffer)
|
|
|
|
reader, err := NewReader(input, test.inputEncoding, test.outputEncoding)
|
|
|
|
if err == nil {
|
|
_, err := io.Copy(output, reader)
|
|
|
|
bytesRead := len(test.input) - input.Len() - reader.writePos
|
|
bytesWritten := output.Len()
|
|
|
|
return bytesRead, bytesWritten, output.String(), err
|
|
}
|
|
|
|
return 0, 0, output.String(), err
|
|
})
|
|
}
|
|
|
|
func TestWriterWithCopy(t *testing.T) {
|
|
runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
|
|
input := bytes.NewBufferString(test.input)
|
|
output := new(bytes.Buffer)
|
|
|
|
writer, err := NewWriter(output, test.inputEncoding, test.outputEncoding)
|
|
|
|
if err == nil {
|
|
bytesCopied, err := io.Copy(writer, input)
|
|
bytesRead := int(bytesCopied) - writer.readPos
|
|
writer.Close()
|
|
|
|
bytesWritten := output.Len()
|
|
|
|
return bytesRead, bytesWritten, output.String(), err
|
|
}
|
|
|
|
return 0, 0, output.String(), err
|
|
})
|
|
}
|
|
|
|
func TestReaderMultipleReads(t *testing.T) {
|
|
// setup a source reader and our expected output string
|
|
source := bytes.NewBufferString("\x80\x8A\x99\x95\x8B\x86\x87")
|
|
expected := "€Š™•‹†‡"
|
|
|
|
// setup reader - use our minimum buffer size so we can force it to shuffle the buffer around
|
|
reader, err := NewReaderSized(source, "cp1252", "utf-8", minReadBufferSize)
|
|
|
|
if err != nil {
|
|
if err == syscall.EINVAL {
|
|
t.Skip("Either cp1252 or utf-8 isn't supported by iconv on your system")
|
|
} else {
|
|
t.Fatalf("Unexpected error when creating reader: %s", err)
|
|
}
|
|
}
|
|
|
|
// setup a read buffer - we'll slice it to different sizes in our tests
|
|
buffer := make([]byte, 64)
|
|
|
|
// first read should fill internal buffer, but we'll only read part of it
|
|
bytesRead, err := reader.Read(buffer[:5])
|
|
|
|
if bytesRead != 5 || err != nil {
|
|
t.Fatalf("first read did not give expected 5, nil: %d, %s", bytesRead, err)
|
|
}
|
|
|
|
// because of how small teh source is and our minimum buffer size, source shoudl be fully read
|
|
if source.Len() != 0 {
|
|
t.Fatalf("first read did not buffer all of source like expected: %d bytes remain", source.Len())
|
|
}
|
|
|
|
// Buffer doesn't return EOF with last bytes, reader shouldn't know its EOF yet
|
|
if reader.eof {
|
|
t.Fatalf("first read was not expected to receive EOF")
|
|
}
|
|
|
|
// second read should shift internal buffer, and fill again - make buffer too small for last utf-8 character
|
|
// E2BIG from iconv should be ignored because we wrote at least 1 byte
|
|
bytesRead, err = reader.Read(buffer[5:18])
|
|
|
|
if bytesRead != 12 || err != nil {
|
|
t.Fatalf("second read did not give expected 15, nil: %d, %s", bytesRead, err)
|
|
}
|
|
|
|
if !reader.eof {
|
|
t.Fatalf("second read did not put reader into eof state")
|
|
}
|
|
|
|
// try to read the last 3 byte character with only a buffer of 2 bytes - this time we should see the E2BIG
|
|
bytesRead, err = reader.Read(buffer[17:19])
|
|
|
|
if bytesRead != 0 || err != syscall.E2BIG {
|
|
t.Fatalf("third read did not give expected 0, E2BIG: %d, %s", bytesRead, err)
|
|
}
|
|
|
|
// fourth read should finish last character
|
|
bytesRead, err = reader.Read(buffer[17:])
|
|
|
|
if bytesRead != 3 || err != nil {
|
|
t.Fatalf("fourth read did not give expected 3, nil: %d, %s", bytesRead, err)
|
|
}
|
|
|
|
// last read should be EOF
|
|
bytesRead, err = reader.Read(buffer[20:])
|
|
|
|
if bytesRead != 0 || err != io.EOF {
|
|
t.Fatalf("final read did not give expected 0, EOF: %d, %s", bytesRead, err)
|
|
}
|
|
|
|
// check full utf-8 output
|
|
if string(buffer[:20]) != expected {
|
|
t.Fatalf("output did not match expected %q: %q", expected, string(buffer[:20]))
|
|
}
|
|
}
|
|
|
|
func TestWriteWithIncompleteSequence(t *testing.T) {
|
|
expected := "\x80\x8A\x99\x95\x8B\x86\x87"
|
|
input := []byte("€Š™•‹†‡")
|
|
output := new(bytes.Buffer)
|
|
|
|
writer, err := NewWriter(output, "utf-8", "cp1252")
|
|
|
|
if err != nil {
|
|
t.Fatalf("unexpected error while creating writer %q", err)
|
|
}
|
|
|
|
// the input string is made of 3 byte characters, for the test we want to only write part of the last character
|
|
bytesFromBuffer := len(input) - 2
|
|
|
|
bytesRead, err := writer.Write(input[:bytesFromBuffer])
|
|
|
|
if bytesRead != bytesFromBuffer {
|
|
t.Fatalf("did a short write on first write: %d, %s", bytesRead, err)
|
|
}
|
|
|
|
// finish the rest
|
|
bytesRead, err = writer.Write(input[bytesFromBuffer:])
|
|
|
|
if bytesRead != 2 {
|
|
t.Fatalf("did a short write on second write: %d, %s", bytesRead, err)
|
|
}
|
|
|
|
err = writer.Close()
|
|
actual := output.String()
|
|
|
|
if err != nil {
|
|
t.Errorf("got an error on close: %s", err)
|
|
}
|
|
|
|
if actual != expected {
|
|
t.Errorf("output %x did not match expected %x", actual, expected)
|
|
}
|
|
}
|
|
|
|
func TestWriteWithIncompleteSequenceAndIgnore(t *testing.T) {
|
|
if !ignoreDetected {
|
|
t.Skip("//IGNORE not supported")
|
|
}
|
|
|
|
expected := "\x80\x8A\x99\x95\x8B\x86\x87"
|
|
input := []byte("€Š™•‹†‡")
|
|
output := new(bytes.Buffer)
|
|
|
|
writer, err := NewWriter(output, "utf-8", "cp1252//IGNORE")
|
|
|
|
if err != nil {
|
|
t.Fatalf("unexpected error while creating writer %q", err)
|
|
}
|
|
|
|
// the input string is made of 3 byte characters, for the test we want to only write part of the last character
|
|
bytesFromBuffer := len(input) - 2
|
|
|
|
bytesRead, err := writer.Write(input[:bytesFromBuffer])
|
|
|
|
if bytesRead != bytesFromBuffer {
|
|
t.Fatalf("did a short write on first write: %d, %s", bytesRead, err)
|
|
}
|
|
|
|
// finish the rest
|
|
bytesRead, err = writer.Write(input[bytesFromBuffer:])
|
|
|
|
if bytesRead != 2 {
|
|
t.Fatalf("did a short write on second write: %d, %s", bytesRead, err)
|
|
}
|
|
|
|
err = writer.Close()
|
|
actual := output.String()
|
|
|
|
if err != nil {
|
|
t.Errorf("got an error on close: %s", err)
|
|
}
|
|
|
|
if actual != expected {
|
|
t.Errorf("output %x did not match expected %x", actual, expected)
|
|
}
|
|
}
|
|
|
|
func TestWriteWithIncompleteSequenceAtEOF(t *testing.T) {
|
|
expected := "\x80\x8A\x99\x95\x8B\x86"
|
|
input := []byte("€Š™•‹†‡")
|
|
output := new(bytes.Buffer)
|
|
|
|
writer, err := NewWriter(output, "utf-8", "cp1252")
|
|
|
|
if err != nil {
|
|
t.Fatalf("unexpected error while creating writer %q", err)
|
|
}
|
|
|
|
// the input string is made of 3 byte characters, for the test we want to only write part of the last character
|
|
bytesFromBuffer := len(input) - 2
|
|
|
|
bytesRead, err := writer.Write(input[:bytesFromBuffer])
|
|
|
|
if bytesRead != bytesFromBuffer {
|
|
t.Fatalf("did a short write on first write: %d, %s", bytesRead, err)
|
|
}
|
|
|
|
err = writer.Close()
|
|
actual := output.String()
|
|
|
|
if err != nil {
|
|
t.Errorf("got an error on close: %s", err)
|
|
}
|
|
|
|
if actual != expected {
|
|
t.Errorf("output %x did not match expected %x", actual, expected)
|
|
}
|
|
}
|