package iconv import ( "bytes" "io" "strings" "syscall" "testing" ) type iconvTest struct { description string input string inputEncoding string output string outputEncoding string bytesRead int bytesWritten int convertErr error // err from Convert (raw iconv) err error // err from CovertString, Reader, Writer } var ( iconvTests = []iconvTest{ iconvTest{ "simple utf-8 to latin1 conversion success", "Hello World!", "utf-8", "Hello World!", "latin1", 12, 12, nil, nil, }, iconvTest{ "invalid source encoding causes EINVAL", "", "doesnotexist", "", "utf-8", 0, 0, syscall.EINVAL, syscall.EINVAL, }, iconvTest{ "invalid destination encoding causes EINVAL", "", "utf-8", "", "doesnotexist", 0, 0, syscall.EINVAL, syscall.EINVAL, }, iconvTest{ "utf-8 to utf-8 passthrough", "Hello world!", "utf-8", "Hello world!", "utf-8", 12, 12, nil, nil, }, iconvTest{ "utf-8 to utf-8 partial", "Hello\xFFWorld!", "utf-8", "Hello", "utf-8", 5, 5, syscall.EILSEQ, syscall.EILSEQ, }, iconvTest{ "utf-8 to utf-8 ignored", "Hello \xFFWorld!", "utf-8", "Hello World!", "utf-8//IGNORE", 13, 12, syscall.EILSEQ, nil, }, iconvTest{ "invalid input sequence causes EILSEQ", "\xFF", "utf-8", "", "latin1", 0, 0, syscall.EILSEQ, syscall.EILSEQ, }, iconvTest{ "incomplete input sequence causes EINVAL", "\xC2", "utf-8", "", "latin1", 0, 0, syscall.EINVAL, syscall.EINVAL, }, iconvTest{ "invalid input causes partial output and EILSEQ", "Hello\xFF", "utf-8", "Hello", "latin1", 5, 5, syscall.EILSEQ, syscall.EILSEQ, }, iconvTest{ "incomplete input causes partial output and EILSEQ", "Hello\xC2", "utf-8", "Hello", "latin1", 5, 5, syscall.EINVAL, syscall.EINVAL, }, /* this is only true for glibc / iconv iconvTest{ "valid input but no conversion causes EILSEQ", "你好世界 Hello World", "utf-8", "", "latin1", 0, 0, syscall.EILSEQ, syscall.EILSEQ, },*/ iconvTest{ "invalid input with ignore", "Hello\xFF World!", "utf-8", "Hello World!", "latin1//IGNORE", 13, 12, syscall.EILSEQ, nil, }, iconvTest{ "valid input but no conversion with IGNORE", "你好世界 Hello World", "utf-8", " Hello World", "latin1//IGNORE", 24, 12, syscall.EILSEQ, nil, }, iconvTest{ "valid input but no conversion with TRANSLIT", "你好世界 Hello World", "utf-8", "???? Hello World", "latin1//TRANSLIT", 24, 16, nil, nil, }, } ignoreDetected, translitDetected bool ) func init() { // detect if IGNORE / TRANSLIT is supported (glic / libiconv) conv, err := NewConverter("utf-8", "ascii//IGNORE") if err == nil { ignoreDetected = true conv.Close() } conv, err = NewConverter("utf-8", "ascii//TRANSLIT") if err == nil { translitDetected = true conv.Close() } } func runTests(t *testing.T, f func(iconvTest, *testing.T) (int, int, string, error)) { for _, test := range iconvTests { t.Run(test.description, func(t *testing.T) { if !ignoreDetected && strings.HasSuffix(test.outputEncoding, "//IGNORE") { t.Skip("//IGNORE not supported") } if !translitDetected && strings.HasSuffix(test.outputEncoding, "//TRANSLIT") { t.Skip("//TRANSLIT not supported") } bytesRead, bytesWritten, output, err := f(test, t) // check that bytesRead is same as expected if bytesRead != test.bytesRead { t.Errorf("bytesRead: %d expected: %d", bytesRead, test.bytesRead) } // check that bytesWritten is same as expected if bytesWritten != test.bytesWritten { t.Errorf("bytesWritten: %d expected: %d", bytesWritten, test.bytesWritten) } // check output bytes against expected if output != test.output { t.Errorf("output: %x expected: %x", output, test.output) } // check that err is same as expected if err != test.err { if test.err != nil { if err != nil { t.Errorf("err: %q expected: %q", err, test.err) } else { t.Errorf("err: nil expected %q", test.err) } } else { t.Errorf("unexpected error: %q", err) } } }) } } func TestConvert(t *testing.T) { runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) { input := []byte(test.input) output := make([]byte, 50) // peform the conversion bytesRead, bytesWritten, err := Convert(input, output, test.inputEncoding, test.outputEncoding) // HACK Convert has different erorrs, so check ourselves, and then fake out later check if err != test.convertErr { if test.convertErr != nil { if err != nil { t.Errorf("err: %q expected: %q", err, test.convertErr) } else { t.Errorf("err: nil expected %q", test.convertErr) } } else { t.Errorf("unexpected error: %q", err) } } err = test.err return bytesRead, bytesWritten, string(output[:bytesWritten]), err }) } func TestConvertString(t *testing.T) { runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) { // perform the conversion output, err := ConvertString(test.input, test.inputEncoding, test.outputEncoding) // bytesRead and bytesWritten are spoofed a little return test.bytesRead, len(output), output, err }) } func TestReader(t *testing.T) { runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) { var bytesRead, bytesWritten, finalBytesWritten int var err error input := bytes.NewBufferString(test.input) output := make([]byte, 50) reader, err := NewReader(input, test.inputEncoding, test.outputEncoding) if err == nil { bytesWritten, err = reader.Read(output) // we can compute how many bytes iconv read by inspecting the reader state bytesRead = len([]byte(test.input)) - input.Len() - (reader.writePos - reader.readPos) // with current tests and buffer sizes, we'd expect all input to be buffered if we called read if input.Len() != 0 { t.Error("not all bytes from input were buffered") } // do final read test if we can - either get EOF or same test error if err == nil { finalBytesWritten, err = reader.Read(output[bytesWritten:]) if finalBytesWritten != 0 { t.Errorf("finalBytesWritten: %d expected: 0", finalBytesWritten) } if err == io.EOF { err = nil } } } return bytesRead, bytesWritten, string(output[:bytesWritten]), err }) } func TestWriter(t *testing.T) { runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) { var bytesRead, bytesWritten int var err error input := []byte(test.input) output := new(bytes.Buffer) writer, err := NewWriter(output, test.inputEncoding, test.outputEncoding) if err == nil { bytesRead, err = writer.Write(input) bytesRead -= writer.readPos writer.Close() bytesWritten = output.Len() } return bytesRead, bytesWritten, output.String(), err }) } func TestReaderWithCopy(t *testing.T) { runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) { input := bytes.NewBufferString(test.input) output := new(bytes.Buffer) reader, err := NewReader(input, test.inputEncoding, test.outputEncoding) if err == nil { _, err := io.Copy(output, reader) bytesRead := len(test.input) - input.Len() - reader.writePos bytesWritten := output.Len() return bytesRead, bytesWritten, output.String(), err } return 0, 0, output.String(), err }) } func TestWriterWithCopy(t *testing.T) { runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) { input := bytes.NewBufferString(test.input) output := new(bytes.Buffer) writer, err := NewWriter(output, test.inputEncoding, test.outputEncoding) if err == nil { bytesCopied, err := io.Copy(writer, input) bytesRead := int(bytesCopied) - writer.readPos writer.Close() bytesWritten := output.Len() return bytesRead, bytesWritten, output.String(), err } return 0, 0, output.String(), err }) } func TestReaderMultipleReads(t *testing.T) { // setup a source reader and our expected output string source := bytes.NewBufferString("\x80\x8A\x99\x95\x8B\x86\x87") expected := "€Š™•‹†‡" // setup reader - use our minimum buffer size so we can force it to shuffle the buffer around reader, err := NewReaderSized(source, "cp1252", "utf-8", minReadBufferSize) if err != nil { if err == syscall.EINVAL { t.Skip("Either cp1252 or utf-8 isn't supported by iconv on your system") } else { t.Fatalf("Unexpected error when creating reader: %s", err) } } // setup a read buffer - we'll slice it to different sizes in our tests buffer := make([]byte, 64) // first read should fill internal buffer, but we'll only read part of it bytesRead, err := reader.Read(buffer[:5]) if bytesRead != 5 || err != nil { t.Fatalf("first read did not give expected 5, nil: %d, %s", bytesRead, err) } // because of how small teh source is and our minimum buffer size, source shoudl be fully read if source.Len() != 0 { t.Fatalf("first read did not buffer all of source like expected: %d bytes remain", source.Len()) } // Buffer doesn't return EOF with last bytes, reader shouldn't know its EOF yet if reader.eof { t.Fatalf("first read was not expected to receive EOF") } // second read should shift internal buffer, and fill again - make buffer too small for last utf-8 character // E2BIG from iconv should be ignored because we wrote at least 1 byte bytesRead, err = reader.Read(buffer[5:18]) if bytesRead != 12 || err != nil { t.Fatalf("second read did not give expected 15, nil: %d, %s", bytesRead, err) } if !reader.eof { t.Fatalf("second read did not put reader into eof state") } // try to read the last 3 byte character with only a buffer of 2 bytes - this time we should see the E2BIG bytesRead, err = reader.Read(buffer[17:19]) if bytesRead != 0 || err != syscall.E2BIG { t.Fatalf("third read did not give expected 0, E2BIG: %d, %s", bytesRead, err) } // fourth read should finish last character bytesRead, err = reader.Read(buffer[17:]) if bytesRead != 3 || err != nil { t.Fatalf("fourth read did not give expected 3, nil: %d, %s", bytesRead, err) } // last read should be EOF bytesRead, err = reader.Read(buffer[20:]) if bytesRead != 0 || err != io.EOF { t.Fatalf("final read did not give expected 0, EOF: %d, %s", bytesRead, err) } // check full utf-8 output if string(buffer[:20]) != expected { t.Fatalf("output did not match expected %q: %q", expected, string(buffer[:20])) } } func TestWriteWithIncompleteSequence(t *testing.T) { expected := "\x80\x8A\x99\x95\x8B\x86\x87" input := []byte("€Š™•‹†‡") output := new(bytes.Buffer) writer, err := NewWriter(output, "utf-8", "cp1252") if err != nil { t.Fatalf("unexpected error while creating writer %q", err) } // the input string is made of 3 byte characters, for the test we want to only write part of the last character bytesFromBuffer := len(input) - 2 bytesRead, err := writer.Write(input[:bytesFromBuffer]) if bytesRead != bytesFromBuffer { t.Fatalf("did a short write on first write: %d, %s", bytesRead, err) } // finish the rest bytesRead, err = writer.Write(input[bytesFromBuffer:]) if bytesRead != 2 { t.Fatalf("did a short write on second write: %d, %s", bytesRead, err) } err = writer.Close() actual := output.String() if err != nil { t.Errorf("got an error on close: %s", err) } if actual != expected { t.Errorf("output %x did not match expected %x", actual, expected) } } func TestWriteWithIncompleteSequenceAndIgnore(t *testing.T) { if !ignoreDetected { t.Skip("//IGNORE not supported") } expected := "\x80\x8A\x99\x95\x8B\x86\x87" input := []byte("€Š™•‹†‡") output := new(bytes.Buffer) writer, err := NewWriter(output, "utf-8", "cp1252//IGNORE") if err != nil { t.Fatalf("unexpected error while creating writer %q", err) } // the input string is made of 3 byte characters, for the test we want to only write part of the last character bytesFromBuffer := len(input) - 2 bytesRead, err := writer.Write(input[:bytesFromBuffer]) if bytesRead != bytesFromBuffer { t.Fatalf("did a short write on first write: %d, %s", bytesRead, err) } // finish the rest bytesRead, err = writer.Write(input[bytesFromBuffer:]) if bytesRead != 2 { t.Fatalf("did a short write on second write: %d, %s", bytesRead, err) } err = writer.Close() actual := output.String() if err != nil { t.Errorf("got an error on close: %s", err) } if actual != expected { t.Errorf("output %x did not match expected %x", actual, expected) } } func TestWriteWithIncompleteSequenceAtEOF(t *testing.T) { expected := "\x80\x8A\x99\x95\x8B\x86" input := []byte("€Š™•‹†‡") output := new(bytes.Buffer) writer, err := NewWriter(output, "utf-8", "cp1252") if err != nil { t.Fatalf("unexpected error while creating writer %q", err) } // the input string is made of 3 byte characters, for the test we want to only write part of the last character bytesFromBuffer := len(input) - 2 bytesRead, err := writer.Write(input[:bytesFromBuffer]) if bytesRead != bytesFromBuffer { t.Fatalf("did a short write on first write: %d, %s", bytesRead, err) } err = writer.Close() actual := output.String() if err != nil { t.Errorf("got an error on close: %s", err) } if actual != expected { t.Errorf("output %x did not match expected %x", actual, expected) } }