From 331deca0a8bb6242b94d49af4e0e2e0a3d47d426 Mon Sep 17 00:00:00 2001 From: MoZhonghua Date: Sat, 18 Jun 2016 17:02:05 +0800 Subject: [PATCH 1/3] Fix #17: NewReader can't process data bigger than 8K This should also fix issue #25: When the amount of data will be truncated. iconv() will return EINVAL when an incomplete multibyte sequence is encountered in the input, and the input byte sequence terminates after it. So if the input is larger than the internal buffer of Reader and the end of the buffer contains partial multi-byte chars, then Reader will fail with EINVAL. So when iconv() returns EINVAL, we check whether there is more data to process; if so, we continue without reporting an error to the user. --- reader.go | 21 +++++++++++++++++++-- reader_test.go | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) create mode 100644 reader_test.go diff --git a/reader.go b/reader.go index 2835ce6..a1d85a2 100644 --- a/reader.go +++ b/reader.go @@ -5,6 +5,8 @@ import ( "syscall" ) +const bufferSize = 8 * 1024 + type Reader struct { source io.Reader converter *Converter @@ -33,12 +35,12 @@ func NewReaderFromConverter(source io.Reader, converter *Converter) (reader *Rea reader.converter = converter // create 8K buffers - reader.buffer = make([]byte, 8*1024) + reader.buffer = make([]byte, bufferSize) return reader } -func (this *Reader) fillBuffer() { +func (this *Reader) fillBuffer() int { // slide existing data to beginning if this.readPos > 0 { // copy current bytes - is this guaranteed safe? @@ -58,6 +60,9 @@ func (this *Reader) fillBuffer() { // track any reader error / EOF if err != nil { this.err = err + return -1 + } else { + return bytesRead } } @@ -85,6 +90,18 @@ func (this *Reader) Read(p []byte) (n int, err error) { // if we experienced an iconv error, check it if err != nil { + // EINVAL: + // An incomplete multibyte sequence is encountered in the input, + // and the input byte sequence terminates after it. 
+ if err == syscall.EINVAL { + // If we can read new data, then this should NOT be + // considered as an error. + newData := this.fillBuffer() + if newData > 0 { + return n, nil + } + } + // E2BIG errors can be ignored (we'll get them often) as long // as at least 1 byte was written. If we experienced an E2BIG // and no bytes were written then the buffer is too small for diff --git a/reader_test.go b/reader_test.go new file mode 100644 index 0000000..643bfd5 --- /dev/null +++ b/reader_test.go @@ -0,0 +1,38 @@ +package iconv + +import ( + "bytes" + "io/ioutil" + "testing" +) + +func GbkToUtf8(src []byte) ([]byte, error) { + reader, err := NewReader(bytes.NewReader(src), "gbk", "utf-8") + if err != nil { + return nil, err + } + return ioutil.ReadAll(reader) +} + +func Utf8ToGbk(src []byte) ([]byte, error) { + reader, err := NewReader(bytes.NewReader(src), "utf-8", "gbk") + reader.buffer = make([]byte, 16) + if err != nil { + return nil, err + } + return ioutil.ReadAll(reader) +} + +func TestReaderWithDataLargerThanBuffer(t *testing.T) { + chars := []byte("梅") + for len(chars) < bufferSize*2 { + t.Logf("input size: %d", len(chars)) + chars = append(chars, chars...) 
+ _, err := Utf8ToGbk(chars) + if err != nil { + t.Fail() + t.Logf("failed with %d bytes data", len(chars)) + return + } + } +} From 9ec1b936e53fcfbe5b248eb8f007f13d6342758a Mon Sep 17 00:00:00 2001 From: MoZhonghua Date: Thu, 23 Jun 2016 10:56:59 +0800 Subject: [PATCH 2/3] fillBuffer() should not return -1 when EOF --- reader.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/reader.go b/reader.go index a1d85a2..e0a6d60 100644 --- a/reader.go +++ b/reader.go @@ -60,10 +60,8 @@ func (this *Reader) fillBuffer() int { // track any reader error / EOF if err != nil { this.err = err - return -1 - } else { - return bytesRead } + return bytesRead } // implement the io.Reader interface From 0efdb40d5723661af7fc5d7e153f096de410a6aa Mon Sep 17 00:00:00 2001 From: MoZhonghua Date: Fri, 24 Jun 2016 10:54:53 +0800 Subject: [PATCH 3/3] Fix truncated data --- reader.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/reader.go b/reader.go index e0a6d60..a649f6a 100644 --- a/reader.go +++ b/reader.go @@ -107,6 +107,13 @@ func (this *Reader) Read(p []byte) (n int, err error) { if err != syscall.E2BIG || bytesWritten == 0 { // track anything else this.err = err + } else { + // Should not return this.err + // If we got EOF from source in last fillBuffer() call, and + // there is still more data to process in buffer, in this + // case, if we return this.err(=EOF), then data in buffer + // will be lost. + return n, nil } }