Fix #17: NewReader can't process data bigger than 8K

This should also fix issue #25: When the amount of data will be truncated

iconv() returns EINVAL when an incomplete multibyte sequence is
encountered in the input and the input byte sequence terminates after
it. So if the input is larger than the internal buffer of Reader and
the end of the buffer contains a partial multi-byte character, Reader
will fail with EINVAL.

So when iconv() returns EINVAL, we check whether there is more data to
process; if so, we continue without reporting an error to the user.
This commit is contained in:
MoZhonghua 2016-06-18 17:02:05 +08:00
parent 8960e66bd3
commit 331deca0a8
2 changed files with 57 additions and 2 deletions

View File

@ -5,6 +5,8 @@ import (
"syscall" "syscall"
) )
const bufferSize = 8 * 1024
type Reader struct { type Reader struct {
source io.Reader source io.Reader
converter *Converter converter *Converter
@ -33,12 +35,12 @@ func NewReaderFromConverter(source io.Reader, converter *Converter) (reader *Rea
reader.converter = converter reader.converter = converter
// create 8K buffers // create 8K buffers
reader.buffer = make([]byte, 8*1024) reader.buffer = make([]byte, bufferSize)
return reader return reader
} }
func (this *Reader) fillBuffer() { func (this *Reader) fillBuffer() int {
// slide existing data to beginning // slide existing data to beginning
if this.readPos > 0 { if this.readPos > 0 {
// copy current bytes - is this guaranteed safe? // copy current bytes - is this guaranteed safe?
@ -58,6 +60,9 @@ func (this *Reader) fillBuffer() {
// track any reader error / EOF // track any reader error / EOF
if err != nil { if err != nil {
this.err = err this.err = err
return -1
} else {
return bytesRead
} }
} }
@ -85,6 +90,18 @@ func (this *Reader) Read(p []byte) (n int, err error) {
// if we experienced an iconv error, check it // if we experienced an iconv error, check it
if err != nil { if err != nil {
// EINVAL:
// An incomplete multibyte sequence is encountered in the input,
// and the input byte sequence terminates after it.
if err == syscall.EINVAL {
// If we can read new data, then this should NOT be
// considered as an error.
newData := this.fillBuffer()
if newData > 0 {
return n, nil
}
}
// E2BIG errors can be ignored (we'll get them often) as long // E2BIG errors can be ignored (we'll get them often) as long
// as at least 1 byte was written. If we experienced an E2BIG // as at least 1 byte was written. If we experienced an E2BIG
// and no bytes were written then the buffer is too small for // and no bytes were written then the buffer is too small for

38
reader_test.go Normal file
View File

@ -0,0 +1,38 @@
package iconv
import (
"bytes"
"io/ioutil"
"testing"
)
// GbkToUtf8 is a test helper that wraps src in a Reader created with
// NewReader(..., "gbk", "utf-8") and returns everything read from it.
//
// It returns a nil slice and the NewReader error if the reader cannot be
// created; otherwise it returns whatever ioutil.ReadAll yields.
func GbkToUtf8(src []byte) ([]byte, error) {
	input := bytes.NewReader(src)
	decoder, openErr := NewReader(input, "gbk", "utf-8")
	if openErr != nil {
		return nil, openErr
	}
	// Drain the converting reader completely and hand back both results.
	converted, readErr := ioutil.ReadAll(decoder)
	return converted, readErr
}
// Utf8ToGbk is a test helper that wraps src in a Reader created with
// NewReader(..., "utf-8", "gbk") and returns everything read from it.
//
// The reader's internal buffer is replaced with a 16-byte one, presumably so
// that even small inputs force multi-byte characters to straddle buffer
// refills.
//
// It returns a nil slice and the NewReader error if the reader cannot be
// created; otherwise it returns whatever ioutil.ReadAll yields.
func Utf8ToGbk(src []byte) ([]byte, error) {
	reader, err := NewReader(bytes.NewReader(src), "utf-8", "gbk")
	if err != nil {
		return nil, err
	}
	// Only touch the reader after the error check: on failure the returned
	// reader may be nil, and assigning to its field would panic instead of
	// reporting the error.
	reader.buffer = make([]byte, 16)
	return ioutil.ReadAll(reader)
}
// TestReaderWithDataLargerThanBuffer converts progressively larger UTF-8
// inputs (doubling each round until the input reaches at least twice
// bufferSize) through Utf8ToGbk and checks that every conversion succeeds
// and that no output is lost.
//
// The input is a repetition of a single 3-byte UTF-8 character, so with the
// small reader buffer used by Utf8ToGbk the buffer regularly ends in the
// middle of a character.
func TestReaderWithDataLargerThanBuffer(t *testing.T) {
	const (
		sample   = "梅" // 3 bytes in UTF-8
		gbkWidth = 2   // the same character occupies 2 bytes in GBK
	)

	chars := []byte(sample)
	for len(chars) < bufferSize*2 {
		chars = append(chars, chars...)
		t.Logf("input size: %d", len(chars))

		out, err := Utf8ToGbk(chars)
		if err != nil {
			// Include the error itself so a failure is diagnosable.
			t.Fatalf("failed with %d bytes data: %v", len(chars), err)
		}

		// Guard against silent truncation: every input character must
		// produce exactly one GBK character in the output.
		want := len(chars) / len(sample) * gbkWidth
		if len(out) != want {
			t.Fatalf("output truncated for %d bytes data: got %d bytes, want %d",
				len(chars), len(out), want)
		}
	}
}