iconv-go/reader.go
MoZhonghua 331deca0a8 Fix #17: NewReader can't process data bigger than 8K
This should also fix issue #25 (data being truncated).

iconv() returns EINVAL when an incomplete multibyte sequence is
encountered in the input and the input byte sequence terminates after
it. So if the input is larger than the internal buffer of Reader and
the end of the buffer contains a partial multibyte character, then Reader
will fail with EINVAL.

So when iconv() returns EINVAL, we check whether there is more data to
process; if so, we continue without reporting an error to the user.
2016-06-18 17:02:05 +08:00

118 lines
2.7 KiB
Go

package iconv
import (
"io"
"syscall"
)
// bufferSize is the size, in bytes, of the internal buffer each Reader
// allocates for raw input read from its source (8 KiB).
const bufferSize = 8 << 10
// Reader is an io.Reader that pulls raw bytes from an underlying source,
// runs them through a Converter, and hands the converted bytes to the caller.
type Reader struct {
	// source supplies the unconverted input.
	source io.Reader
	// converter performs the actual conversion.
	converter *Converter
	// buffer holds raw input; buffer[readPos:writePos] is the part that has
	// been read from source but not yet consumed by the converter.
	buffer []byte
	// readPos is the offset of the next unconsumed byte in buffer.
	readPos int
	// writePos is the offset at which the next bytes from source are stored.
	writePos int
	// err is the last error recorded while reading (for example io.EOF
	// reported by source).
	err error
}
// NewReader builds a Reader that converts the bytes read from source from
// fromEncoding to toEncoding. It returns a nil Reader together with the error
// from NewConverter when the converter cannot be created.
func NewReader(source io.Reader, fromEncoding string, toEncoding string) (*Reader, error) {
	conv, convErr := NewConverter(fromEncoding, toEncoding)
	if convErr != nil {
		// No converter, no reader: pass the failure straight through.
		return nil, convErr
	}

	return NewReaderFromConverter(source, conv), nil
}
// NewReaderFromConverter builds a Reader that pulls raw bytes from source and
// converts them with the supplied converter. The Reader gets its own freshly
// allocated input buffer of bufferSize bytes.
func NewReaderFromConverter(source io.Reader, converter *Converter) (reader *Reader) {
	return &Reader{
		source:    source,
		converter: converter,
		buffer:    make([]byte, bufferSize),
	}
}
// fillBuffer moves any unconsumed bytes to the front of the internal buffer
// and then performs a single Read on the source into the free space that
// follows them.
//
// Any error reported by the source (including io.EOF) is recorded in r.err.
// The return value is the number of bytes appended to the buffer, or -1 when
// the source reported an error without delivering any bytes. An io.Reader is
// allowed to return data together with an error, so a positive count is
// reported even when an error was recorded by the same call; callers that
// need to know about the error must look at r.err.
func (r *Reader) fillBuffer() int {
	// Slide existing data to the beginning. The built-in copy is defined to
	// handle overlapping source and destination, so this in-place move is safe.
	if r.readPos > 0 {
		copy(r.buffer, r.buffer[r.readPos:r.writePos])
		r.writePos -= r.readPos
		r.readPos = 0
	}

	// Read new data into the free space after the pending bytes.
	bytesRead, err := r.source.Read(r.buffer[r.writePos:])
	r.writePos += bytesRead

	if err != nil {
		// Track the source error / EOF for the caller.
		r.err = err
		if bytesRead == 0 {
			return -1
		}
	}
	return bytesRead
}
// Read implements the io.Reader interface. It converts buffered input from
// the source into p and returns the number of converted bytes written to p.
//
// Errors:
//   - The error reported by the source (for example io.EOF) is returned only
//     once every buffered byte has been handed to the converter, so data that
//     arrived together with the error is never dropped.
//   - syscall.E2BIG is returned when p is too small to hold even the next
//     converted character. It is not remembered: a later call with a larger p
//     can proceed.
//   - syscall.EINVAL (the buffered input ends in an incomplete multibyte
//     sequence) is returned only when no further input can be obtained from
//     the source; otherwise more input is read and conversion continues.
//   - Any other converter error is returned as-is together with the bytes
//     converted before it occurred.
//
// When len(p) == 0 and buffered input is available, Read returns 0, nil.
func (r *Reader) Read(p []byte) (n int, err error) {
	for {
		// Refill only when every buffered byte has been consumed. This is
		// also the only point where a recorded source error is surfaced.
		for r.readPos == r.writePos {
			if r.err != nil {
				return 0, r.err
			}
			r.fillBuffer()
		}

		// An empty destination cannot receive output. Report "no progress"
		// instead of handing it to the converter, which would presumably
		// fail with E2BIG (NOTE(review): confirm against Converter.Convert).
		if len(p) == 0 {
			return 0, nil
		}

		consumed, produced, convErr := r.converter.Convert(r.buffer[r.readPos:r.writePos], p)
		r.readPos += consumed

		switch {
		case convErr == nil, convErr == syscall.E2BIG && produced > 0:
			// E2BIG with output just means p is full; the remaining input
			// stays buffered for the next call.
			if produced > 0 || consumed == 0 {
				return produced, nil
			}
			// Input was consumed without producing output: go around again
			// rather than returning 0, nil to the caller.

		case convErr == syscall.EINVAL && r.err == nil:
			// The buffered input ends in the middle of a multibyte sequence
			// and the source has not failed yet: try to fetch the rest. The
			// pending byte count is unchanged by the slide inside fillBuffer,
			// so growth means new input arrived.
			pending := r.writePos - r.readPos
			r.fillBuffer()
			if r.writePos-r.readPos <= pending {
				// Nothing new arrived; the sequence stays incomplete.
				return produced, convErr
			}
			if produced > 0 {
				return produced, nil
			}
			// Nothing produced yet, but more input is now buffered: retry.

		default:
			// EINVAL after the source is exhausted, E2BIG with no output,
			// or any other conversion failure.
			return produced, convErr
		}
	}
}