Fix #17: NewReader can't process data bigger than 8K
This should also fix issue #25: When the amount of data will be truncated. iconv() returns EINVAL when an incomplete multibyte sequence is encountered in the input and the input byte sequence terminates after it. So if the input is larger than the internal buffer of Reader and the end of the buffer contains a partial multi-byte character, Reader will fail with EINVAL. Now, when iconv() returns EINVAL, we check whether there is more data to process; if so, we continue without reporting an error to the user.
This commit is contained in:
		
							parent
							
								
									8960e66bd3
								
							
						
					
					
						commit
						331deca0a8
					
				
							
								
								
									
										21
									
								
								reader.go
									
									
									
									
									
								
							
							
						
						
									
										21
									
								
								reader.go
									
									
									
									
									
								
							@ -5,6 +5,8 @@ import (
 | 
			
		||||
	"syscall"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
const bufferSize = 8 * 1024
 | 
			
		||||
 | 
			
		||||
type Reader struct {
 | 
			
		||||
	source            io.Reader
 | 
			
		||||
	converter         *Converter
 | 
			
		||||
@ -33,12 +35,12 @@ func NewReaderFromConverter(source io.Reader, converter *Converter) (reader *Rea
 | 
			
		||||
	reader.converter = converter
 | 
			
		||||
 | 
			
		||||
	// create 8K buffers
 | 
			
		||||
	reader.buffer = make([]byte, 8*1024)
 | 
			
		||||
	reader.buffer = make([]byte, bufferSize)
 | 
			
		||||
 | 
			
		||||
	return reader
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (this *Reader) fillBuffer() {
 | 
			
		||||
func (this *Reader) fillBuffer() int {
 | 
			
		||||
	// slide existing data to beginning
 | 
			
		||||
	if this.readPos > 0 {
 | 
			
		||||
		// copy current bytes - is this guaranteed safe?
 | 
			
		||||
@ -58,6 +60,9 @@ func (this *Reader) fillBuffer() {
 | 
			
		||||
	// track any reader error / EOF
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		this.err = err
 | 
			
		||||
		return -1
 | 
			
		||||
	} else {
 | 
			
		||||
		return bytesRead
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -85,6 +90,18 @@ func (this *Reader) Read(p []byte) (n int, err error) {
 | 
			
		||||
 | 
			
		||||
	// if we experienced an iconv error, check it
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		// EINVAL:
 | 
			
		||||
		// An incomplete multibyte sequence is encountered in the input,
 | 
			
		||||
		// and the input byte sequence terminates after it.
 | 
			
		||||
		if err == syscall.EINVAL {
 | 
			
		||||
			// If we can read new data, then this should NOT be
 | 
			
		||||
			// considered as an error.
 | 
			
		||||
			newData := this.fillBuffer()
 | 
			
		||||
			if newData > 0 {
 | 
			
		||||
				return n, nil
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// E2BIG errors can be ignored (we'll get them often) as long
 | 
			
		||||
		// as at least 1 byte was written. If we experienced an E2BIG
 | 
			
		||||
		// and no bytes were written then the buffer is too small for
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										38
									
								
								reader_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								reader_test.go
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,38 @@
 | 
			
		||||
package iconv
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"bytes"
 | 
			
		||||
	"io/ioutil"
 | 
			
		||||
	"testing"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func GbkToUtf8(src []byte) ([]byte, error) {
 | 
			
		||||
	reader, err := NewReader(bytes.NewReader(src), "gbk", "utf-8")
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
	return ioutil.ReadAll(reader)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func Utf8ToGbk(src []byte) ([]byte, error) {
 | 
			
		||||
	reader, err := NewReader(bytes.NewReader(src), "utf-8", "gbk")
 | 
			
		||||
	reader.buffer = make([]byte, 16)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
	return ioutil.ReadAll(reader)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestReaderWithDataLargerThanBuffer(t *testing.T) {
 | 
			
		||||
	chars := []byte("梅")
 | 
			
		||||
	for len(chars) < bufferSize*2 {
 | 
			
		||||
		t.Logf("input size: %d", len(chars))
 | 
			
		||||
		chars = append(chars, chars...)
 | 
			
		||||
		_, err := Utf8ToGbk(chars)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			t.Fail()
 | 
			
		||||
			t.Logf("failed with %d bytes data", len(chars))
 | 
			
		||||
			return
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user