* Added README.md

* moved sample programs to examples directory * cleaned up the Makefile * converter.go: ConvertString now uses Convert under the hood, which removes some code duplication * reader.go: No need to have two separate buffers; we can write directly into the buffer given in the Read call, which simplifies the code greatly
2011-01-15 04:06:50 -05:00 · 2011-01-15 04:06:50 -05:00 · 690531c87e
commit 690531c87e
parent 82db0fae9a
8 changed files with 164 additions and 129 deletions
--- a/29
+++ b/29
@ -1,34 +1,15 @@
 # Copyright 2009 The Go Authors.  All rights reserved.
 # Use of this source code is governed by a BSD-style
 # license that can be found in the LICENSE file.
 include $(GOROOT)/src/Make.inc
 # target package name
 TARG=iconv
 # regular go files
 GOFILES=\
-	reader.go
+	reader.go\
 # files that must be processed by cgo
 CGOFILES=\
 	converter.go\
 	iconv.go\
 	converter.go
 ifeq ($(GOOS),windows)
 CGO_LDFLAGS=-liconv
 endif
 # To add flags necessary for locating the library or its include files,
 # set CGO_CFLAGS or CGO_LDFLAGS.  For example, to use an
 # alternate installation of the library:
 #	CGO_CFLAGS=-I/home/rsc/gmp32/include
 #	CGO_LDFLAGS+=-L/home/rsc/gmp32/lib
 # Note the += on the second line.
 CLEANFILES+=sample
 include $(GOROOT)/src/Make.pkg
 # simple test program to test iconv conversion
 sample: install sample.go
 	$(GC) $@.go
 	$(LD) -o $@ $@.$O
--- a/README.md
+++ b/README.md
@ -0,0 +1,72 @@
 Install
 =======
 The goinstall command can be used:
 	goinstall github.com/djimenez/iconv.go
 Or, you can clone the repository and use gomake instead:
 	git clone git://github.com/djimenez/iconv.go.git iconv
 	cd iconv
 	gomake install
 Usage
 =====
 To use the package, you'll need the appropriate import statement:
 	import (
 		// if you used goinstall, you'll want this import
 		iconv "github.com/djimenez/iconv.go"
 		// if you used gomake install directly, you'll want this import
 		"iconv"
 	)
 Converting string Values 
 ------------------------
 Converting a string can be done in two ways. First, there's iconv.ConvertString(input, fromEncoding, toEncoding string):
 	output,_ := iconv.ConvertString("Hello World!", "utf-8", "windows-1252")
 Alternatively, you can create a converter and use its ConvertString method. This mostly just saves having to parse the from and to encodings when converting many strings in the same way.
 	converter,_ := iconv.NewConverter("utf-8", "windows-1252")
 	output,_ := converter.ConvertString("Hello World!")
 Converting []byte Values
 ------------------------
 Converting a []byte can similarly be done in two ways. First, there's iconv.Convert(input, output []byte, fromEncoding, toEncoding string). You'll immediately notice this requires you to give it both the input and the output buffer. Ideally, the output buffer should be sized so that it can hold all converted bytes from the input, but if it cannot, Convert will put as many bytes as it can into the buffer without creating an invalid sequence. For example, if iconv has only a single byte left in the output buffer but needs 2 or more for the complete character in a multibyte encoding, it will stop writing to the buffer and return an iconv.E2BIG error.
 	input := []byte("Hello World!")
 	output := make([]byte, len(input))
 	bytesRead, bytesWritten, error := iconv.Convert(input, output, "utf-8", "windows-1252")
 Just like with ConvertString, there is also a Convert method on Converter that can be used.
 	...
 	converter,_ := iconv.NewConverter("utf-8", "windows-1252")
 	bytesRead, bytesWritten, error := converter.Convert(input, output)
 Converting an io.Reader
 -----------------------
 The iconv.Reader allows any other io.Reader to be wrapped and have its bytes transcoded as they are read.
 	// We're wrapping stdin for simplicity, but a File or network reader could be wrapped as well
 	reader,_ := iconv.NewReader(os.Stdin, "utf-8", "windows-1252")
 Converting an io.Writer
 -----------------------
 To be written.
 Piping a Conversion
 -------------------
 To be written.
--- a/converter.go
+++ b/converter.go
@ -13,7 +13,7 @@ type Converter struct {
 	open bool
 }
-func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err os.Error) {
+func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err Error) {
 	converter = new(Converter)
 	converter.context, err = C.iconv_open(C.CString(toEncoding), C.CString(fromEncoding))
@ -47,21 +47,21 @@ func (this *Converter) Close() (err os.Error) {
 //
 // NOTE: not all bytes may be consumed from the input. This can be because the output
 // buffer is too small or because there were iconv errors
-func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err os.Error) {
+func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err Error) {
 	inputLeft := C.size_t(len(input))
 	outputLeft := C.size_t(len(output))
 	// we're going to give iconv the pointers to the underlying
 	// storage of each byte slice - so far this is the simplest
 	// way i've found to do that in Go, but it seems ugly
 	inputFirstElementPointer := &input[0]
 	inputPointer := (**C.char)(unsafe.Pointer(&inputFirstElementPointer))
 	outputFirstElementPointer := &output[0]
 	outputPointer := (**C.char)(unsafe.Pointer(&outputFirstElementPointer))
 	// we're only going to make one call to iconv
 	if inputLeft > 0 && outputLeft > 0 {
 		// we're going to give iconv the pointers to the underlying
 		// storage of each byte slice - so far this is the simplest
 		// way i've found to do that in Go, but it seems ugly
 		inputFirstElementPointer := &input[0]
 		inputPointer := (**C.char)(unsafe.Pointer(&inputFirstElementPointer))
 		outputFirstElementPointer := &output[0]
 		outputPointer := (**C.char)(unsafe.Pointer(&outputFirstElementPointer))
 		// we're only going to make one call to iconv
 		_,err = C.iconv(this.context, inputPointer, &inputLeft, outputPointer, &outputLeft)
 		// update byte counters
@ -72,59 +72,42 @@ func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, byte
 	return bytesRead, bytesWritten, err
 }
-// convert the bytes of a string and return the resulting string
+// convert a string value, returning a new string value
-//
+func (this *Converter) ConvertString(input string) (output string, err Error) {
 // TODO: can we do this in terms of Convert function
 func (this *Converter) ConvertString(input string) (output string, err os.Error) {
 	// both our input buffer and output buffer will be the same size
 	// but we'll reuse our output buffer each time its filled
 	bufferSize := len(input)
 	sourceLeft := C.size_t(bufferSize)
 	outputLeft := sourceLeft
 	outputReset := outputLeft
-	// our input buffer is the source string, but iconv will track
+	// construct the buffers
-	// how many bytes has left to process
+	inputBuffer := []byte(input)
-	sourceBuffer := C.CString(input)
+	outputBuffer := make([]byte, len(inputBuffer) * 2) // we use a larger buffer to help avoid resizing later
 	sourcePointer := &sourceBuffer
-	outputBuffer := make([]byte, bufferSize)
+	// call Convert until all input bytes are read or an error occurs
-	outputFirstPointer := &outputBuffer[0] 
+	var bytesRead, totalBytesRead, bytesWritten, totalBytesWritten int
 	outputPointer := (**C.char)(unsafe.Pointer(&outputFirstPointer))
-	// process the source with iconv in a loop
+	for totalBytesRead < len(inputBuffer) && err == nil {
-	for sourceLeft > 0 {
+		bytesRead, bytesWritten, err = this.Convert(inputBuffer, outputBuffer)
 		//fmt.Println("calling to iconv")
 		_,err := C.iconv(this.context, sourcePointer, &sourceLeft, outputPointer, &outputLeft)
-		//fmt.Println("sourceLeft: ", int(sourceLeft), " outputLeft: ", int(outputLeft))
+		totalBytesRead += bytesRead
 		totalBytesWritten += bytesWritten
-		// check the err - most interested if we need to expand the output buffer
+		// check for the E2BIG error specifically, we can add to the output
-		if err != nil {
+		// buffer to correct for it and then continue
-			//fmt.Println("got error value: ", err)
+		if err == E2BIG {
 			// increase the size of the output buffer by another input length
 			// first, create a new buffer
 			tempBuffer := make([]byte, len(outputBuffer) + len(inputBuffer))
-			if err == E2BIG {
+			// copy the existing data
-				// we need more output buffer to continue
+			copy(tempBuffer, outputBuffer)
-				// instead of resizing, lets pull what we got so far
+
-				// and set outputLeft back to the buffer size
+			// switch the buffers
-				output += string(outputBuffer[0:bufferSize - int(outputLeft)])
+			outputBuffer = tempBuffer
-				outputLeft = outputReset
+
-			} else {
+			// forget the error
-				// we got an error we can't continue with
+			err = nil
 				break
 			}
 		}
 	}
-	// free our sourceBuffer, no longer needed
+	// construct the final output string
-	//C.free(unsafe.Pointer(&sourceBuffer))
+	output = string(outputBuffer[:totalBytesWritten])
 	// convert output buffer a go string
 	output += string(outputBuffer[0:bufferSize - int(outputLeft)])
 	// free our outputBuffer, no longer needed
 	//C.free(unsafe.Pointer(&outputBuffer))	
 	// return result and any err
 	return output, err
 }
--- a/examples/sample.ebcdic-us
+++ b/examples/sample.ebcdic-us
--- a/examples/sample.go
+++ b/examples/sample.go
--- a/examples/sample.utf8
+++ b/examples/sample.utf8
--- a/iconv.go
+++ b/iconv.go
@ -15,7 +15,7 @@ var (
 	E2BIG Error = os.Errno(int(C.E2BIG))
 )
-func Convert(input []byte, output []byte, fromEncoding string, toEncoding string) (bytesRead int, bytesWritten int, err os.Error) {
+func Convert(input []byte, output []byte, fromEncoding string, toEncoding string) (bytesRead int, bytesWritten int, err Error) {
 	// create a new converter
 	converter, err := NewConverter(fromEncoding, toEncoding)
@ -30,7 +30,7 @@ func Convert(input []byte, output []byte, fromEncoding string, toEncoding string
 	return
 }
-func ConvertString(input string, fromEncoding string, toEncoding string) (output string, err os.Error) {
+func ConvertString(input string, fromEncoding string, toEncoding string) (output string, err Error) {
 	// create a new converter
 	converter, err := NewConverter(fromEncoding, toEncoding)
--- a/reader.go
+++ b/reader.go
@ -8,10 +8,9 @@ import (
 type Reader struct {
 	source io.Reader
 	converter *Converter
-	rawBuffer []byte
+	buffer []byte
-	rawReadPos, rawWritePos int
+	readPos, writePos int
-	convertedBuffer []byte
+	err os.Error
 	convertedReadPos, convertedWritePos int
 }
 func NewReader(source io.Reader, fromEncoding string, toEncoding string) (*Reader, os.Error) {
@ -34,68 +33,68 @@ func NewReaderFromConverter(source io.Reader, converter *Converter) (reader *Rea
 	reader.converter = converter
 	// create 8K buffers
-	reader.rawBuffer = make([]byte, 8 * 1024)
+	reader.buffer = make([]byte, 8 * 1024)
 	reader.convertedBuffer = make([]byte, 8 * 1024)
 	return reader
 }
-func (this *Reader) fillRawBuffer() {
+func (this *Reader) fillBuffer() {
 	// slide existing data to beginning
-	if this.rawReadPos > 0 {
+	if this.readPos > 0 {
-		// copy current bytes
+		// copy current bytes - is this guaranteed safe?
-		copy(this.rawBuffer, this.rawBuffer[this.rawReadPos:this.rawWritePos])
+		copy(this.buffer, this.buffer[this.readPos:this.writePos])
 		// adjust positions
-		this.rawWritePos -= this.rawReadPos
+		this.writePos -= this.readPos
-		this.rawReadPos = 0
+		this.readPos = 0
 	}
 	// read new data into buffer at write position
-	bytesRead, err := this.source.Read(this.rawBuffer[this.rawWritePos:])
+	bytesRead, err := this.source.Read(this.buffer[this.writePos:])
 	// adjust write position
-	this.rawWritePos += bytesRead
+	this.writePos += bytesRead
-	// track source reader errors
+	// track any reader error / EOF
 	if err != nil {
-		// not sure where to put this for now
+		this.err = err
 	}
 }
 func (this *Reader) fillConvertedBuffer() {
 	// slide existing data to beginning
 	if this.convertedReadPos > 0 {
 		// copy current bytes
 		copy(this.convertedBuffer, this.convertedBuffer[this.convertedReadPos:this.convertedWritePos])
 		// adjust positions
 		this.convertedWritePos -= this.convertedReadPos
 		this.convertedReadPos = 0
 	}
 	// use iconv to fill the converted buffer from the raw buffer
 	bytesRead, bytesWritten, err := this.converter.Convert(this.rawBuffer[this.rawReadPos:this.rawWritePos], this.convertedBuffer[this.convertedWritePos:])
 	// adjust read and write positions
 	this.rawReadPos += bytesRead
 	this.convertedWritePos += bytesWritten
 	// track iconv convert errors
 	if err != nil {
 		// not sure where to put this for now
 	}
 }
 // implement the io.Reader interface
 func (this *Reader) Read(p []byte) (n int, err os.Error) {
-	this.fillRawBuffer()
+	// checks for when we have no data
-	this.fillConvertedBuffer()
+	for this.writePos == 0 || this.readPos == this.writePos {
 		// if we have an error / EOF, just return it
 		if this.err != nil {
 			return n, this.err
 		}
-	if this.convertedWritePos - 1 > this.convertedReadPos {
+		// else, fill our buffer
-		// copy converted bytes into p
+		this.fillBuffer()
 		n = copy(p, this.convertedBuffer[this.convertedReadPos:this.convertedWritePos])
 	}	
-	return
+	// TODO: checks for when we have less data than len(p)
 	// we should have an appropriate amount of data, convert it into the given buffer
 	bytesRead, bytesWritten, err := this.converter.Convert(this.buffer[this.readPos:this.writePos], p)
 	// adjust byte counters
 	this.readPos += bytesRead
 	n += bytesWritten
 	// if we experienced an iconv error, check it
 	if err != nil {
 		// E2BIG errors can be ignored (we'll get them often) as long
 		// as at least 1 byte was written. If we experienced an E2BIG
 		// and no bytes were written then the buffer is too small for
 		// even the next character
 		if err != E2BIG || bytesWritten == 0 {
 			// track anything else
 			this.err = err
 		}
 	}
 	// return our results
 	return n, this.err
 }