* Added README.md

* moved sample programs to examples directory * cleaned up the Makefile * converter.go: ConvertString now uses Convert under the hood, which removes some code duplication * reader.go: No need to have two separate buffers; we can write directly into the buffer given in the Read call. Simplifies the code greatly
2011-01-15 04:06:50 -05:00 · 2011-01-15 04:06:50 -05:00 · 690531c87e
commit 690531c87e
parent 82db0fae9a
8 changed files with 164 additions and 129 deletions
--- a/29
+++ b/29
@ -1,34 +1,15 @@
-# Copyright 2009 The Go Authors.  All rights reserved.
-# Use of this source code is governed by a BSD-style
-# license that can be found in the LICENSE file.
-
 include $(GOROOT)/src/Make.inc

+# target package name
 TARG=iconv

+# regular go files
 GOFILES=\
-	reader.go
+	reader.go\

+# files that must be processed by cgo
 CGOFILES=\
+	converter.go\
 	iconv.go\
-	converter.go
-
-ifeq ($(GOOS),windows)
-CGO_LDFLAGS=-liconv
-endif
-
-# To add flags necessary for locating the library or its include files,
-# set CGO_CFLAGS or CGO_LDFLAGS.  For example, to use an
-# alternate installation of the library:
-#	CGO_CFLAGS=-I/home/rsc/gmp32/include
-#	CGO_LDFLAGS+=-L/home/rsc/gmp32/lib
-# Note the += on the second line.
-
-CLEANFILES+=sample

 include $(GOROOT)/src/Make.pkg
-
-# simple test program to test iconv conversion
-sample: install sample.go
-	$(GC) $@.go
-	$(LD) -o $@ $@.$O
--- a/README.md
+++ b/README.md
@ -0,0 +1,72 @@
+Install
+=======
+
+The goinstall command can be used:
+
+	goinstall github.com/djimenez/iconv.go
+
+Or, you can clone the repository and use gomake instead:
+
+	git clone git://github.com/djimenez/iconv.go.git iconv
+	cd iconv
+	gomake install
+
+Usage
+=====
+
+To use the package, you'll need the appropriate import statement:
+
+	import (
+		// if you used goinstall, you'll want this import
+		iconv "github.com/djimenez/iconv.go"
+
+		// if you used gomake install directly, you'll want this import
+		"iconv"
+	)
+
+Converting string Values 
+------------------------
+
+Converting a string can be done in two ways. First, there's the package-level function iconv.ConvertString(input, fromEncoding, toEncoding string):
+
+	output,_ := iconv.ConvertString("Hello World!", "utf-8", "windows-1252")
+
+Alternatively, you can create a converter and use its ConvertString method. This mostly just saves having to parse the from and to encodings when converting many strings in the same way.
+
+	converter,_ := iconv.NewConverter("utf-8", "windows-1252")
+	output,_ := converter.ConvertString("Hello World!")
+
+Converting []byte Values
+------------------------
+
+Converting a []byte can similarly be done with two methods. First, there's iconv.Convert(input, output []byte, fromEncoding, toEncoding string). You'll immediately notice this requires you to give it both the input and output buffers. Ideally, the output buffer should be sized so that it can hold all converted bytes from input, but if it cannot, then Convert will put as many bytes as it can into the buffer without creating an invalid sequence. For example, if iconv only has a single byte left in the output buffer but needs 2 or more for the complete character in a multibyte encoding, it will stop writing to the buffer and return with an iconv.E2BIG error.
+
+	input := []byte("Hello World!")
+	output := make([]byte, len(input))
+	
+	bytesRead, bytesWritten, error := iconv.Convert(input, output, "utf-8", "windows-1252")
+
+Just like with ConvertString, there is also a Convert method on Converter that can be used.
+
+	...
+	converter,_ := iconv.NewConverter("utf-8", "windows-1252")
+	
+	bytesRead, bytesWritten, error := converter.Convert(input, output)
+
+Converting an io.Reader
+-----------------------
+
+The iconv.Reader allows any other io.Reader to be wrapped and have its bytes transcoded as they are read.
+
+	// We're wrapping stdin for simplicity, but a File or network reader could be wrapped as well
+	reader,_ := iconv.NewReader(os.Stdin, "utf-8", "windows-1252")
+
+Converting an io.Writer
+-----------------------
+
+To be written.
+
+Piping a Conversion
+-------------------
+
+To be written.
--- a/converter.go
+++ b/converter.go
@ -13,7 +13,7 @@ type Converter struct {
 	open bool
 }

-func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err os.Error) {
+func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err Error) {
 	converter = new(Converter)

 	converter.context, err = C.iconv_open(C.CString(toEncoding), C.CString(fromEncoding))
@ -47,21 +47,21 @@ func (this *Converter) Close() (err os.Error) {
 //
 // NOTE: not all bytes may be consumed from the input. This can be because the output
 // buffer is too small or because there were iconv errors
-func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err os.Error) {
+func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err Error) {
 	inputLeft := C.size_t(len(input))
 	outputLeft := C.size_t(len(output))
-
-	// we're going to give iconv the pointers to the underlying
-	// storage of each byte slice - so far this is the simplest
-	// way i've found to do that in Go, but it seems ugly
-	inputFirstElementPointer := &input[0]
-	inputPointer := (**C.char)(unsafe.Pointer(&inputFirstElementPointer))
-
-	outputFirstElementPointer := &output[0]
-	outputPointer := (**C.char)(unsafe.Pointer(&outputFirstElementPointer))
-
-	// we're only going to make one call to iconv
+	
 	if inputLeft > 0 && outputLeft > 0 {
+		// we're going to give iconv the pointers to the underlying
+		// storage of each byte slice - so far this is the simplest
+		// way i've found to do that in Go, but it seems ugly
+		inputFirstElementPointer := &input[0]
+		inputPointer := (**C.char)(unsafe.Pointer(&inputFirstElementPointer))
+
+		outputFirstElementPointer := &output[0]
+		outputPointer := (**C.char)(unsafe.Pointer(&outputFirstElementPointer))
+
+		// we're only going to make one call to iconv
 		_,err = C.iconv(this.context, inputPointer, &inputLeft, outputPointer, &outputLeft)

 		// update byte counters
@ -72,59 +72,42 @@ func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, byte
 	return bytesRead, bytesWritten, err
 }

-// convert the bytes of a string and return the resulting string
-//
-// TODO: can we do this in terms of Convert function
-func (this *Converter) ConvertString(input string) (output string, err os.Error) {
-	// both our input buffer and output buffer will be the same size
-	// but we'll reuse our output buffer each time its filled
-	bufferSize := len(input)
-	sourceLeft := C.size_t(bufferSize)
-	outputLeft := sourceLeft
-	outputReset := outputLeft
+// convert a string value, returning a new string value
+func (this *Converter) ConvertString(input string) (output string, err Error) {

-	// our input buffer is the source string, but iconv will track
-	// how many bytes has left to process
-	sourceBuffer := C.CString(input)
-	sourcePointer := &sourceBuffer
+	// construct the buffers
+	inputBuffer := []byte(input)
+	outputBuffer := make([]byte, len(inputBuffer) * 2) // we use a larger buffer to help avoid resizing later

-	outputBuffer := make([]byte, bufferSize)
-	outputFirstPointer := &outputBuffer[0] 
-	outputPointer := (**C.char)(unsafe.Pointer(&outputFirstPointer))
+	// call Convert until all input bytes are read or an error occurs
+	var bytesRead, totalBytesRead, bytesWritten, totalBytesWritten int

-	// process the source with iconv in a loop
-	for sourceLeft > 0 {
-		//fmt.Println("calling to iconv")
-		_,err := C.iconv(this.context, sourcePointer, &sourceLeft, outputPointer, &outputLeft)
+	for totalBytesRead < len(inputBuffer) && err == nil {
+		bytesRead, bytesWritten, err = this.Convert(inputBuffer, outputBuffer)

-		//fmt.Println("sourceLeft: ", int(sourceLeft), " outputLeft: ", int(outputLeft))
+		totalBytesRead += bytesRead
+		totalBytesWritten += bytesWritten

-		// check the err - most interested if we need to expand the output buffer
-		if err != nil {
-			//fmt.Println("got error value: ", err)
+		// check for the E2BIG error specifically, we can add to the output
+		// buffer to correct for it and then continue
+		if err == E2BIG {
+			// increase the size of the output buffer by another input length
+			// first, create a new buffer
+			tempBuffer := make([]byte, len(outputBuffer) + len(inputBuffer))
+			
+			// copy the existing data
+			copy(tempBuffer, outputBuffer)

-			if err == E2BIG {
-				// we need more output buffer to continue
-				// instead of resizing, lets pull what we got so far
-				// and set outputLeft back to the buffer size
-				output += string(outputBuffer[0:bufferSize - int(outputLeft)])
-				outputLeft = outputReset
-			} else {
-				// we got an error we can't continue with
-				break
-			}
+			// switch the buffers
+			outputBuffer = tempBuffer
+
+			// forget the error
+			err = nil
 		}
 	}

-	// free our sourceBuffer, no longer needed
-	//C.free(unsafe.Pointer(&sourceBuffer))
+	// construct the final output string
+	output = string(outputBuffer[:totalBytesWritten])

-	// convert output buffer a go string
-	output += string(outputBuffer[0:bufferSize - int(outputLeft)])
-
-	// free our outputBuffer, no longer needed
-	//C.free(unsafe.Pointer(&outputBuffer))	
-	
-	// return result and any err
 	return output, err
 }
--- a/examples/sample.ebcdic-us
+++ b/examples/sample.ebcdic-us
--- a/examples/sample.go
+++ b/examples/sample.go
--- a/examples/sample.utf8
+++ b/examples/sample.utf8
--- a/iconv.go
+++ b/iconv.go
@ -15,7 +15,7 @@ var (
 	E2BIG Error = os.Errno(int(C.E2BIG))
 )

-func Convert(input []byte, output []byte, fromEncoding string, toEncoding string) (bytesRead int, bytesWritten int, err os.Error) {
+func Convert(input []byte, output []byte, fromEncoding string, toEncoding string) (bytesRead int, bytesWritten int, err Error) {
 	// create a new converter
 	converter, err := NewConverter(fromEncoding, toEncoding)

@ -30,7 +30,7 @@ func Convert(input []byte, output []byte, fromEncoding string, toEncoding string
 	return
 }

-func ConvertString(input string, fromEncoding string, toEncoding string) (output string, err os.Error) {
+func ConvertString(input string, fromEncoding string, toEncoding string) (output string, err Error) {
 	// create a new converter
 	converter, err := NewConverter(fromEncoding, toEncoding)

--- a/reader.go
+++ b/reader.go
@ -8,10 +8,9 @@ import (
 type Reader struct {
 	source io.Reader
 	converter *Converter
-	rawBuffer []byte
-	rawReadPos, rawWritePos int
-	convertedBuffer []byte
-	convertedReadPos, convertedWritePos int
+	buffer []byte
+	readPos, writePos int
+	err os.Error
 }

 func NewReader(source io.Reader, fromEncoding string, toEncoding string) (*Reader, os.Error) {
@ -34,68 +33,68 @@ func NewReaderFromConverter(source io.Reader, converter *Converter) (reader *Rea
 	reader.converter = converter

 	// create 8K buffers
-	reader.rawBuffer = make([]byte, 8 * 1024)
-	reader.convertedBuffer = make([]byte, 8 * 1024)
+	reader.buffer = make([]byte, 8 * 1024)

 	return reader
 }

-func (this *Reader) fillRawBuffer() {
+func (this *Reader) fillBuffer() {
 	// slide existing data to beginning
-	if this.rawReadPos > 0 {
-		// copy current bytes
-		copy(this.rawBuffer, this.rawBuffer[this.rawReadPos:this.rawWritePos])
+	if this.readPos > 0 {
+		// copy current bytes - is this guaranteed safe?
+		copy(this.buffer, this.buffer[this.readPos:this.writePos])

 		// adjust positions
-		this.rawWritePos -= this.rawReadPos
-		this.rawReadPos = 0
+		this.writePos -= this.readPos
+		this.readPos = 0
 	}

 	// read new data into buffer at write position
-	bytesRead, err := this.source.Read(this.rawBuffer[this.rawWritePos:])
+	bytesRead, err := this.source.Read(this.buffer[this.writePos:])

 	// adjust write position
-	this.rawWritePos += bytesRead
+	this.writePos += bytesRead

-	// track source reader errors
+	// track any reader error / EOF
 	if err != nil {
-		// not sure where to put this for now
-	}
-}
-
-func (this *Reader) fillConvertedBuffer() {
-	// slide existing data to beginning
-	if this.convertedReadPos > 0 {
-		// copy current bytes
-		copy(this.convertedBuffer, this.convertedBuffer[this.convertedReadPos:this.convertedWritePos])
-
-		// adjust positions
-		this.convertedWritePos -= this.convertedReadPos
-		this.convertedReadPos = 0
-	}
-
-	// use iconv to fill the converted buffer from the raw buffer
-	bytesRead, bytesWritten, err := this.converter.Convert(this.rawBuffer[this.rawReadPos:this.rawWritePos], this.convertedBuffer[this.convertedWritePos:])
-
-	// adjust read and write positions
-	this.rawReadPos += bytesRead
-	this.convertedWritePos += bytesWritten
-
-	// track iconv convert errors
-	if err != nil {
-		// not sure where to put this for now
+		this.err = err
 	}
 }

 // implement the io.Reader interface
 func (this *Reader) Read(p []byte) (n int, err os.Error) {
-	this.fillRawBuffer()
-	this.fillConvertedBuffer()
+	// checks for when we have no data
+	for this.writePos == 0 || this.readPos == this.writePos {
+		// if we have an error / EOF, just return it
+		if this.err != nil {
+			return n, this.err
+		}

-	if this.convertedWritePos - 1 > this.convertedReadPos {
-		// copy converted bytes into p
-		n = copy(p, this.convertedBuffer[this.convertedReadPos:this.convertedWritePos])
+		// else, fill our buffer
+		this.fillBuffer()
+	}	
+
+	// TODO: checks for when we have less data than len(p)
+
+	// we should have an appropriate amount of data, convert it into the given buffer
+	bytesRead, bytesWritten, err := this.converter.Convert(this.buffer[this.readPos:this.writePos], p)
+
+	// adjust byte counters
+	this.readPos += bytesRead
+	n += bytesWritten
+
+	// if we experienced an iconv error, check it
+	if err != nil {
+		// E2BIG errors can be ignored (we'll get them often) as long
+		// as at least 1 byte was written. If we experienced an E2BIG
+		// and no bytes were written then the buffer is too small for
+		// even the next character
+		if err != E2BIG || bytesWritten == 0 {
+			// track anything else
+			this.err = err
+		}
 	}

-	return
+	// return our results
+	return n, this.err
 }