Improving documentation and including shift reset logic

2011-01-29 01:31:00 -05:00 · 2011-01-29 01:31:00 -05:00 · 5ea739d3eb
commit 5ea739d3eb
parent 20aa6d93c3
4 changed files with 206 additions and 104 deletions
--- a/8
+++ b/8
@ -1,3 +1,4 @@
 # standard GO make file preamble
 include $(GOROOT)/src/Make.inc
 # target package name
@ -6,10 +7,17 @@ TARG=iconv
 # regular go files
 GOFILES=\
 	reader.go\
 	writer.go\
 # files that must be processed by cgo
 CGOFILES=\
 	converter.go\
 	iconv.go\
 # on non glibc systems, we usually need to load the library
 ifneq ($(GOOS),linux)
 CGO_LDFLAGS=-liconv
 endif
 # standard GO make file include for packages
 include $(GOROOT)/src/Make.pkg
--- a/README.md
+++ b/README.md
@ -1,72 +1,115 @@
-Install
+# Install
 =======
-The goinstall command can be used:
+The main method of installation is through gomake (provided in $GOROOT/bin)
 	goinstall github.com/djimenez/iconv.go
 Or, you can clone the repository and use gomake instead
 	git clone git://github.com/djimenez/iconv.go.git iconv
 	cd iconv
 	gomake install
-Usage
+Alternatively, you can try using goinstall (also provided in $GOROOT/bin).
-=====
+However, because iconv.go uses cgo to wrap iconv functions, the build may not
 succeed on all systems. At the time of writing, goinstall was still experimental and
 has known issues with cgo based packages because of how it produces its own
 make file.
 	goinstall github.com/djimenez/iconv.go
 # Usage
 To use the package, you'll need the appropriate import statement:
 	import (
 		// if you used goinstall, you'll want this import
 		iconv "github.com/djimenez/iconv.go"
 		// if you used gomake install directly, you'll want this import
 		"iconv"
 		// if you used goinstall, you'll want this import
 		iconv "github.com/djimenez/iconv.go"
 	)
-Converting string Values 
+## Converting string Values 
 ------------------------
-Converting a string can be done with two methods. First, there's iconv.ConvertString(input, fromEncoding, toEncoding string)
+Converting a string can be done with two methods. First, there's
 iconv.ConvertString(input, fromEncoding, toEncoding string)
 	output,_ := iconv.ConvertString("Hello World!", "utf-8", "windows-1252")
-Alternatively, you can create a converter and use its ConvertString method. This mostly just saves having to parse the from and to encodings when converting many strings in the same way.
+Alternatively, you can create a converter and use its ConvertString method.
 Reuse of a Converter instance is recommended when doing many string conversions
 between the same encodings.
 	converter := iconv.NewConverter("utf-8", "windows-1252")
 	output,_ := converter.ConvertString("Hello World!")
-Converting []byte Values
+	// converter can then be closed explicitly
------------------------
+	// this will also happen when garbage collected
 	converter.Close()
-Converting a []byte can similarly be done with two methods. First, there's iconv.Convert(input, output []byte, fromEncoding, toEncoding string). You'll immediately notice this requires you to give it both the input and output buffer. Ideally, the output buffer should be sized so that it can hold all converted bytes from input, but if it cannot, then Convert will put as many bytes as it can into the buffer without creating an invalid sequence. For example, if iconv only has a single byte left in the output buffer but needs 2 or more for the complete character in a multibyte encoding it will stop writing to the buffer and return with an iconv.E2BIG error.
+ConvertString may return errors for the following reasons:
-	input := []byte("Hello World!")
+ * EINVAL - when either the from or to encoding is not supported by iconv
-	output := make([]byte, len(input))
+ * EILSEQ - when the input string contains an invalid byte sequence for the
   given from encoding
-	bytesRead, bytesWritten, error := iconv.Convert(input, output, "utf-8", "windows-1252")
+## Converting []byte Values
-Just like with ConvertString, there is also a Convert method on Converter that can be used.
+Converting a []byte can similarly be done with two methods. First, there's
 iconv.Convert(input, output []byte, fromEncoding, toEncoding string). You'll
 immediately notice this requires you to give it both the input and output
 buffer. Ideally, the output buffer should be sized so that it can hold all
 converted bytes from input, but if it cannot, then Convert will put as many
 bytes as it can into the buffer without creating an invalid sequence. For
 example, if iconv only has a single byte left in the output buffer but needs 2
 or more for the complete character in a multibyte encoding it will stop writing
 to the buffer and return with an iconv.E2BIG error.
 	in := []byte("Hello World!")
 	out := make([]byte, len(in))
 	bytesRead, bytesWritten, err := iconv.Convert(in, out, "utf-8", "latin1")
 Just like with ConvertString, there is also a Convert method on Converter that
 can be used.
 	...
 	converter := iconv.NewConverter("utf-8", "windows-1252")
 	bytesRead, bytesWritten, error := converter.Convert(input, output)
-Converting an *io.Reader
+Convert may return errors for the following reasons:
 ------------------------
-The iconv.Reader allows any other *io.Reader to be wrapped and have its bytes transcoded as they are read. 
+ * EINVAL - when either the from or to encoding is not supported by iconv
 * EILSEQ - when the input string contains an invalid byte sequence for the
   given from encoding
 * E2BIG - when the output buffer is not big enough to hold the full
   conversion of input
-	// We're wrapping stdin for simplicity, but a File or network reader could be wrapped as well
+   Note on E2BIG: this is a common error value, especially when converting to a
   multibyte encoding, and should not be considered fatal. Partial conversion
   has probably occurred, so be sure to check bytesRead and bytesWritten.
 ### Note on Shift Based Encodings
 When using the iconv.Convert convenience method, it will automatically try to append
 to your output buffer with a nil input so that any end shift sequences are
 appropriately written. Using a Converter.Convert method however will not
 automatically do this since it can be used to process a full stream in chunks.
 So you'll need to remember to pass a nil input buffer at the end yourself, just
 like you would with direct iconv usage.
 ## Converting an *io.Reader
 The iconv.Reader allows any other *io.Reader to be wrapped and have its bytes
 transcoded as they are read. 
 	// We're wrapping stdin for simplicity, but a File or network reader could
 	// be wrapped as well
 	reader,_ := iconv.NewReader(os.Stdin, "utf-8", "windows-1252")
-Converting an *io.Writer
+## Converting an *io.Writer
 ------------------------
-To be written.
+The iconv.Writer allows any other *io.Writer to be wrapped and have its bytes
 transcoded as they are written. 
-Piping a Conversion
+	// We're wrapping stdout for simplicity, but a File or network writer could
-------------------
+	// be wrapped as well
-
+	writer,_ := iconv.NewWriter(os.Stdout, "utf-8", "windows-1252")
 To be written.
--- a/converter.go
+++ b/converter.go
@ -1,25 +1,25 @@
 package iconv
 /*
 #include <iconv.h>
 #include <stdlib.h>
 #include <iconv.h>
 */
 import "C"
-
+import "os"
-import (
+import "unsafe"
 	"os"
 	"unsafe"
 )
 type Converter struct {
 	context C.iconv_t
 	open bool
 }
 // Initialize a new Converter. If fromEncoding or toEncoding are not supported by
 // iconv then an EINVAL error will be returned. An ENOMEM error may be returned if
 // there is not enough memory to initialize an iconv descriptor
 func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err Error) {
 	converter = new(Converter)
-	// create C strings
+	// convert to C strings
 	toEncodingC := C.CString(toEncoding)
 	fromEncodingC := C.CString(fromEncoding)
@ -39,12 +39,12 @@ func NewConverter(fromEncoding string, toEncoding string) (converter *Converter,
 	return
 }
-// Called before garbage collection
+// destroy is called during garbage collection
 func (this *Converter) destroy() {
 	this.Close()
 }
-// The converter can be explicitly closed if desired
+// Close a Converter's iconv descriptor explicitly
 func (this *Converter) Close() (err os.Error) {
 	if this.open {
 		_, err = C.iconv_close(this.context)
@ -53,73 +53,106 @@ func (this *Converter) Close() (err os.Error) {
 	return
 }
-// read bytes from an input buffer, and write them to and output buffer
+// Convert bytes from an input byte slice into a given output byte slice
 // will return the number of bytesRead from the input and the number of bytes
 // written to the output as well as any iconv errors
 //
-// NOTE: not all bytes may be consumed from the input. This can be because the output
+// As many bytes as can be converted and fit into the size of output will be
-// buffer is too small or because there were iconv errors
+// processed and the number of bytes read from input as well as the number of
 // bytes written to output will be returned. If not all converted bytes can fit
 // into output an E2BIG error will also be returned. If input contains an invalid
 // sequence of bytes for the Converter's fromEncoding an EILSEQ error will be returned
 //
 // For shift based output encodings, any end shift byte sequences can be generated by
 // passing a 0 length byte slice as input. Also passing a 0 length byte slice for output
 // will simply reset the iconv descriptor shift state without writing any bytes.
 func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err Error) {
-	inputLeft := C.size_t(len(input))
+	// make sure we are still open
-	outputLeft := C.size_t(len(output))
+	if this.open {
 		inputLeft := C.size_t(len(input))
 		outputLeft := C.size_t(len(output))
-	if inputLeft > 0 && outputLeft > 0 {
+		if inputLeft > 0 && outputLeft > 0 {
-		// we're going to give iconv the pointers to the underlying
+			// we have to give iconv a pointer to a pointer of the underlying
-		// storage of each byte slice - so far this is the simplest
+			// storage of each byte slice - so far this is the simplest
-		// way i've found to do that in Go, but it seems ugly
+			// way i've found to do that in Go, but it seems ugly
-		inputFirstElementPointer := &input[0]
+			inputPointer := (*C.char)(unsafe.Pointer(&input[0]))
-		inputPointer := (**C.char)(unsafe.Pointer(&inputFirstElementPointer))
+			outputPointer := (*C.char)(unsafe.Pointer(&output[0]))
-		outputFirstElementPointer := &output[0]
+			_,err = C.iconv(this.context, &inputPointer, &inputLeft, &outputPointer, &outputLeft)
 		outputPointer := (**C.char)(unsafe.Pointer(&outputFirstElementPointer))
-		// we're only going to make one call to iconv
+			// update byte counters
-		_,err = C.iconv(this.context, inputPointer, &inputLeft, outputPointer, &outputLeft)
+			bytesRead = len(input) - int(inputLeft)
 			bytesWritten = len(output) - int(outputLeft)
 		} else if inputLeft == 0 && outputLeft > 0 {
 			// inputPointer will be nil, outputPointer is generated as above
 			outputPointer := (*C.char)(unsafe.Pointer(&output[0]))
-		// update byte counters
+			_,err = C.iconv(this.context, nil, &inputLeft, &outputPointer, &outputLeft)
-		bytesRead = len(input) - int(inputLeft)
+
-		bytesWritten = len(output) - int(outputLeft)
+			// update write byte counter
 			bytesWritten = len(output) - int(outputLeft)
 		} else {
 			// both input and output are zero length, do a shift state reset
 			_,err = C.iconv(this.context, nil, &inputLeft, nil, &outputLeft)
 		}
 	} else {
 		err = EBADF
 	}
 	return bytesRead, bytesWritten, err
 }
-// convert a string value, returning a new string value
+// Convert an input string
 //
 // EILSEQ error may be returned if input contains invalid bytes for the
 // Converter's fromEncoding.
 func (this *Converter) ConvertString(input string) (output string, err Error) {
 	// make sure we are still open
 	if this.open {
 		// construct the buffers
 		inputBuffer := []byte(input)
 		outputBuffer := make([]byte, len(inputBuffer) * 2) // we use a larger buffer to help avoid resizing later
-	// construct the buffers
+		// call Convert until all input bytes are read or an error occurs
-	inputBuffer := []byte(input)
+		var bytesRead, totalBytesRead, bytesWritten, totalBytesWritten int
 	outputBuffer := make([]byte, len(inputBuffer) * 2) // we use a larger buffer to help avoid resizing later
-	// call Convert until all input bytes are read or an error occurs
+		for totalBytesRead < len(inputBuffer) && err == nil {
-	var bytesRead, totalBytesRead, bytesWritten, totalBytesWritten int
+			// use the totals to create buffer slices
 			bytesRead, bytesWritten, err = this.Convert(inputBuffer[totalBytesRead:], outputBuffer[totalBytesWritten:])
-	for totalBytesRead < len(inputBuffer) && err == nil {
+			totalBytesRead += bytesRead
-		bytesRead, bytesWritten, err = this.Convert(inputBuffer, outputBuffer)
+			totalBytesWritten += bytesWritten
-		totalBytesRead += bytesRead
+			// check for the E2BIG error specifically, we can add to the output
-		totalBytesWritten += bytesWritten
+			// buffer to correct for it and then continue
 			if err == E2BIG {
 				// increase the size of the output buffer by another input length
 				// first, create a new buffer
 				tempBuffer := make([]byte, len(outputBuffer) + len(inputBuffer))
-		// check for the E2BIG error specifically, we can add to the output
+				// copy the existing data
-		// buffer to correct for it and then continue
+				copy(tempBuffer, outputBuffer)
 		if err == E2BIG {
 			// increase the size of the output buffer by another input length
 			// first, create a new buffer
 			tempBuffer := make([]byte, len(outputBuffer) + len(inputBuffer))
-			// copy the existing data
+				// switch the buffers
-			copy(tempBuffer, outputBuffer)
+				outputBuffer = tempBuffer
-			// switch the buffers
+				// forget the error
-			outputBuffer = tempBuffer
+				err = nil
-
+			}
 			// forget the error
 			err = nil
 		}
 	}
-	// construct the final output string
+		if err == nil {
-	output = string(outputBuffer[:totalBytesWritten])
+			// perform a final shift state reset
 			_, bytesWritten, err = this.Convert([]byte{}, outputBuffer[totalBytesWritten:])
 			// update total count
 			totalBytesWritten += bytesWritten
 		}
 		// construct the final output string
 		output = string(outputBuffer[:totalBytesWritten])
 	} else {
 		err = EBADF
 	}
 	return output, err
 }
--- a/iconv.go
+++ b/iconv.go
@ -1,30 +1,47 @@
 /*
 Wraps the iconv API present on most systems, which allows for conversion
 of bytes from one encoding to another. This package additionally provides
 some convenient interface implementations like a Reader and Writer.
 */
 package iconv
 /*
 #include <errno.h>
 */
 import "C"
 import "os"
-import (
+// Alias os.Error for convenience
 	"os"
 )
 // allows us to check for iconv specific errors
 type Error os.Error
 // Error codes returned from iconv functions
 var (
 	EILSEQ Error = os.Errno(int(C.EILSEQ))
 	E2BIG Error = os.Errno(int(C.E2BIG))
 	EBADF Error = os.Errno(int(C.EBADF))
 	EINVAL Error = os.Errno(int(C.EINVAL))
 	EILSEQ Error = os.Errno(int(C.EILSEQ))
 	ENOMEM Error = os.Errno(int(C.ENOMEM))
 )
 // All in one Convert method, rather than requiring the construction of an iconv.Converter
 func Convert(input []byte, output []byte, fromEncoding string, toEncoding string) (bytesRead int, bytesWritten int, err Error) {
-	// create a new converter
+	// create a temporary converter
 	converter, err := NewConverter(fromEncoding, toEncoding)
 	if err == nil {
-		// call Convert
+		// call converter's Convert
 		bytesRead, bytesWritten, err = converter.Convert(input, output)
 		if err == nil {
 			var shiftBytesWritten int
 			// call Convert with a nil input to generate any end shift sequences
 			_, shiftBytesWritten, err = converter.Convert(nil, output[bytesWritten:])
 			// add shift bytes to total bytes
 			bytesWritten += shiftBytesWritten
 		}
 		// close the converter
 		converter.Close()
 	}
@ -32,8 +49,9 @@ func Convert(input []byte, output []byte, fromEncoding string, toEncoding string
 	return
 }
 // All in one ConvertString method, rather than requiring the construction of an iconv.Converter
 func ConvertString(input string, fromEncoding string, toEncoding string) (output string, err Error) {
-	// create a new converter
+	// create a temporary converter
 	converter, err := NewConverter(fromEncoding, toEncoding)
 	if err == nil {