Initial iconv go package, supports:

* string conversion * byte slice conversion * Reader conversion
2011-01-14 18:34:30 -05:00 · 2011-01-14 18:34:30 -05:00 · 82db0fae9a
commit 82db0fae9a
7 changed files with 419 additions and 0 deletions
--- a/34
+++ b/34
@ -0,0 +1,34 @@
+# Copyright 2009 The Go Authors.  All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include $(GOROOT)/src/Make.inc
+
+TARG=iconv
+
+GOFILES=\
+	reader.go
+
+CGOFILES=\
+	iconv.go\
+	converter.go
+
+ifeq ($(GOOS),windows)
+CGO_LDFLAGS=-liconv
+endif
+
+# To add flags necessary for locating the iconv library or its include
+# files, set CGO_CFLAGS or CGO_LDFLAGS.  For example, to use an
+# alternate installation of the library (the paths below are placeholders):
+#	CGO_CFLAGS=-I/opt/libiconv/include
+#	CGO_LDFLAGS+=-L/opt/libiconv/lib
+# Note the += on the second line, which keeps any flags set above.
+
+CLEANFILES+=sample
+
+include $(GOROOT)/src/Make.pkg
+
+# sample: installs the package, then compiles and links sample.go into ./sample
+sample: install sample.go
+	$(GC) $@.go
+	$(LD) -o $@ $@.$O
--- a/converter.go
+++ b/converter.go
@ -0,0 +1,130 @@
+package iconv
+
+// #include <iconv.h>
+// #include <stdlib.h>
+import "C"
+
+import (
+	"os"
+	"unsafe"
+)
+
+type Converter struct {
+	context C.iconv_t // conversion descriptor returned by C.iconv_open
+	open bool // set to true by NewConverter when iconv_open reported no error
+}
+
+// NewConverter opens an iconv conversion descriptor that converts text from
+// fromEncoding to toEncoding.
+//
+// The returned converter is always non-nil; when err is non-nil the converter
+// is not open and must not be used for conversions. Callers should Close the
+// converter when they are finished with it.
+func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err os.Error) {
+	converter = new(Converter)
+
+	// C.CString allocates with malloc; iconv_open does not take ownership of
+	// the names, so release both copies once the call has returned
+	toEncodingC := C.CString(toEncoding)
+	defer C.free(unsafe.Pointer(toEncodingC))
+
+	fromEncodingC := C.CString(fromEncoding)
+	defer C.free(unsafe.Pointer(fromEncodingC))
+
+	// note the argument order: iconv_open takes the target encoding first
+	converter.context, err = C.iconv_open(toEncodingC, fromEncodingC)
+
+	// no error, mark the context as open so Close knows to release it
+	if err == nil {
+		converter.open = true
+	}
+
+	return
+}
+
+// destroy closes the converter; meant to run before garbage collection (NOTE(review): no finalizer registration is visible here - confirm it is ever called)
+func (this *Converter) destroy() {
+	this.Close()
+}
+
+// Close releases the underlying iconv descriptor. It may be called
+// explicitly and is safe to call more than once: only the first call on an
+// open converter reaches iconv_close, later calls return nil.
+func (this *Converter) Close() (err os.Error) {
+	if this.open {
+		_, err = C.iconv_close(this.context)
+
+		// mark the converter closed even if iconv_close reported an error,
+		// so the descriptor is never handed to iconv_close a second time
+		this.open = false
+	}
+
+	return
+}
+
+// Convert reads bytes from input and writes the converted bytes to output
+// with a single call to iconv. It returns the number of bytes consumed from
+// input, the number of bytes written to output, and any iconv error.
+//
+// NOTE: not all bytes may be consumed from the input. This can be because the
+// output buffer is too small or because there were iconv errors.
+//
+// If either slice is empty nothing is converted and (0, 0, nil) is returned.
+func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err os.Error) {
+	// check for empty buffers before taking the address of element 0,
+	// which would panic on an empty slice
+	if len(input) == 0 || len(output) == 0 {
+		return 0, 0, nil
+	}
+
+	inputLeft := C.size_t(len(input))
+	outputLeft := C.size_t(len(output))
+
+	// give iconv pointers to the underlying storage of each byte slice;
+	// iconv advances these pointers and decrements the counters as it works
+	inputFirstElementPointer := &input[0]
+	inputPointer := (**C.char)(unsafe.Pointer(&inputFirstElementPointer))
+
+	outputFirstElementPointer := &output[0]
+	outputPointer := (**C.char)(unsafe.Pointer(&outputFirstElementPointer))
+
+	// we're only going to make one call to iconv
+	_, err = C.iconv(this.context, inputPointer, &inputLeft, outputPointer, &outputLeft)
+
+	// the counters now hold what is left, so the difference is what was used
+	bytesRead = len(input) - int(inputLeft)
+	bytesWritten = len(output) - int(outputLeft)
+
+	return bytesRead, bytesWritten, err
+}
+
+// ConvertString converts the bytes of input and returns the resulting string.
+//
+// The output is produced in passes through a fixed-size scratch buffer: each
+// time iconv reports E2BIG the converted bytes are appended to the result and
+// the buffer is reused from its start. Any other iconv error stops the
+// conversion; the text converted so far is returned together with that error.
+func (this *Converter) ConvertString(input string) (output string, err os.Error) {
+	// an empty string converts to an empty string; returning early also
+	// avoids taking the address of element 0 of an empty slice
+	if len(input) == 0 {
+		return "", nil
+	}
+
+	// copy the source into a byte slice; iconv advances the pointer and
+	// decrements sourceLeft as it consumes bytes
+	source := []byte(input)
+	sourceLeft := C.size_t(len(source))
+	sourceFirstPointer := &source[0]
+	sourcePointer := (**C.char)(unsafe.Pointer(&sourceFirstPointer))
+
+	// the scratch buffer is as large as the input, with a small minimum so
+	// that one converted character always fits even for very short inputs
+	bufferSize := len(input)
+	if bufferSize < 16 {
+		bufferSize = 16
+	}
+	outputBuffer := make([]byte, bufferSize)
+
+	// process the source with iconv in a loop
+	for sourceLeft > 0 {
+		// rewind both the output pointer and the counter to the start of
+		// the scratch buffer; iconv moved them during the previous pass
+		outputLeft := C.size_t(bufferSize)
+		outputFirstPointer := &outputBuffer[0]
+		outputPointer := (**C.char)(unsafe.Pointer(&outputFirstPointer))
+
+		// assign to the named result (no :=) so the error reaches the caller
+		_, err = C.iconv(this.context, sourcePointer, &sourceLeft, outputPointer, &outputLeft)
+
+		// collect whatever this pass produced
+		written := bufferSize - int(outputLeft)
+		output += string(outputBuffer[0:written])
+
+		if err == nil {
+			continue
+		}
+
+		// E2BIG only means the scratch buffer filled up, so go around again;
+		// stop on any other error, or if E2BIG made no progress at all
+		if err != E2BIG || written == 0 {
+			break
+		}
+	}
+
+	// return result and any err
+	return output, err
+}
--- a/iconv.go
+++ b/iconv.go
@ -0,0 +1,46 @@
+package iconv
+
+// #include <errno.h>
+import "C"
+
+import (
+	"os"
+)
+
+// Error is the type of the iconv specific error values below, so a returned error can be compared against them
+type Error os.Error
+
+var (
+	EILSEQ Error = os.Errno(int(C.EILSEQ)) // the C errno value EILSEQ wrapped as an os.Errno
+	E2BIG Error = os.Errno(int(C.E2BIG)) // the C errno value E2BIG; ConvertString treats it as "output buffer is full"
+)
+
+// Convert is a one-shot helper: it opens a converter from fromEncoding to
+// toEncoding, runs a single Converter.Convert of input into output, and closes
+// the converter again. It returns the bytes consumed, the bytes produced and
+// the error from opening the converter or from the conversion.
+func Convert(input []byte, output []byte, fromEncoding string, toEncoding string) (bytesRead int, bytesWritten int, err os.Error) {
+	conv, err := NewConverter(fromEncoding, toEncoding)
+
+	// could not open a converter: counts stay zero, err says why
+	if err != nil {
+		return
+	}
+
+	bytesRead, bytesWritten, err = conv.Convert(input, output)
+
+	// the converter was only needed for this call
+	conv.Close()
+
+	return
+}
+
+// ConvertString is a one-shot helper: it opens a converter from fromEncoding
+// to toEncoding, converts input with Converter.ConvertString, and closes the
+// converter again. It returns the converted string and the error from opening
+// the converter or from the conversion.
+func ConvertString(input string, fromEncoding string, toEncoding string) (output string, err os.Error) {
+	conv, err := NewConverter(fromEncoding, toEncoding)
+
+	// could not open a converter: output stays empty, err says why
+	if err != nil {
+		return
+	}
+
+	output, err = conv.ConvertString(input)
+
+	// the converter was only needed for this call
+	conv.Close()
+
+	return
+}
--- a/reader.go
+++ b/reader.go
@ -0,0 +1,101 @@
+package iconv
+
+import ( 
+	"io"
+	"os"
+)
+
+type Reader struct {
+	source io.Reader // underlying reader that supplies the unconverted bytes
+	converter *Converter // converter used to turn raw bytes into converted bytes
+	rawBuffer []byte // bytes read from source that have not been converted yet
+	rawReadPos, rawWritePos int // pending raw data is rawBuffer[rawReadPos:rawWritePos]
+	convertedBuffer []byte // converter output waiting to be copied to the caller
+	convertedReadPos, convertedWritePos int // pending output is convertedBuffer[convertedReadPos:convertedWritePos]
+}
+
+// NewReader builds a Reader that pulls bytes from source and converts them
+// from fromEncoding to toEncoding. If the converter cannot be created the
+// returned Reader is nil and the error from NewConverter is passed along.
+func NewReader(source io.Reader, fromEncoding string, toEncoding string) (*Reader, os.Error) {
+	conv, openErr := NewConverter(fromEncoding, toEncoding)
+
+	// bail out first; the success path follows
+	if openErr != nil {
+		return nil, openErr
+	}
+
+	return NewReaderFromConverter(source, conv), openErr
+}
+
+func NewReaderFromConverter(source io.Reader, converter *Converter) (reader *Reader) {
+	reader = new(Reader)
+
+	// copy elements
+	reader.source = source
+	reader.converter = converter
+
+	// create 8K buffers
+	reader.rawBuffer = make([]byte, 8 * 1024)
+	reader.convertedBuffer = make([]byte, 8 * 1024)
+
+	return reader
+}
+
+// fillRawBuffer slides any unconverted bytes to the start of rawBuffer and
+// then reads more bytes from the source into the free space behind them.
+// It returns the error reported by the source reader, if any.
+func (this *Reader) fillRawBuffer() os.Error {
+	// slide existing data to beginning
+	if this.rawReadPos > 0 {
+		copy(this.rawBuffer, this.rawBuffer[this.rawReadPos:this.rawWritePos])
+
+		// adjust positions
+		this.rawWritePos -= this.rawReadPos
+		this.rawReadPos = 0
+	}
+
+	// no free space: wait until the converter has consumed some raw bytes
+	if this.rawWritePos == len(this.rawBuffer) {
+		return nil
+	}
+
+	// read new data into buffer at write position
+	bytesRead, err := this.source.Read(this.rawBuffer[this.rawWritePos:])
+
+	// a reader may return data together with an error, so always count it
+	this.rawWritePos += bytesRead
+
+	return err
+}
+
+// fillConvertedBuffer slides any undelivered output to the start of
+// convertedBuffer and then converts pending raw bytes into the free space
+// behind it. It returns the error reported by the converter, if any.
+func (this *Reader) fillConvertedBuffer() os.Error {
+	// slide existing data to beginning
+	if this.convertedReadPos > 0 {
+		copy(this.convertedBuffer, this.convertedBuffer[this.convertedReadPos:this.convertedWritePos])
+
+		// adjust positions
+		this.convertedWritePos -= this.convertedReadPos
+		this.convertedReadPos = 0
+	}
+
+	// use iconv to fill the converted buffer from the raw buffer
+	bytesRead, bytesWritten, err := this.converter.Convert(this.rawBuffer[this.rawReadPos:this.rawWritePos], this.convertedBuffer[this.convertedWritePos:])
+
+	// adjust read and write positions
+	this.rawReadPos += bytesRead
+	this.convertedWritePos += bytesWritten
+
+	return err
+}
+
+// Read implements the io.Reader interface. It copies converted bytes into p
+// and returns how many were copied.
+//
+// Errors are only reported once no converted bytes are left to deliver: a
+// converter error is returned first, otherwise the error from the source
+// reader (os.EOF at the end of the input). This assumes the source keeps
+// returning its error on later calls, as readers normally do for os.EOF.
+func (this *Reader) Read(p []byte) (n int, err os.Error) {
+	readErr := this.fillRawBuffer()
+	convertErr := this.fillConvertedBuffer()
+
+	if this.convertedWritePos > this.convertedReadPos {
+		// copy converted bytes into p and mark them as delivered so the
+		// next call does not hand out the same bytes again
+		n = copy(p, this.convertedBuffer[this.convertedReadPos:this.convertedWritePos])
+		this.convertedReadPos += n
+
+		return n, nil
+	}
+
+	// nothing to deliver. EINVAL from iconv means the raw buffer ends in an
+	// incomplete multibyte sequence, which is only an error when the source
+	// cannot supply the missing bytes any more
+	if convertErr != nil && (convertErr != os.EINVAL || readErr != nil) {
+		return 0, convertErr
+	}
+
+	return 0, readErr
+}
--- a/sample.ebcdic-us
+++ b/sample.ebcdic-us
@ -0,0 +1 @@
+Č…““–@ć–™“„Z%
--- a/sample.go
+++ b/sample.go
@ -0,0 +1,106 @@
+package main
+
+import ( 
+	"encoding/hex"
+	"io/ioutil"
+	"iconv"
+	"fmt"
+	"os"
+)
+
+// main exercises iconv.ConvertString, iconv.Convert and iconv.Reader by
+// converting the sample files between utf-8 and ebcdic-us in both directions
+// and printing whether each result matched the expected bytes. It exits with
+// status 1 when a sample file cannot be read or opened, because none of the
+// checks are meaningful without the sample data.
+func main() {
+	// read bytes from sample.utf8
+	utf8Bytes, err := ioutil.ReadFile("sample.utf8")
+
+	if err != nil {
+		fmt.Println("Could not open 'sample.utf8': ", err)
+		os.Exit(1)
+	}
+
+	// read bytes from sample.ebcdic-us
+	ebcdicBytes, err := ioutil.ReadFile("sample.ebcdic-us")
+
+	if err != nil {
+		fmt.Println("Could not open 'sample.ebcdic-us': ", err)
+		os.Exit(1)
+	}
+
+	// use iconv to check conversions both ways
+	utf8String := string(utf8Bytes)
+	ebcdicString := string(ebcdicBytes)
+
+	// convert from utf-8 to ebcdic
+	utf8ConvertedString, err := iconv.ConvertString(utf8String, "utf-8", "ebcdic-us")
+
+	if err != nil || ebcdicString != utf8ConvertedString {
+		// generate hex string
+		ebcdicHexString := hex.EncodeToString(ebcdicBytes)
+		utf8ConvertedHexString := hex.EncodeToString([]byte(utf8ConvertedString))
+
+		fmt.Println("utf-8 was not properly converted to ebcdic-us by iconv.ConvertString, error: ", err)
+		fmt.Println(ebcdicHexString, " - ", len(ebcdicString))
+		fmt.Println(utf8ConvertedHexString, " - ", len(utf8ConvertedString))
+	} else {
+		fmt.Println("utf-8 was properly converted to ebcdic-us by iconv.ConvertString")
+	}
+
+	// convert from ebcdic to utf-8
+	ebcdicConvertedString, err := iconv.ConvertString(ebcdicString, "ebcdic-us", "utf-8")
+
+	if err != nil || utf8String != ebcdicConvertedString {
+		// generate hex string
+		utf8HexString := hex.EncodeToString(utf8Bytes)
+		ebcdicConvertedHexString := hex.EncodeToString([]byte(ebcdicConvertedString))
+
+		fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.ConvertString, error: ", err)
+		fmt.Println(utf8HexString, " - ", len(utf8String))
+		fmt.Println(ebcdicConvertedHexString, " - ", len(ebcdicConvertedString))
+	} else {
+		fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.ConvertString")
+	}
+
+	testBuffer := make([]byte, len(ebcdicBytes) * 2)
+
+	// convert from ebcdic bytes to utf-8 bytes
+	bytesRead, bytesWritten, err := iconv.Convert(ebcdicBytes, testBuffer, "ebcdic-us", "utf-8")
+
+	if err != nil || bytesRead != len(ebcdicBytes) || bytesWritten != len(utf8Bytes) {
+		fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.Convert, error: ", err)
+	} else {
+		fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.Convert")
+	}
+
+	// convert from utf-8 bytes to ebcdic bytes
+	bytesRead, bytesWritten, err = iconv.Convert(utf8Bytes, testBuffer, "utf-8", "ebcdic-us")
+
+	if err != nil || bytesRead != len(utf8Bytes) || bytesWritten != len(ebcdicBytes) {
+		fmt.Println("utf-8 was not properly converted to ebcdic-us by iconv.Convert, error: ", err)
+	} else {
+		fmt.Println("utf-8 was properly converted to ebcdic-us by iconv.Convert")
+	}
+
+	// test iconv.Reader, utf-8 to ebcdic-us
+	utf8File, err := os.Open("sample.utf8", os.O_RDONLY, 0)
+
+	if err != nil {
+		fmt.Println("Could not open 'sample.utf8': ", err)
+		os.Exit(1)
+	}
+
+	// reset the counter so a failed NewReader cannot leave a stale value in it
+	bytesRead = 0
+	utf8Reader, err := iconv.NewReader(utf8File, "utf-8", "ebcdic-us")
+
+	// only read when the reader was created; a nil reader would panic
+	if err == nil {
+		bytesRead, err = utf8Reader.Read(testBuffer)
+	}
+
+	utf8File.Close()
+
+	if err != nil || bytesRead != len(ebcdicBytes) {
+		fmt.Println("utf8 was not properly converted to ebcdic-us by iconv.Reader", err)
+	} else {
+		fmt.Println("utf8 was properly converted to ebcdic-us by iconv.Reader")
+	}
+
+	// test iconv.Reader, ebcdic-us to utf-8
+	ebcdicFile, err := os.Open("sample.ebcdic-us", os.O_RDONLY, 0)
+
+	if err != nil {
+		fmt.Println("Could not open 'sample.ebcdic-us': ", err)
+		os.Exit(1)
+	}
+
+	// reset the counter so a failed NewReader cannot leave a stale value in it
+	bytesRead = 0
+	ebcdicReader, err := iconv.NewReader(ebcdicFile, "ebcdic-us", "utf-8")
+
+	// only read when the reader was created; a nil reader would panic
+	if err == nil {
+		bytesRead, err = ebcdicReader.Read(testBuffer)
+	}
+
+	ebcdicFile.Close()
+
+	if err != nil || bytesRead != len(utf8Bytes) {
+		fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.Reader: ", err)
+
+		// show what was produced next to what was expected
+		if bytesRead > 0 {
+			fmt.Println(string(testBuffer[:bytesRead]))
+			fmt.Println(hex.EncodeToString(testBuffer[:bytesRead]))
+			fmt.Println(hex.EncodeToString(utf8Bytes))
+		}
+	} else {
+		fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.Reader")
+	}
+}
--- a/sample.utf8
+++ b/sample.utf8
@ -0,0 +1 @@
+Hello World!