From 82db0fae9af15d86900db92fd65af691102decc2 Mon Sep 17 00:00:00 2001
From: Donovan Jimenez
Date: Fri, 14 Jan 2011 18:34:30 -0500
Subject: [PATCH] Initial iconv go package, supports: * string conversion *
 byte slice conversion * Reader conversion

---
 Makefile         |  34 +++++++++
 converter.go     | 156 ++++++++++++++++++++++++++++++++++++++++
 iconv.go         |  51 +++++++++++++
 reader.go        | 130 +++++++++++++++++++++++++++++++++
 sample.ebcdic-us |   1 +
 sample.go        | 124 ++++++++++++++++++++++++++++++++
 sample.utf8      |   1 +
 7 files changed, 497 insertions(+)
 create mode 100644 Makefile
 create mode 100644 converter.go
 create mode 100644 iconv.go
 create mode 100644 reader.go
 create mode 100644 sample.ebcdic-us
 create mode 100644 sample.go
 create mode 100644 sample.utf8

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..9b6fcd8
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,34 @@
+# Copyright 2009 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include $(GOROOT)/src/Make.inc
+
+TARG=iconv
+
+GOFILES=\
+	reader.go
+
+CGOFILES=\
+	iconv.go\
+	converter.go
+
+ifeq ($(GOOS),windows)
+CGO_LDFLAGS=-liconv
+endif
+
+# To add flags necessary for locating the library or its include files,
+# set CGO_CFLAGS or CGO_LDFLAGS. For example, to use an
+# alternate installation of the library:
+#	CGO_CFLAGS=-I/home/rsc/gmp32/include
+#	CGO_LDFLAGS+=-L/home/rsc/gmp32/lib
+# Note the += on the second line.
+
+CLEANFILES+=sample
+
+include $(GOROOT)/src/Make.pkg
+
+# simple test program to test iconv conversion
+sample: install sample.go
+	$(GC) $@.go
+	$(LD) -o $@ $@.$O
diff --git a/converter.go b/converter.go
new file mode 100644
index 0000000..2077881
--- /dev/null
+++ b/converter.go
@@ -0,0 +1,156 @@
+package iconv
+
+// #include <iconv.h>
+// #include <stdlib.h>
+import "C"
+
+import (
+	"os"
+	"unsafe"
+)
+
+// Converter wraps an iconv conversion descriptor
+type Converter struct {
+	context C.iconv_t
+	open    bool
+}
+
+// NewConverter opens an iconv descriptor that converts fromEncoding into
+// toEncoding. When err is not nil the returned converter was never opened
+// and must not be used for conversions
+func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err os.Error) {
+	converter = new(Converter)
+
+	// C.CString copies each name into malloc'ed memory that the garbage
+	// collector knows nothing about
+	toEncodingC := C.CString(toEncoding)
+	fromEncodingC := C.CString(fromEncoding)
+
+	converter.context, err = C.iconv_open(toEncodingC, fromEncodingC)
+
+	// release our copies of the names now that iconv_open has returned
+	C.free(unsafe.Pointer(toEncodingC))
+	C.free(unsafe.Pointer(fromEncodingC))
+
+	// check err
+	if err == nil {
+		// no error, mark the context as open
+		converter.open = true
+	}
+
+	return
+}
+
+// Closes the converter. NOTE: nothing registers this as a finalizer, so
+// it is not run before garbage collection - call Close explicitly
+func (this *Converter) destroy() {
+	this.Close()
+}
+
+// The converter can be explicitly closed if desired. Calling Close more
+// than once is harmless, only the first call reaches iconv_close
+func (this *Converter) Close() (err os.Error) {
+	if this.open {
+		_, err = C.iconv_close(this.context)
+
+		// never hand the same descriptor to iconv_close twice
+		this.open = false
+	}
+
+	return
+}
+
+// read bytes from an input buffer, and write them to an output buffer
+// will return the number of bytesRead from the input and the number of bytes
+// written to the output as well as any iconv errors
+//
+// NOTE: not all bytes may be consumed from the input. This can be because the output
+// buffer is too small or because there were iconv errors
+func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err os.Error) {
+	// with nothing to read or nowhere to write there is no work to do, and
+	// taking the address of the first element of an empty slice would panic
+	if len(input) == 0 || len(output) == 0 {
+		return 0, 0, nil
+	}
+
+	inputLeft := C.size_t(len(input))
+	outputLeft := C.size_t(len(output))
+
+	// we're going to give iconv the pointers to the underlying
+	// storage of each byte slice - so far this is the simplest
+	// way i've found to do that in Go, but it seems ugly
+	inputFirstElementPointer := &input[0]
+	inputPointer := (**C.char)(unsafe.Pointer(&inputFirstElementPointer))
+
+	outputFirstElementPointer := &output[0]
+	outputPointer := (**C.char)(unsafe.Pointer(&outputFirstElementPointer))
+
+	// we're only going to make one call to iconv
+	_, err = C.iconv(this.context, inputPointer, &inputLeft, outputPointer, &outputLeft)
+
+	// update byte counters
+	bytesRead = len(input) - int(inputLeft)
+	bytesWritten = len(output) - int(outputLeft)
+
+	return bytesRead, bytesWritten, err
+}
+
+// convert the bytes of a string and return the resulting string. Anything
+// converted before an iconv error is still returned along with that error
+func (this *Converter) ConvertString(input string) (output string, err os.Error) {
+	// an empty string converts to an empty string, and the output buffer
+	// below needs at least one element to take the address of
+	if len(input) == 0 {
+		return "", nil
+	}
+
+	// our input buffer is a C copy of the source string. iconv advances the
+	// pointer it is given, so keep the start of the copy to free it later
+	sourceBuffer := C.CString(input)
+	sourcePointer := sourceBuffer
+	sourceLeft := C.size_t(len(input))
+
+	// the output buffer starts out the size of the input and is reused
+	// each time it fills up
+	outputBuffer := make([]byte, len(input))
+
+	// process the source with iconv in a loop
+	for sourceLeft > 0 {
+		// iconv also advances the output pointer, so every pass has to
+		// start again at the front of the (already drained) buffer
+		outputFirstPointer := &outputBuffer[0]
+		outputPointer := (**C.char)(unsafe.Pointer(&outputFirstPointer))
+		outputLeft := C.size_t(len(outputBuffer))
+
+		// assign to the named result - using := here would shadow it and
+		// the caller would never see the error
+		_, err = C.iconv(this.context, &sourcePointer, &sourceLeft, outputPointer, &outputLeft)
+
+		// pull what we got so far out of the buffer
+		bytesWritten := len(outputBuffer) - int(outputLeft)
+		output += string(outputBuffer[0:bytesWritten])
+
+		if err == nil {
+			continue
+		}
+
+		if err != E2BIG {
+			// we got an error we can't continue with
+			break
+		}
+
+		// E2BIG: we need more output buffer to continue. If not even one
+		// character fit, the buffer has to grow or we would loop forever
+		if bytesWritten == 0 {
+			outputBuffer = make([]byte, 2*len(outputBuffer))
+		}
+
+		err = nil
+	}
+
+	// free our sourceBuffer, no longer needed
+	C.free(unsafe.Pointer(sourceBuffer))
+
+	// return result and any err
+	return output, err
+}
diff --git a/iconv.go b/iconv.go
new file mode 100644
index 0000000..1ea4d00
--- /dev/null
+++ b/iconv.go
@@ -0,0 +1,51 @@
+package iconv
+
+// #include <errno.h>
+import "C"
+
+import (
+	"os"
+)
+
+// allows us to check for iconv specific errors
+type Error os.Error
+
+var (
+	EILSEQ Error = os.Errno(int(C.EILSEQ))
+	E2BIG  Error = os.Errno(int(C.E2BIG))
+)
+
+// Convert converts input from fromEncoding to toEncoding into output using
+// a temporary Converter. It returns the number of bytes read from input,
+// the number of bytes written to output, and any iconv error
+func Convert(input []byte, output []byte, fromEncoding string, toEncoding string) (bytesRead int, bytesWritten int, err os.Error) {
+	// create a new converter
+	converter, err := NewConverter(fromEncoding, toEncoding)
+
+	if err == nil {
+		// call Convert
+		bytesRead, bytesWritten, err = converter.Convert(input, output)
+
+		// close the converter
+		converter.Close()
+	}
+
+	return
+}
+
+// ConvertString converts input from fromEncoding to toEncoding using a
+// temporary Converter and returns the converted string
+func ConvertString(input string, fromEncoding string, toEncoding string) (output string, err os.Error) {
+	// create a new converter
+	converter, err := NewConverter(fromEncoding, toEncoding)
+
+	if err == nil {
+		// convert the string
+		output, err = converter.ConvertString(input)
+
+		// close the converter
+		converter.Close()
+	}
+
+	return
+}
diff --git a/reader.go b/reader.go
new file mode 100644
index 0000000..ac77e09
--- /dev/null
+++ b/reader.go
@@ -0,0 +1,130 @@
+package iconv
+
+import (
+	"io"
+	"os"
+)
+
+// Reader converts the bytes of a source io.Reader with iconv as they
+// are read
+type Reader struct {
+	source                              io.Reader
+	converter                           *Converter
+	rawBuffer                           []byte
+	rawReadPos, rawWritePos             int
+	convertedBuffer                     []byte
+	convertedReadPos, convertedWritePos int
+	err                                 os.Error
+}
+
+// NewReader wraps source in a Reader that converts from fromEncoding to
+// toEncoding, or returns the error from creating the converter
+func NewReader(source io.Reader, fromEncoding string, toEncoding string) (*Reader, os.Error) {
+	// create a converter
+	converter, err := NewConverter(fromEncoding, toEncoding)
+
+	if err == nil {
+		return NewReaderFromConverter(source, converter), err
+	}
+
+	// return the error
+	return nil, err
+}
+
+// NewReaderFromConverter wraps source in a Reader that uses converter
+func NewReaderFromConverter(source io.Reader, converter *Converter) (reader *Reader) {
+	reader = new(Reader)
+
+	// copy elements
+	reader.source = source
+	reader.converter = converter
+
+	// create 8K buffers
+	reader.rawBuffer = make([]byte, 8 * 1024)
+	reader.convertedBuffer = make([]byte, 8 * 1024)
+
+	return reader
+}
+
+// read more bytes from the source into the raw buffer, remembering any
+// error the source reports
+func (this *Reader) fillRawBuffer() {
+	// slide existing data to beginning
+	if this.rawReadPos > 0 {
+		// copy current bytes
+		copy(this.rawBuffer, this.rawBuffer[this.rawReadPos:this.rawWritePos])
+
+		// adjust positions
+		this.rawWritePos -= this.rawReadPos
+		this.rawReadPos = 0
+	}
+
+	// don't read from a source that already failed, and don't issue a
+	// zero length read when the buffer has no room left
+	if this.err != nil || this.rawWritePos == len(this.rawBuffer) {
+		return
+	}
+
+	// read new data into buffer at write position
+	bytesRead, err := this.source.Read(this.rawBuffer[this.rawWritePos:])
+
+	// adjust write position
+	this.rawWritePos += bytesRead
+
+	// track source errors (including os.EOF) so Read can report them
+	// once everything buffered has been handed out
+	if err != nil {
+		this.err = err
+	}
+}
+
+// convert as much of the raw buffer as fits into the converted buffer,
+// remembering conversion errors that can't be recovered from
+func (this *Reader) fillConvertedBuffer() {
+	// slide existing data to beginning
+	if this.convertedReadPos > 0 {
+		// copy current bytes
+		copy(this.convertedBuffer, this.convertedBuffer[this.convertedReadPos:this.convertedWritePos])
+
+		// adjust positions
+		this.convertedWritePos -= this.convertedReadPos
+		this.convertedReadPos = 0
+	}
+
+	// use iconv to fill the converted buffer from the raw buffer
+	bytesRead, bytesWritten, err := this.converter.Convert(this.rawBuffer[this.rawReadPos:this.rawWritePos], this.convertedBuffer[this.convertedWritePos:])
+
+	// adjust read and write positions
+	this.rawReadPos += bytesRead
+	this.convertedWritePos += bytesWritten
+
+	switch {
+	case err == nil || err == E2BIG:
+		// E2BIG only means the converted buffer is full for now
+	case err == os.EINVAL && this.err == nil:
+		// the raw data ends mid character, the next read may complete it
+	default:
+		// a real conversion failure - report it instead of a plain os.EOF
+		if this.err == nil || this.err == os.EOF {
+			this.err = err
+		}
+	}
+}
+
+// implement the io.Reader interface: converted bytes are returned first,
+// the error that stopped the stream only once none are left
+func (this *Reader) Read(p []byte) (n int, err os.Error) {
+	this.fillRawBuffer()
+	this.fillConvertedBuffer()
+
+	if this.convertedWritePos > this.convertedReadPos {
+		// copy converted bytes into p and mark them as consumed
+		n = copy(p, this.convertedBuffer[this.convertedReadPos:this.convertedWritePos])
+		this.convertedReadPos += n
+
+		return n, nil
+	}
+
+	// nothing left to hand out, report what stopped us (if anything)
+	return 0, this.err
+}
diff --git a/sample.ebcdic-us b/sample.ebcdic-us
new file mode 100644
index 0000000..4f66be0
--- /dev/null
+++ b/sample.ebcdic-us
@@ -0,0 +1 @@
+È…““–@æ–™“„Z%
\ No newline at end of file
diff --git a/sample.go b/sample.go
new file mode 100644
index 0000000..767e337
--- /dev/null
+++ b/sample.go
@@ -0,0 +1,124 @@
+package main
+
+import (
+	"encoding/hex"
+	"io/ioutil"
+	"iconv"
+	"fmt"
+	"os"
+)
+
+// open the named file and read once from it through an iconv.Reader that
+// converts from fromEncoding to toEncoding
+func readConverted(name string, fromEncoding string, toEncoding string, buffer []byte) (bytesRead int, err os.Error) {
+	file, err := os.Open(name, os.O_RDONLY, 0)
+
+	if err != nil {
+		return 0, err
+	}
+
+	defer file.Close()
+
+	reader, err := iconv.NewReader(file, fromEncoding, toEncoding)
+
+	if err != nil {
+		return 0, err
+	}
+
+	return reader.Read(buffer)
+}
+
+func main() {
+	// read bytes from sample.utf8
+	utf8Bytes, err := ioutil.ReadFile("sample.utf8")
+
+	if err != nil {
+		fmt.Println("Could not open 'sample.utf8': ", err)
+		os.Exit(1)
+	}
+
+	// read bytes from sample.ebcdic-us
+	ebcdicBytes, err := ioutil.ReadFile("sample.ebcdic-us")
+
+	if err != nil {
+		fmt.Println("Could not open 'sample.ebcdic-us': ", err)
+		os.Exit(1)
+	}
+
+	// use iconv to check conversions both ways
+	utf8String := string(utf8Bytes)
+	ebcdicString := string(ebcdicBytes)
+
+	// convert from utf-8 to ebcdic
+	utf8ConvertedString, err := iconv.ConvertString(utf8String, "utf-8", "ebcdic-us")
+
+	if err != nil || ebcdicString != utf8ConvertedString {
+		// generate hex string
+		ebcdicHexString := hex.EncodeToString(ebcdicBytes)
+		utf8ConvertedHexString := hex.EncodeToString([]byte(utf8ConvertedString))
+
+		fmt.Println("utf-8 was not properly converted to ebcdic-us by iconv.ConvertString, error: ", err)
+		fmt.Println(ebcdicHexString, " - ", len(ebcdicString))
+		fmt.Println(utf8ConvertedHexString, " - ", len(utf8ConvertedString))
+	} else {
+		fmt.Println("utf-8 was properly converted to ebcdic-us by iconv.ConvertString")
+	}
+
+	// convert from ebcdic to utf-8
+	ebcdicConvertedString, err := iconv.ConvertString(ebcdicString, "ebcdic-us", "utf-8")
+
+	if err != nil || utf8String != ebcdicConvertedString {
+		// generate hex string
+		utf8HexString := hex.EncodeToString(utf8Bytes)
+		ebcdicConvertedHexString := hex.EncodeToString([]byte(ebcdicConvertedString))
+
+		fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.ConvertString, error: ", err)
+		fmt.Println(utf8HexString, " - ", len(utf8String))
+		fmt.Println(ebcdicConvertedHexString, " - ", len(ebcdicConvertedString))
+	} else {
+		fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.ConvertString")
+	}
+
+	testBuffer := make([]byte, len(ebcdicBytes) * 2)
+
+	// convert from ebcdic bytes to utf-8 bytes
+	bytesRead, bytesWritten, err := iconv.Convert(ebcdicBytes, testBuffer, "ebcdic-us", "utf-8")
+
+	if err != nil || bytesRead != len(ebcdicBytes) || bytesWritten != len(utf8Bytes) {
+		fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.Convert, error: ", err)
+	} else {
+		fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.Convert")
+	}
+
+	// convert from utf-8 bytes to ebcdic bytes
+	bytesRead, bytesWritten, err = iconv.Convert(utf8Bytes, testBuffer, "utf-8", "ebcdic-us")
+
+	if err != nil || bytesRead != len(utf8Bytes) || bytesWritten != len(ebcdicBytes) {
+		fmt.Println("utf-8 was not properly converted to ebcdic-us by iconv.Convert, error: ", err)
+	} else {
+		fmt.Println("utf-8 was properly converted to ebcdic-us by iconv.Convert")
+	}
+
+	// test iconv.Reader
+	bytesRead, err = readConverted("sample.utf8", "utf-8", "ebcdic-us", testBuffer)
+
+	if err != nil || bytesRead != len(ebcdicBytes) {
+		fmt.Println("utf8 was not properly converted to ebcdic-us by iconv.Reader", err)
+	} else {
+		fmt.Println("utf8 was properly converted to ebcdic-us by iconv.Reader")
+	}
+
+	bytesRead, err = readConverted("sample.ebcdic-us", "ebcdic-us", "utf-8", testBuffer)
+
+	if err != nil || bytesRead != len(utf8Bytes) {
+		fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.Reader: ", err)
+
+		if bytesRead > 0 {
+			fmt.Println(string(testBuffer[:bytesRead]))
+			fmt.Println(hex.EncodeToString(testBuffer[:bytesRead]))
+			fmt.Println(hex.EncodeToString(utf8Bytes))
+		}
+	} else {
+		fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.Reader")
+	}
+}
diff --git a/sample.utf8 b/sample.utf8
new file mode 100644
index 0000000..980a0d5
--- /dev/null
+++ b/sample.utf8
@@ -0,0 +1 @@
+Hello World!