Initial iconv go package, supports:

* string conversion
* byte slice conversion
 * Reader conversion
This commit is contained in:
Donovan Jimenez 2011-01-14 18:34:30 -05:00
commit 82db0fae9a
7 changed files with 419 additions and 0 deletions

34
Makefile Normal file
View File

@ -0,0 +1,34 @@
# Copyright 2009 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
# Build script for the iconv package, using the Go tree's shared make rules
# (Make.inc for the toolchain variables, Make.pkg for the package targets).
include $(GOROOT)/src/Make.inc
# Name of the package being built.
TARG=iconv
# Source files listed as plain Go.
GOFILES=\
reader.go
# Source files listed as cgo inputs (both contain `import "C"`).
CGOFILES=\
iconv.go\
converter.go
# On windows an explicit -liconv is passed to the linker; other values of
# GOOS add no linker flags here.
ifeq ($(GOOS),windows)
CGO_LDFLAGS=-liconv
endif
# To add flags necessary for locating the library or its include files,
# set CGO_CFLAGS or CGO_LDFLAGS. For example, to use an
# alternate installation of the library:
# CGO_CFLAGS=-I/home/rsc/gmp32/include
# CGO_LDFLAGS+=-L/home/rsc/gmp32/lib
# Note the += on the second line.
# NOTE(review): the example paths above mention gmp32 rather than iconv;
# presumably carried over from another package's Makefile - confirm.
# Have the clean target also remove the sample binary.
CLEANFILES+=sample
include $(GOROOT)/src/Make.pkg
# simple test program to test iconv conversion
# Depends on the package being installed first, then compiles and links
# sample.go into the `sample` executable.
sample: install sample.go
$(GC) $@.go
$(LD) -o $@ $@.$O

130
converter.go Normal file
View File

@ -0,0 +1,130 @@
package iconv
// #include <iconv.h>
// #include <stdlib.h>
import "C"
import (
	"os"
	"unsafe"
)
// Converter wraps a single iconv conversion descriptor.
type Converter struct {
// context is the descriptor returned by iconv_open in NewConverter.
context C.iconv_t
// open is set to true by NewConverter when iconv_open reported no error;
// Close only calls iconv_close while it is true.
open bool
}
// NewConverter opens an iconv conversion descriptor that converts text from
// fromEncoding to toEncoding.
//
// A non-nil *Converter is always returned. When err is non-nil the converter
// is not marked open, so calling Close on it is a harmless no-op, but it must
// not be used for conversion.
func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err os.Error) {
	converter = new(Converter)

	// C.CString allocates its copy on the C heap, which the Go garbage
	// collector never reclaims; release both names once iconv_open returns.
	toCode := C.CString(toEncoding)
	defer C.free(unsafe.Pointer(toCode))
	fromCode := C.CString(fromEncoding)
	defer C.free(unsafe.Pointer(fromCode))

	// note the argument order: iconv_open takes the target encoding first
	converter.context, err = C.iconv_open(toCode, fromCode)

	if err == nil {
		// no error, mark the context as open
		converter.open = true
	}

	return converter, err
}
// destroy closes the converter by calling Close and discards any error that
// Close returns.
//
// NOTE(review): the original comment said this is "called before garbage
// collection", but nothing in the visible source registers it as a finalizer
// or calls it - confirm before relying on automatic cleanup.
func (this *Converter) destroy() {
this.Close()
}
// Close releases the underlying iconv descriptor. The converter can be
// explicitly closed if desired.
//
// Close is safe to call more than once: only the first call on an open
// converter reaches iconv_close, later calls (and calls on a converter that
// never opened) return nil without touching the descriptor.
func (this *Converter) Close() (err os.Error) {
	if this.open {
		// clear the flag first so the descriptor can never be released twice,
		// even if iconv_close reports an error
		this.open = false
		_, err = C.iconv_close(this.context)
	}

	return err
}
// Convert reads bytes from the input buffer and writes the converted bytes to
// the output buffer using a single call to iconv.
//
// It returns the number of bytes consumed from input, the number of bytes
// written to output, and any error reported by iconv.
//
// NOTE: not all bytes may be consumed from the input. This can be because the
// output buffer is too small or because there were iconv errors.
//
// If either buffer is empty nothing is converted and (0, 0, nil) is returned.
func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err os.Error) {
	// An empty slice has no first element to take the address of, so this
	// check has to come before the pointer setup below.
	if len(input) == 0 || len(output) == 0 {
		return 0, 0, nil
	}

	inputLeft := C.size_t(len(input))
	outputLeft := C.size_t(len(output))

	// iconv wants a pointer to a cursor into each buffer and advances the
	// cursors as it works, so hand it the address of a local variable that
	// holds the address of each slice's first element.
	inputFirstElementPointer := &input[0]
	inputPointer := (**C.char)(unsafe.Pointer(&inputFirstElementPointer))

	outputFirstElementPointer := &output[0]
	outputPointer := (**C.char)(unsafe.Pointer(&outputFirstElementPointer))

	// we're only going to make one call to iconv
	_, err = C.iconv(this.context, inputPointer, &inputLeft, outputPointer, &outputLeft)

	// iconv decrements the "left" counters by what it consumed and produced
	bytesRead = len(input) - int(inputLeft)
	bytesWritten = len(output) - int(outputLeft)

	return bytesRead, bytesWritten, err
}
// ConvertString converts the bytes of input and returns the resulting string.
//
// The output is accumulated through a fixed-size scratch buffer that is
// drained into the result each time iconv reports it is full (E2BIG).
// Whatever was converted before an error is still returned together with
// that error. An empty input yields ("", nil).
//
// TODO: can we do this in terms of Convert function
func (this *Converter) ConvertString(input string) (output string, err os.Error) {
	if len(input) == 0 {
		// nothing to convert, and no buffer element to point iconv at
		return "", nil
	}

	// The scratch buffer is as large as the input, with a small floor so a
	// tiny input still leaves room for a multi-byte output character.
	const minBufferSize = 16
	bufferSize := len(input)
	if bufferSize < minBufferSize {
		bufferSize = minBufferSize
	}

	// our input buffer is a C copy of the source string; iconv advances
	// sourceCursor and decrements sourceLeft, so keep the original pointer
	// around to free it
	sourceBuffer := C.CString(input)
	defer C.free(unsafe.Pointer(sourceBuffer))
	sourceCursor := sourceBuffer
	sourceLeft := C.size_t(len(input))

	outputBuffer := make([]byte, bufferSize)
	outputCursor := &outputBuffer[0]
	outputPointer := (**C.char)(unsafe.Pointer(&outputCursor))
	outputLeft := C.size_t(bufferSize)

	// process the source with iconv in a loop
	for sourceLeft > 0 {
		// assign to the named result (no :=) so the caller sees the error
		_, err = C.iconv(this.context, &sourceCursor, &sourceLeft, outputPointer, &outputLeft)
		if err == nil {
			continue
		}

		written := bufferSize - int(outputLeft)
		if err != E2BIG || written == 0 {
			// either an error we can't continue with, or the buffer is
			// "too small" even though it is empty - retrying cannot help
			break
		}

		// we need more output buffer to continue: instead of resizing, pull
		// out what we have so far and rewind BOTH the cursor and the counter
		// so iconv starts writing at the beginning of the buffer again
		output += string(outputBuffer[0:written])
		outputCursor = &outputBuffer[0]
		outputLeft = C.size_t(bufferSize)
	}

	// append whatever is left in the scratch buffer
	output += string(outputBuffer[0 : bufferSize-int(outputLeft)])

	// return result and any err
	return output, err
}

46
iconv.go Normal file
View File

@ -0,0 +1,46 @@
package iconv
// #include <errno.h>
import "C"
import (
"os"
)
// Error is a named type with os.Error as its underlying type; it allows us to
// check for iconv specific errors by comparing against the values below.
type Error os.Error
var (
// EILSEQ wraps the C errno value EILSEQ (per iconv(3): an invalid input
// sequence was encountered).
EILSEQ Error = os.Errno(int(C.EILSEQ))
// E2BIG wraps the C errno value E2BIG; Converter.ConvertString treats it as
// "the output buffer is full, drain it and continue".
E2BIG Error = os.Errno(int(C.E2BIG))
)
// Convert is a one-shot helper: it opens a converter for the given encoding
// pair, performs a single Converter.Convert from input into output, and
// closes the converter again.
//
// It returns the byte counts reported by Converter.Convert together with its
// error, or (0, 0, err) when the converter could not be created. The result
// of closing the converter is not reported.
func Convert(input []byte, output []byte, fromEncoding string, toEncoding string) (bytesRead int, bytesWritten int, err os.Error) {
	converter, openErr := NewConverter(fromEncoding, toEncoding)
	if openErr != nil {
		return 0, 0, openErr
	}

	bytesRead, bytesWritten, err = converter.Convert(input, output)
	converter.Close()

	return bytesRead, bytesWritten, err
}
// ConvertString is a one-shot helper: it opens a converter for the given
// encoding pair, runs Converter.ConvertString on input, and closes the
// converter again.
//
// It returns the converted string and error from Converter.ConvertString, or
// ("", err) when the converter could not be created. The result of closing
// the converter is not reported.
func ConvertString(input string, fromEncoding string, toEncoding string) (output string, err os.Error) {
	converter, openErr := NewConverter(fromEncoding, toEncoding)
	if openErr != nil {
		return "", openErr
	}

	output, err = converter.ConvertString(input)
	converter.Close()

	return output, err
}

101
reader.go Normal file
View File

@ -0,0 +1,101 @@
package iconv
import (
"io"
"os"
)
// Reader pulls bytes from a source io.Reader and runs them through a
// Converter, staging data in two internal buffers.
type Reader struct {
// source is the reader the unconverted bytes are read from.
source io.Reader
// converter performs the conversion from the raw to the converted buffer.
converter *Converter
// rawBuffer holds bytes read from source; the bytes not yet consumed by the
// converter are rawBuffer[rawReadPos:rawWritePos].
rawBuffer []byte
rawReadPos, rawWritePos int
// convertedBuffer holds converter output; the pending bytes are
// convertedBuffer[convertedReadPos:convertedWritePos].
convertedBuffer []byte
convertedReadPos, convertedWritePos int
}
// NewReader builds a Reader over source that converts from fromEncoding to
// toEncoding. It creates the Converter itself; if that fails, the error is
// returned along with a nil Reader.
func NewReader(source io.Reader, fromEncoding string, toEncoding string) (*Reader, os.Error) {
	converter, err := NewConverter(fromEncoding, toEncoding)
	if err != nil {
		// could not create the converter, hand the error back
		return nil, err
	}

	return NewReaderFromConverter(source, converter), nil
}
// NewReaderFromConverter builds a Reader over source that uses an already
// created converter. Both internal staging buffers are allocated at 8K.
func NewReaderFromConverter(source io.Reader, converter *Converter) (reader *Reader) {
	// size shared by the raw and the converted buffer
	const bufferSize = 8 * 1024

	return &Reader{
		source:          source,
		converter:       converter,
		rawBuffer:       make([]byte, bufferSize),
		convertedBuffer: make([]byte, bufferSize),
	}
}
// fillRawBuffer moves any unconsumed raw bytes to the front of rawBuffer and
// then reads from the source into the free space behind them.
//
// It returns the error reported by the source read, or nil when the buffer
// was already full and no read was attempted.
func (this *Reader) fillRawBuffer() (err os.Error) {
	// slide existing data to beginning
	if this.rawReadPos > 0 {
		copy(this.rawBuffer, this.rawBuffer[this.rawReadPos:this.rawWritePos])
		this.rawWritePos -= this.rawReadPos
		this.rawReadPos = 0
	}

	// no free space: don't issue a zero-length read against the source
	if this.rawWritePos == len(this.rawBuffer) {
		return nil
	}

	// read new data into buffer at write position
	var bytesRead int
	bytesRead, err = this.source.Read(this.rawBuffer[this.rawWritePos:])
	this.rawWritePos += bytesRead

	return err
}

// fillConvertedBuffer moves any pending converted bytes to the front of
// convertedBuffer and then converts as much of the raw buffer as possible
// into the free space behind them.
//
// It returns the error reported by the converter, or nil when there was
// nothing to convert or no room to convert into.
func (this *Reader) fillConvertedBuffer() (err os.Error) {
	// slide existing data to beginning
	if this.convertedReadPos > 0 {
		copy(this.convertedBuffer, this.convertedBuffer[this.convertedReadPos:this.convertedWritePos])
		this.convertedWritePos -= this.convertedReadPos
		this.convertedReadPos = 0
	}

	raw := this.rawBuffer[this.rawReadPos:this.rawWritePos]
	free := this.convertedBuffer[this.convertedWritePos:]

	// only call into the converter when both sides have something to work on
	if len(raw) == 0 || len(free) == 0 {
		return nil
	}

	// use iconv to fill the converted buffer from the raw buffer
	var bytesRead, bytesWritten int
	bytesRead, bytesWritten, err = this.converter.Convert(raw, free)

	// adjust read and write positions
	this.rawReadPos += bytesRead
	this.convertedWritePos += bytesWritten

	return err
}

// Read implements the io.Reader interface: it copies converted bytes into p
// and returns how many were copied.
//
// Converted bytes that are already buffered are served first. Only when none
// are pending does Read pull more data from the source and convert it. If
// that produces no output, the converter's error is returned, or otherwise
// the source's error (os.EOF at end of input). An os.EINVAL from the
// converter (per iconv(3): incomplete multibyte sequence at the end of the
// input) is only reported when the source has also failed, since more input
// may still complete the sequence.
func (this *Reader) Read(p []byte) (n int, err os.Error) {
	if len(p) == 0 {
		return 0, nil
	}

	if this.convertedReadPos == this.convertedWritePos {
		sourceErr := this.fillRawBuffer()
		convertErr := this.fillConvertedBuffer()

		if this.convertedReadPos == this.convertedWritePos {
			// nothing could be converted, work out which error to report
			if convertErr != nil && (convertErr != os.EINVAL || sourceErr != nil) {
				return 0, convertErr
			}
			return 0, sourceErr
		}
	}

	// copy converted bytes into p and mark them as consumed so the next
	// call does not hand out the same bytes again
	n = copy(p, this.convertedBuffer[this.convertedReadPos:this.convertedWritePos])
	this.convertedReadPos += n

	return n, nil
}

1
sample.ebcdic-us Normal file
View File

@ -0,0 +1 @@
Č…““–@晓„Z%

106
sample.go Normal file
View File

@ -0,0 +1,106 @@
package main
import (
"encoding/hex"
"io/ioutil"
"iconv"
"fmt"
"os"
)
// main is a small manual test for the iconv package. It loads the same text
// in two encodings (sample.utf8 and sample.ebcdic-us) and checks that
// iconv.ConvertString, iconv.Convert and iconv.Reader convert each file into
// the other, printing one status line per check.
//
// The program exits with status 1 if a sample file cannot be loaded or
// opened, or if a Reader cannot be created, since no later check would be
// meaningful without them.
func main() {
	// read bytes from sample.utf8
	utf8Bytes, err := ioutil.ReadFile("sample.utf8")
	if err != nil {
		fmt.Println("Could not open 'sample.utf8': ", err)
		os.Exit(1)
	}

	// read bytes from sample.ebcdic-us
	ebcdicBytes, err := ioutil.ReadFile("sample.ebcdic-us")
	if err != nil {
		fmt.Println("Could not open 'sample.ebcdic-us': ", err)
		os.Exit(1)
	}

	// use iconv to check conversions both ways
	utf8String := string(utf8Bytes)
	ebcdicString := string(ebcdicBytes)

	// convert from utf-8 to ebcdic
	utf8ConvertedString, err := iconv.ConvertString(utf8String, "utf-8", "ebcdic-us")
	if err != nil || ebcdicString != utf8ConvertedString {
		// generate hex string
		ebcdicHexString := hex.EncodeToString(ebcdicBytes)
		utf8ConvertedHexString := hex.EncodeToString([]byte(utf8ConvertedString))

		fmt.Println("utf-8 was not properly converted to ebcdic-us by iconv.ConvertString, error: ", err)
		fmt.Println(ebcdicHexString, " - ", len(ebcdicString))
		fmt.Println(utf8ConvertedHexString, " - ", len(utf8ConvertedString))
	} else {
		fmt.Println("utf-8 was properly converted to ebcdic-us by iconv.ConvertString")
	}

	// convert from ebcdic to utf-8
	ebcdicConvertedString, err := iconv.ConvertString(ebcdicString, "ebcdic-us", "utf-8")
	if err != nil || utf8String != ebcdicConvertedString {
		// generate hex string
		utf8HexString := hex.EncodeToString(utf8Bytes)
		ebcdicConvertedHexString := hex.EncodeToString([]byte(ebcdicConvertedString))

		fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.ConvertString, error: ", err)
		fmt.Println(utf8HexString, " - ", len(utf8String))
		fmt.Println(ebcdicConvertedHexString, " - ", len(ebcdicConvertedString))
	} else {
		fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.ConvertString")
	}

	// scratch output buffer shared by the remaining checks
	testBuffer := make([]byte, len(ebcdicBytes)*2)

	// convert from ebcdic bytes to utf-8 bytes
	bytesRead, bytesWritten, err := iconv.Convert(ebcdicBytes, testBuffer, "ebcdic-us", "utf-8")
	if err != nil || bytesRead != len(ebcdicBytes) || bytesWritten != len(utf8Bytes) {
		fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.Convert, error: ", err)
	} else {
		fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.Convert")
	}

	// convert from utf-8 bytes to ebcdic bytes
	bytesRead, bytesWritten, err = iconv.Convert(utf8Bytes, testBuffer, "utf-8", "ebcdic-us")
	if err != nil || bytesRead != len(utf8Bytes) || bytesWritten != len(ebcdicBytes) {
		fmt.Println("utf-8 was not properly converted to ebcdic-us by iconv.Convert, error: ", err)
	} else {
		fmt.Println("utf-8 was properly converted to ebcdic-us by iconv.Convert")
	}

	// test iconv.Reader, utf-8 to ebcdic
	utf8File, err := os.Open("sample.utf8", os.O_RDONLY, 0)
	if err != nil {
		fmt.Println("Could not open 'sample.utf8': ", err)
		os.Exit(1)
	}
	defer utf8File.Close()

	utf8Reader, err := iconv.NewReader(utf8File, "utf-8", "ebcdic-us")
	if err != nil {
		fmt.Println("Could not create utf-8 to ebcdic-us iconv.Reader: ", err)
		os.Exit(1)
	}

	bytesRead, err = utf8Reader.Read(testBuffer)
	if err != nil || bytesRead != len(ebcdicBytes) {
		fmt.Println("utf8 was not properly converted to ebcdic-us by iconv.Reader", err)
	} else {
		fmt.Println("utf8 was properly converted to ebcdic-us by iconv.Reader")
	}

	// test iconv.Reader, ebcdic to utf-8
	ebcdicFile, err := os.Open("sample.ebcdic-us", os.O_RDONLY, 0)
	if err != nil {
		fmt.Println("Could not open 'sample.ebcdic-us': ", err)
		os.Exit(1)
	}
	defer ebcdicFile.Close()

	ebcdicReader, err := iconv.NewReader(ebcdicFile, "ebcdic-us", "utf-8")
	if err != nil {
		fmt.Println("Could not create ebcdic-us to utf-8 iconv.Reader: ", err)
		os.Exit(1)
	}

	bytesRead, err = ebcdicReader.Read(testBuffer)
	if err != nil || bytesRead != len(utf8Bytes) {
		fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.Reader: ", err)

		// show what did come out next to what was expected
		if bytesRead > 0 {
			fmt.Println(string(testBuffer[:bytesRead]))
			fmt.Println(hex.EncodeToString(testBuffer[:bytesRead]))
			fmt.Println(hex.EncodeToString(utf8Bytes))
		}
	} else {
		fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.Reader")
	}
}

1
sample.utf8 Normal file
View File

@ -0,0 +1 @@
Hello World!