Initial iconv go package, supports:
* string conversion
* byte slice conversion
* Reader conversion
This commit is contained in:
commit
82db0fae9a
34
Makefile
Normal file
34
Makefile
Normal file
@ -0,0 +1,34 @@
|
||||
# Copyright 2009 The Go Authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
include $(GOROOT)/src/Make.inc
|
||||
|
||||
TARG=iconv
|
||||
|
||||
GOFILES=\
|
||||
reader.go
|
||||
|
||||
CGOFILES=\
|
||||
iconv.go\
|
||||
converter.go
|
||||
|
||||
ifeq ($(GOOS),windows)
|
||||
CGO_LDFLAGS=-liconv
|
||||
endif
|
||||
|
||||
# To add flags necessary for locating the library or its include files,
|
||||
# set CGO_CFLAGS or CGO_LDFLAGS. For example, to use an
|
||||
# alternate installation of the library:
|
||||
# CGO_CFLAGS=-I/home/rsc/gmp32/include
|
||||
# CGO_LDFLAGS+=-L/home/rsc/gmp32/lib
|
||||
# Note the += on the second line.
|
||||
|
||||
CLEANFILES+=sample
|
||||
|
||||
include $(GOROOT)/src/Make.pkg
|
||||
|
||||
# simple test program to test iconv conversion
|
||||
sample: install sample.go
|
||||
$(GC) $@.go
|
||||
$(LD) -o $@ $@.$O
|
130
converter.go
Normal file
130
converter.go
Normal file
@ -0,0 +1,130 @@
|
||||
package iconv
|
||||
|
||||
// #include <iconv.h>
// #include <stdlib.h>
import "C"

import (
	"os"
	"unsafe"
)
|
||||
|
||||
type Converter struct {
|
||||
context C.iconv_t
|
||||
open bool
|
||||
}
|
||||
|
||||
func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err os.Error) {
|
||||
converter = new(Converter)
|
||||
|
||||
converter.context, err = C.iconv_open(C.CString(toEncoding), C.CString(fromEncoding))
|
||||
|
||||
// check err
|
||||
if err == nil {
|
||||
// no error, mark the context as open
|
||||
converter.open = true
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Called before garbage collection
|
||||
func (this *Converter) destroy() {
|
||||
this.Close()
|
||||
}
|
||||
|
||||
// The converter can be explicitly closed if desired
|
||||
func (this *Converter) Close() (err os.Error) {
|
||||
if this.open {
|
||||
_, err = C.iconv_close(this.context)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// read bytes from an input buffer, and write them to and output buffer
|
||||
// will return the number of bytesRead from the input and the number of bytes
|
||||
// written to the output as well as any iconv errors
|
||||
//
|
||||
// NOTE: not all bytes may be consumed from the input. This can be because the output
|
||||
// buffer is too small or because there were iconv errors
|
||||
func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err os.Error) {
|
||||
inputLeft := C.size_t(len(input))
|
||||
outputLeft := C.size_t(len(output))
|
||||
|
||||
// we're going to give iconv the pointers to the underlying
|
||||
// storage of each byte slice - so far this is the simplest
|
||||
// way i've found to do that in Go, but it seems ugly
|
||||
inputFirstElementPointer := &input[0]
|
||||
inputPointer := (**C.char)(unsafe.Pointer(&inputFirstElementPointer))
|
||||
|
||||
outputFirstElementPointer := &output[0]
|
||||
outputPointer := (**C.char)(unsafe.Pointer(&outputFirstElementPointer))
|
||||
|
||||
// we're only going to make one call to iconv
|
||||
if inputLeft > 0 && outputLeft > 0 {
|
||||
_,err = C.iconv(this.context, inputPointer, &inputLeft, outputPointer, &outputLeft)
|
||||
|
||||
// update byte counters
|
||||
bytesRead = len(input) - int(inputLeft)
|
||||
bytesWritten = len(output) - int(outputLeft)
|
||||
}
|
||||
|
||||
return bytesRead, bytesWritten, err
|
||||
}
|
||||
|
||||
// convert the bytes of a string and return the resulting string
|
||||
//
|
||||
// TODO: can we do this in terms of Convert function
|
||||
func (this *Converter) ConvertString(input string) (output string, err os.Error) {
|
||||
// both our input buffer and output buffer will be the same size
|
||||
// but we'll reuse our output buffer each time its filled
|
||||
bufferSize := len(input)
|
||||
sourceLeft := C.size_t(bufferSize)
|
||||
outputLeft := sourceLeft
|
||||
outputReset := outputLeft
|
||||
|
||||
// our input buffer is the source string, but iconv will track
|
||||
// how many bytes has left to process
|
||||
sourceBuffer := C.CString(input)
|
||||
sourcePointer := &sourceBuffer
|
||||
|
||||
outputBuffer := make([]byte, bufferSize)
|
||||
outputFirstPointer := &outputBuffer[0]
|
||||
outputPointer := (**C.char)(unsafe.Pointer(&outputFirstPointer))
|
||||
|
||||
// process the source with iconv in a loop
|
||||
for sourceLeft > 0 {
|
||||
//fmt.Println("calling to iconv")
|
||||
_,err := C.iconv(this.context, sourcePointer, &sourceLeft, outputPointer, &outputLeft)
|
||||
|
||||
//fmt.Println("sourceLeft: ", int(sourceLeft), " outputLeft: ", int(outputLeft))
|
||||
|
||||
// check the err - most interested if we need to expand the output buffer
|
||||
if err != nil {
|
||||
//fmt.Println("got error value: ", err)
|
||||
|
||||
if err == E2BIG {
|
||||
// we need more output buffer to continue
|
||||
// instead of resizing, lets pull what we got so far
|
||||
// and set outputLeft back to the buffer size
|
||||
output += string(outputBuffer[0:bufferSize - int(outputLeft)])
|
||||
outputLeft = outputReset
|
||||
} else {
|
||||
// we got an error we can't continue with
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// free our sourceBuffer, no longer needed
|
||||
//C.free(unsafe.Pointer(&sourceBuffer))
|
||||
|
||||
// convert output buffer a go string
|
||||
output += string(outputBuffer[0:bufferSize - int(outputLeft)])
|
||||
|
||||
// free our outputBuffer, no longer needed
|
||||
//C.free(unsafe.Pointer(&outputBuffer))
|
||||
|
||||
// return result and any err
|
||||
return output, err
|
||||
}
|
46
iconv.go
Normal file
46
iconv.go
Normal file
@ -0,0 +1,46 @@
|
||||
package iconv
|
||||
|
||||
// #include <errno.h>
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"os"
|
||||
)
|
||||
|
||||
// allows us to check for iconv specific errors
|
||||
type Error os.Error
|
||||
|
||||
var (
|
||||
EILSEQ Error = os.Errno(int(C.EILSEQ))
|
||||
E2BIG Error = os.Errno(int(C.E2BIG))
|
||||
)
|
||||
|
||||
func Convert(input []byte, output []byte, fromEncoding string, toEncoding string) (bytesRead int, bytesWritten int, err os.Error) {
|
||||
// create a new converter
|
||||
converter, err := NewConverter(fromEncoding, toEncoding)
|
||||
|
||||
if err == nil {
|
||||
// call Convert
|
||||
bytesRead, bytesWritten, err = converter.Convert(input, output)
|
||||
|
||||
// close the converter
|
||||
converter.Close()
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func ConvertString(input string, fromEncoding string, toEncoding string) (output string, err os.Error) {
|
||||
// create a new converter
|
||||
converter, err := NewConverter(fromEncoding, toEncoding)
|
||||
|
||||
if err == nil {
|
||||
// convert the string
|
||||
output, err = converter.ConvertString(input)
|
||||
|
||||
// close the converter
|
||||
converter.Close()
|
||||
}
|
||||
|
||||
return
|
||||
}
|
101
reader.go
Normal file
101
reader.go
Normal file
@ -0,0 +1,101 @@
|
||||
package iconv
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
)
|
||||
|
||||
type Reader struct {
|
||||
source io.Reader
|
||||
converter *Converter
|
||||
rawBuffer []byte
|
||||
rawReadPos, rawWritePos int
|
||||
convertedBuffer []byte
|
||||
convertedReadPos, convertedWritePos int
|
||||
}
|
||||
|
||||
func NewReader(source io.Reader, fromEncoding string, toEncoding string) (*Reader, os.Error) {
|
||||
// create a converter
|
||||
converter, err := NewConverter(fromEncoding, toEncoding)
|
||||
|
||||
if err == nil {
|
||||
return NewReaderFromConverter(source, converter), err
|
||||
}
|
||||
|
||||
// return the error
|
||||
return nil, err
|
||||
}
|
||||
|
||||
func NewReaderFromConverter(source io.Reader, converter *Converter) (reader *Reader) {
|
||||
reader = new(Reader)
|
||||
|
||||
// copy elements
|
||||
reader.source = source
|
||||
reader.converter = converter
|
||||
|
||||
// create 8K buffers
|
||||
reader.rawBuffer = make([]byte, 8 * 1024)
|
||||
reader.convertedBuffer = make([]byte, 8 * 1024)
|
||||
|
||||
return reader
|
||||
}
|
||||
|
||||
func (this *Reader) fillRawBuffer() {
|
||||
// slide existing data to beginning
|
||||
if this.rawReadPos > 0 {
|
||||
// copy current bytes
|
||||
copy(this.rawBuffer, this.rawBuffer[this.rawReadPos:this.rawWritePos])
|
||||
|
||||
// adjust positions
|
||||
this.rawWritePos -= this.rawReadPos
|
||||
this.rawReadPos = 0
|
||||
}
|
||||
|
||||
// read new data into buffer at write position
|
||||
bytesRead, err := this.source.Read(this.rawBuffer[this.rawWritePos:])
|
||||
|
||||
// adjust write position
|
||||
this.rawWritePos += bytesRead
|
||||
|
||||
// track source reader errors
|
||||
if err != nil {
|
||||
// not sure where to put this for now
|
||||
}
|
||||
}
|
||||
|
||||
func (this *Reader) fillConvertedBuffer() {
|
||||
// slide existing data to beginning
|
||||
if this.convertedReadPos > 0 {
|
||||
// copy current bytes
|
||||
copy(this.convertedBuffer, this.convertedBuffer[this.convertedReadPos:this.convertedWritePos])
|
||||
|
||||
// adjust positions
|
||||
this.convertedWritePos -= this.convertedReadPos
|
||||
this.convertedReadPos = 0
|
||||
}
|
||||
|
||||
// use iconv to fill the converted buffer from the raw buffer
|
||||
bytesRead, bytesWritten, err := this.converter.Convert(this.rawBuffer[this.rawReadPos:this.rawWritePos], this.convertedBuffer[this.convertedWritePos:])
|
||||
|
||||
// adjust read and write positions
|
||||
this.rawReadPos += bytesRead
|
||||
this.convertedWritePos += bytesWritten
|
||||
|
||||
// track iconv convert errors
|
||||
if err != nil {
|
||||
// not sure where to put this for now
|
||||
}
|
||||
}
|
||||
|
||||
// implement the io.Reader interface
|
||||
func (this *Reader) Read(p []byte) (n int, err os.Error) {
|
||||
this.fillRawBuffer()
|
||||
this.fillConvertedBuffer()
|
||||
|
||||
if this.convertedWritePos - 1 > this.convertedReadPos {
|
||||
// copy converted bytes into p
|
||||
n = copy(p, this.convertedBuffer[this.convertedReadPos:this.convertedWritePos])
|
||||
}
|
||||
|
||||
return
|
||||
}
|
1
sample.ebcdic-us
Normal file
1
sample.ebcdic-us
Normal file
@ -0,0 +1 @@
|
||||
Č…““–@ć–™“„Z%
|
106
sample.go
Normal file
106
sample.go
Normal file
@ -0,0 +1,106 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/hex"
|
||||
"io/ioutil"
|
||||
"iconv"
|
||||
"fmt"
|
||||
"os"
|
||||
)
|
||||
|
||||
func main() {
|
||||
// read bytes from sample.utf8
|
||||
utf8Bytes, err := ioutil.ReadFile("sample.utf8")
|
||||
|
||||
if (err != nil) {
|
||||
fmt.Println("Could not open 'sample.utf8': ", err)
|
||||
}
|
||||
|
||||
// read bytes from sample.ebcdic-us
|
||||
ebcdicBytes, err := ioutil.ReadFile("sample.ebcdic-us")
|
||||
|
||||
if err != nil {
|
||||
fmt.Println("Could not open 'sample.ebcdic-us': ", err)
|
||||
}
|
||||
|
||||
// use iconv to check conversions both ways
|
||||
utf8String := string(utf8Bytes)
|
||||
ebcdicString := string(ebcdicBytes)
|
||||
|
||||
// convert from utf-8 to ebcdic
|
||||
utf8ConvertedString, err := iconv.ConvertString(utf8String, "utf-8", "ebcdic-us")
|
||||
|
||||
if err != nil || ebcdicString != utf8ConvertedString {
|
||||
// generate hex string
|
||||
ebcdicHexString := hex.EncodeToString(ebcdicBytes)
|
||||
utf8ConvertedHexString := hex.EncodeToString([]byte(utf8ConvertedString))
|
||||
|
||||
fmt.Println("utf-8 was not properly converted to ebcdic-us by iconv.ConvertString, error: ", err)
|
||||
fmt.Println(ebcdicHexString, " - ", len(ebcdicString))
|
||||
fmt.Println(utf8ConvertedHexString, " - ", len(utf8ConvertedString))
|
||||
} else {
|
||||
fmt.Println("utf-8 was properly converted to ebcdic-us by iconv.ConvertString")
|
||||
}
|
||||
|
||||
// convert from ebcdic to utf-8
|
||||
ebcdicConvertedString, err := iconv.ConvertString(ebcdicString, "ebcdic-us", "utf-8")
|
||||
|
||||
if err != nil || utf8String != ebcdicConvertedString {
|
||||
// generate hex string
|
||||
utf8HexString := hex.EncodeToString(utf8Bytes)
|
||||
ebcdicConvertedHexString := hex.EncodeToString([]byte(ebcdicConvertedString))
|
||||
|
||||
fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.ConvertString, error: ", err)
|
||||
fmt.Println(utf8HexString, " - ", len(utf8String))
|
||||
fmt.Println(ebcdicConvertedHexString, " - ", len(ebcdicConvertedString))
|
||||
} else {
|
||||
fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.ConvertString")
|
||||
}
|
||||
|
||||
testBuffer := make([]byte, len(ebcdicBytes) * 2)
|
||||
|
||||
// convert from ebdic bytes to utf-8 bytes
|
||||
bytesRead, bytesWritten, err := iconv.Convert(ebcdicBytes, testBuffer, "ebcdic-us", "utf-8")
|
||||
|
||||
if err != nil || bytesRead != len(ebcdicBytes) || bytesWritten != len(utf8Bytes) {
|
||||
fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.Convert, error: ", err)
|
||||
} else {
|
||||
fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.Convert")
|
||||
}
|
||||
|
||||
// convert from utf-8 bytes to ebcdic bytes
|
||||
bytesRead, bytesWritten, err = iconv.Convert(utf8Bytes, testBuffer, "utf-8", "ebcdic-us")
|
||||
|
||||
if err != nil || bytesRead != len(utf8Bytes) || bytesWritten != len(ebcdicBytes) {
|
||||
fmt.Println("utf-8 was not properly converted to ebcdic-us by iconv.Convert, error: ", err)
|
||||
} else {
|
||||
fmt.Println("utf-8 was properly converted to ebcdic-us by iconv.Convert")
|
||||
}
|
||||
|
||||
// test iconv.Reader
|
||||
utf8File,_ := os.Open("sample.utf8", os.O_RDONLY, 0)
|
||||
utf8Reader,_ := iconv.NewReader(utf8File, "utf-8", "ebcdic-us")
|
||||
bytesRead, err = utf8Reader.Read(testBuffer)
|
||||
|
||||
if err != nil || bytesRead != len(ebcdicBytes) {
|
||||
fmt.Println("utf8 was not properly converted to ebcdic-us by iconv.Reader", err)
|
||||
} else {
|
||||
fmt.Println("utf8 was property converted to ebcdic-us by iconv.Reader")
|
||||
}
|
||||
|
||||
ebcdicFile,_ := os.Open("sample.ebcdic-us", os.O_RDONLY, 0)
|
||||
ebcdicReader,_ := iconv.NewReader(ebcdicFile, "ebcdic-us", "utf-8")
|
||||
bytesRead, err = ebcdicReader.Read(testBuffer)
|
||||
|
||||
if err != nil || bytesRead != len(utf8Bytes) {
|
||||
fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.Reader: ", err)
|
||||
|
||||
if bytesRead > 0 {
|
||||
fmt.Println(string(testBuffer[:bytesRead]))
|
||||
fmt.Println(hex.EncodeToString(testBuffer[:bytesRead]))
|
||||
fmt.Println(hex.EncodeToString(utf8Bytes))
|
||||
}
|
||||
} else {
|
||||
fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.Reader")
|
||||
}
|
||||
}
|
1
sample.utf8
Normal file
1
sample.utf8
Normal file
@ -0,0 +1 @@
|
||||
Hello World!
|
Loading…
Reference in New Issue
Block a user