Initial iconv go package, supports:
* string conversion * byte slice conversion * Reader conversion
This commit is contained in:
commit
82db0fae9a
34
Makefile
Normal file
34
Makefile
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
# Copyright 2009 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

# Build file for the iconv package; it relies on the Make.inc / Make.pkg
# fragments shipped under $(GOROOT)/src.
include $(GOROOT)/src/Make.inc

# name of the package being built
TARG=iconv

# sources that do not import "C"
GOFILES=\
	reader.go

# sources that import "C" and therefore go through cgo
CGOFILES=\
	iconv.go\
	converter.go

# on windows the iconv library is named explicitly on the link line
ifeq ($(GOOS),windows)
CGO_LDFLAGS=-liconv
endif

# To add flags necessary for locating the library or its include files,
# set CGO_CFLAGS or CGO_LDFLAGS. For example, to use an
# alternate installation of the library:
# CGO_CFLAGS=-I/home/rsc/gmp32/include
# CGO_LDFLAGS+=-L/home/rsc/gmp32/lib
# Note the += on the second line.

# remove the sample binary together with the other build products
CLEANFILES+=sample

include $(GOROOT)/src/Make.pkg

# simple test program to test iconv conversion
# (depends on "install" so the package is installed before sample.go is compiled)
sample: install sample.go
	$(GC) $@.go
	$(LD) -o $@ $@.$O
|
130
converter.go
Normal file
130
converter.go
Normal file
@ -0,0 +1,130 @@
|
|||||||
|
package iconv
|
||||||
|
|
||||||
|
// #include <iconv.h>
// #include <stdlib.h>
import "C"

import (
	"os"
	"unsafe"
)
|
||||||
|
|
||||||
|
// Converter wraps a single iconv conversion descriptor.
type Converter struct {
	// context is the descriptor passed to the C iconv calls
	context C.iconv_t
	// open is set by NewConverter once iconv_open succeeded; Close only
	// calls iconv_close while it is true
	open bool
}
|
||||||
|
|
||||||
|
func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err os.Error) {
|
||||||
|
converter = new(Converter)
|
||||||
|
|
||||||
|
converter.context, err = C.iconv_open(C.CString(toEncoding), C.CString(fromEncoding))
|
||||||
|
|
||||||
|
// check err
|
||||||
|
if err == nil {
|
||||||
|
// no error, mark the context as open
|
||||||
|
converter.open = true
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// destroy releases the converter by calling Close; the error from Close is
// discarded.
//
// NOTE(review): this was described as being called before garbage collection,
// but nothing visible in this file registers it as a finalizer - confirm.
func (this *Converter) destroy() {
	this.Close()
}
|
||||||
|
|
||||||
|
// The converter can be explicitly closed if desired
|
||||||
|
func (this *Converter) Close() (err os.Error) {
|
||||||
|
if this.open {
|
||||||
|
_, err = C.iconv_close(this.context)
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// read bytes from an input buffer, and write them to and output buffer
|
||||||
|
// will return the number of bytesRead from the input and the number of bytes
|
||||||
|
// written to the output as well as any iconv errors
|
||||||
|
//
|
||||||
|
// NOTE: not all bytes may be consumed from the input. This can be because the output
|
||||||
|
// buffer is too small or because there were iconv errors
|
||||||
|
func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err os.Error) {
|
||||||
|
inputLeft := C.size_t(len(input))
|
||||||
|
outputLeft := C.size_t(len(output))
|
||||||
|
|
||||||
|
// we're going to give iconv the pointers to the underlying
|
||||||
|
// storage of each byte slice - so far this is the simplest
|
||||||
|
// way i've found to do that in Go, but it seems ugly
|
||||||
|
inputFirstElementPointer := &input[0]
|
||||||
|
inputPointer := (**C.char)(unsafe.Pointer(&inputFirstElementPointer))
|
||||||
|
|
||||||
|
outputFirstElementPointer := &output[0]
|
||||||
|
outputPointer := (**C.char)(unsafe.Pointer(&outputFirstElementPointer))
|
||||||
|
|
||||||
|
// we're only going to make one call to iconv
|
||||||
|
if inputLeft > 0 && outputLeft > 0 {
|
||||||
|
_,err = C.iconv(this.context, inputPointer, &inputLeft, outputPointer, &outputLeft)
|
||||||
|
|
||||||
|
// update byte counters
|
||||||
|
bytesRead = len(input) - int(inputLeft)
|
||||||
|
bytesWritten = len(output) - int(outputLeft)
|
||||||
|
}
|
||||||
|
|
||||||
|
return bytesRead, bytesWritten, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// convert the bytes of a string and return the resulting string
|
||||||
|
//
|
||||||
|
// TODO: can we do this in terms of Convert function
|
||||||
|
func (this *Converter) ConvertString(input string) (output string, err os.Error) {
|
||||||
|
// both our input buffer and output buffer will be the same size
|
||||||
|
// but we'll reuse our output buffer each time its filled
|
||||||
|
bufferSize := len(input)
|
||||||
|
sourceLeft := C.size_t(bufferSize)
|
||||||
|
outputLeft := sourceLeft
|
||||||
|
outputReset := outputLeft
|
||||||
|
|
||||||
|
// our input buffer is the source string, but iconv will track
|
||||||
|
// how many bytes has left to process
|
||||||
|
sourceBuffer := C.CString(input)
|
||||||
|
sourcePointer := &sourceBuffer
|
||||||
|
|
||||||
|
outputBuffer := make([]byte, bufferSize)
|
||||||
|
outputFirstPointer := &outputBuffer[0]
|
||||||
|
outputPointer := (**C.char)(unsafe.Pointer(&outputFirstPointer))
|
||||||
|
|
||||||
|
// process the source with iconv in a loop
|
||||||
|
for sourceLeft > 0 {
|
||||||
|
//fmt.Println("calling to iconv")
|
||||||
|
_,err := C.iconv(this.context, sourcePointer, &sourceLeft, outputPointer, &outputLeft)
|
||||||
|
|
||||||
|
//fmt.Println("sourceLeft: ", int(sourceLeft), " outputLeft: ", int(outputLeft))
|
||||||
|
|
||||||
|
// check the err - most interested if we need to expand the output buffer
|
||||||
|
if err != nil {
|
||||||
|
//fmt.Println("got error value: ", err)
|
||||||
|
|
||||||
|
if err == E2BIG {
|
||||||
|
// we need more output buffer to continue
|
||||||
|
// instead of resizing, lets pull what we got so far
|
||||||
|
// and set outputLeft back to the buffer size
|
||||||
|
output += string(outputBuffer[0:bufferSize - int(outputLeft)])
|
||||||
|
outputLeft = outputReset
|
||||||
|
} else {
|
||||||
|
// we got an error we can't continue with
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// free our sourceBuffer, no longer needed
|
||||||
|
//C.free(unsafe.Pointer(&sourceBuffer))
|
||||||
|
|
||||||
|
// convert output buffer a go string
|
||||||
|
output += string(outputBuffer[0:bufferSize - int(outputLeft)])
|
||||||
|
|
||||||
|
// free our outputBuffer, no longer needed
|
||||||
|
//C.free(unsafe.Pointer(&outputBuffer))
|
||||||
|
|
||||||
|
// return result and any err
|
||||||
|
return output, err
|
||||||
|
}
|
46
iconv.go
Normal file
46
iconv.go
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
package iconv
|
||||||
|
|
||||||
|
// #include <errno.h>
|
||||||
|
import "C"
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Error is the type of the errno values exported by this package; it allows
// callers to check a returned error for iconv specific errors by comparing
// against them.
type Error os.Error

var (
	// EILSEQ is the C errno constant EILSEQ wrapped as an os.Errno.
	EILSEQ Error = os.Errno(int(C.EILSEQ))
	// E2BIG is the C errno constant E2BIG wrapped as an os.Errno;
	// Converter.ConvertString treats it as "more output buffer needed".
	E2BIG Error = os.Errno(int(C.E2BIG))
)
|
||||||
|
|
||||||
|
func Convert(input []byte, output []byte, fromEncoding string, toEncoding string) (bytesRead int, bytesWritten int, err os.Error) {
|
||||||
|
// create a new converter
|
||||||
|
converter, err := NewConverter(fromEncoding, toEncoding)
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
// call Convert
|
||||||
|
bytesRead, bytesWritten, err = converter.Convert(input, output)
|
||||||
|
|
||||||
|
// close the converter
|
||||||
|
converter.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func ConvertString(input string, fromEncoding string, toEncoding string) (output string, err os.Error) {
|
||||||
|
// create a new converter
|
||||||
|
converter, err := NewConverter(fromEncoding, toEncoding)
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
// convert the string
|
||||||
|
output, err = converter.ConvertString(input)
|
||||||
|
|
||||||
|
// close the converter
|
||||||
|
converter.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
101
reader.go
Normal file
101
reader.go
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
package iconv
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Reader reads bytes from a source io.Reader and passes them through a
// Converter, buffering both the raw and the converted bytes.
type Reader struct {
	// source supplies the raw, unconverted bytes
	source io.Reader
	// converter turns raw bytes into converted bytes
	converter *Converter
	// rawBuffer[rawReadPos:rawWritePos] holds bytes read from source that
	// have not been consumed by the converter yet
	rawBuffer []byte
	rawReadPos, rawWritePos int
	// convertedBuffer[convertedReadPos:convertedWritePos] holds the bytes
	// produced by the converter
	convertedBuffer []byte
	convertedReadPos, convertedWritePos int
}
|
||||||
|
|
||||||
|
func NewReader(source io.Reader, fromEncoding string, toEncoding string) (*Reader, os.Error) {
|
||||||
|
// create a converter
|
||||||
|
converter, err := NewConverter(fromEncoding, toEncoding)
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
return NewReaderFromConverter(source, converter), err
|
||||||
|
}
|
||||||
|
|
||||||
|
// return the error
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewReaderFromConverter(source io.Reader, converter *Converter) (reader *Reader) {
|
||||||
|
reader = new(Reader)
|
||||||
|
|
||||||
|
// copy elements
|
||||||
|
reader.source = source
|
||||||
|
reader.converter = converter
|
||||||
|
|
||||||
|
// create 8K buffers
|
||||||
|
reader.rawBuffer = make([]byte, 8 * 1024)
|
||||||
|
reader.convertedBuffer = make([]byte, 8 * 1024)
|
||||||
|
|
||||||
|
return reader
|
||||||
|
}
|
||||||
|
|
||||||
|
func (this *Reader) fillRawBuffer() {
|
||||||
|
// slide existing data to beginning
|
||||||
|
if this.rawReadPos > 0 {
|
||||||
|
// copy current bytes
|
||||||
|
copy(this.rawBuffer, this.rawBuffer[this.rawReadPos:this.rawWritePos])
|
||||||
|
|
||||||
|
// adjust positions
|
||||||
|
this.rawWritePos -= this.rawReadPos
|
||||||
|
this.rawReadPos = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// read new data into buffer at write position
|
||||||
|
bytesRead, err := this.source.Read(this.rawBuffer[this.rawWritePos:])
|
||||||
|
|
||||||
|
// adjust write position
|
||||||
|
this.rawWritePos += bytesRead
|
||||||
|
|
||||||
|
// track source reader errors
|
||||||
|
if err != nil {
|
||||||
|
// not sure where to put this for now
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (this *Reader) fillConvertedBuffer() {
|
||||||
|
// slide existing data to beginning
|
||||||
|
if this.convertedReadPos > 0 {
|
||||||
|
// copy current bytes
|
||||||
|
copy(this.convertedBuffer, this.convertedBuffer[this.convertedReadPos:this.convertedWritePos])
|
||||||
|
|
||||||
|
// adjust positions
|
||||||
|
this.convertedWritePos -= this.convertedReadPos
|
||||||
|
this.convertedReadPos = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// use iconv to fill the converted buffer from the raw buffer
|
||||||
|
bytesRead, bytesWritten, err := this.converter.Convert(this.rawBuffer[this.rawReadPos:this.rawWritePos], this.convertedBuffer[this.convertedWritePos:])
|
||||||
|
|
||||||
|
// adjust read and write positions
|
||||||
|
this.rawReadPos += bytesRead
|
||||||
|
this.convertedWritePos += bytesWritten
|
||||||
|
|
||||||
|
// track iconv convert errors
|
||||||
|
if err != nil {
|
||||||
|
// not sure where to put this for now
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// implement the io.Reader interface
|
||||||
|
func (this *Reader) Read(p []byte) (n int, err os.Error) {
|
||||||
|
this.fillRawBuffer()
|
||||||
|
this.fillConvertedBuffer()
|
||||||
|
|
||||||
|
if this.convertedWritePos - 1 > this.convertedReadPos {
|
||||||
|
// copy converted bytes into p
|
||||||
|
n = copy(p, this.convertedBuffer[this.convertedReadPos:this.convertedWritePos])
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
1
sample.ebcdic-us
Normal file
1
sample.ebcdic-us
Normal file
@ -0,0 +1 @@
|
|||||||
|
Č…““–@ć–™“„Z%
|
106
sample.go
Normal file
106
sample.go
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/hex"
|
||||||
|
"io/ioutil"
|
||||||
|
"iconv"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
// read bytes from sample.utf8
|
||||||
|
utf8Bytes, err := ioutil.ReadFile("sample.utf8")
|
||||||
|
|
||||||
|
if (err != nil) {
|
||||||
|
fmt.Println("Could not open 'sample.utf8': ", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// read bytes from sample.ebcdic-us
|
||||||
|
ebcdicBytes, err := ioutil.ReadFile("sample.ebcdic-us")
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("Could not open 'sample.ebcdic-us': ", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// use iconv to check conversions both ways
|
||||||
|
utf8String := string(utf8Bytes)
|
||||||
|
ebcdicString := string(ebcdicBytes)
|
||||||
|
|
||||||
|
// convert from utf-8 to ebcdic
|
||||||
|
utf8ConvertedString, err := iconv.ConvertString(utf8String, "utf-8", "ebcdic-us")
|
||||||
|
|
||||||
|
if err != nil || ebcdicString != utf8ConvertedString {
|
||||||
|
// generate hex string
|
||||||
|
ebcdicHexString := hex.EncodeToString(ebcdicBytes)
|
||||||
|
utf8ConvertedHexString := hex.EncodeToString([]byte(utf8ConvertedString))
|
||||||
|
|
||||||
|
fmt.Println("utf-8 was not properly converted to ebcdic-us by iconv.ConvertString, error: ", err)
|
||||||
|
fmt.Println(ebcdicHexString, " - ", len(ebcdicString))
|
||||||
|
fmt.Println(utf8ConvertedHexString, " - ", len(utf8ConvertedString))
|
||||||
|
} else {
|
||||||
|
fmt.Println("utf-8 was properly converted to ebcdic-us by iconv.ConvertString")
|
||||||
|
}
|
||||||
|
|
||||||
|
// convert from ebcdic to utf-8
|
||||||
|
ebcdicConvertedString, err := iconv.ConvertString(ebcdicString, "ebcdic-us", "utf-8")
|
||||||
|
|
||||||
|
if err != nil || utf8String != ebcdicConvertedString {
|
||||||
|
// generate hex string
|
||||||
|
utf8HexString := hex.EncodeToString(utf8Bytes)
|
||||||
|
ebcdicConvertedHexString := hex.EncodeToString([]byte(ebcdicConvertedString))
|
||||||
|
|
||||||
|
fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.ConvertString, error: ", err)
|
||||||
|
fmt.Println(utf8HexString, " - ", len(utf8String))
|
||||||
|
fmt.Println(ebcdicConvertedHexString, " - ", len(ebcdicConvertedString))
|
||||||
|
} else {
|
||||||
|
fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.ConvertString")
|
||||||
|
}
|
||||||
|
|
||||||
|
testBuffer := make([]byte, len(ebcdicBytes) * 2)
|
||||||
|
|
||||||
|
// convert from ebdic bytes to utf-8 bytes
|
||||||
|
bytesRead, bytesWritten, err := iconv.Convert(ebcdicBytes, testBuffer, "ebcdic-us", "utf-8")
|
||||||
|
|
||||||
|
if err != nil || bytesRead != len(ebcdicBytes) || bytesWritten != len(utf8Bytes) {
|
||||||
|
fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.Convert, error: ", err)
|
||||||
|
} else {
|
||||||
|
fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.Convert")
|
||||||
|
}
|
||||||
|
|
||||||
|
// convert from utf-8 bytes to ebcdic bytes
|
||||||
|
bytesRead, bytesWritten, err = iconv.Convert(utf8Bytes, testBuffer, "utf-8", "ebcdic-us")
|
||||||
|
|
||||||
|
if err != nil || bytesRead != len(utf8Bytes) || bytesWritten != len(ebcdicBytes) {
|
||||||
|
fmt.Println("utf-8 was not properly converted to ebcdic-us by iconv.Convert, error: ", err)
|
||||||
|
} else {
|
||||||
|
fmt.Println("utf-8 was properly converted to ebcdic-us by iconv.Convert")
|
||||||
|
}
|
||||||
|
|
||||||
|
// test iconv.Reader
|
||||||
|
utf8File,_ := os.Open("sample.utf8", os.O_RDONLY, 0)
|
||||||
|
utf8Reader,_ := iconv.NewReader(utf8File, "utf-8", "ebcdic-us")
|
||||||
|
bytesRead, err = utf8Reader.Read(testBuffer)
|
||||||
|
|
||||||
|
if err != nil || bytesRead != len(ebcdicBytes) {
|
||||||
|
fmt.Println("utf8 was not properly converted to ebcdic-us by iconv.Reader", err)
|
||||||
|
} else {
|
||||||
|
fmt.Println("utf8 was property converted to ebcdic-us by iconv.Reader")
|
||||||
|
}
|
||||||
|
|
||||||
|
ebcdicFile,_ := os.Open("sample.ebcdic-us", os.O_RDONLY, 0)
|
||||||
|
ebcdicReader,_ := iconv.NewReader(ebcdicFile, "ebcdic-us", "utf-8")
|
||||||
|
bytesRead, err = ebcdicReader.Read(testBuffer)
|
||||||
|
|
||||||
|
if err != nil || bytesRead != len(utf8Bytes) {
|
||||||
|
fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.Reader: ", err)
|
||||||
|
|
||||||
|
if bytesRead > 0 {
|
||||||
|
fmt.Println(string(testBuffer[:bytesRead]))
|
||||||
|
fmt.Println(hex.EncodeToString(testBuffer[:bytesRead]))
|
||||||
|
fmt.Println(hex.EncodeToString(utf8Bytes))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.Reader")
|
||||||
|
}
|
||||||
|
}
|
1
sample.utf8
Normal file
1
sample.utf8
Normal file
@ -0,0 +1 @@
|
|||||||
|
Hello World!
|
Loading…
Reference in New Issue
Block a user