* Added README.md

* moved sample programs to examples directory
 * cleaned up the Makefile
 * converter.go: ConvertString now uses Convert under the hood, removes some code duplication
 * reader.go: No need to have two separate buffers; we can write directly into the buffer given in the Read call. Simplifies the code greatly
This commit is contained in:
Donovan Jimenez 2011-01-15 04:06:50 -05:00
parent 82db0fae9a
commit 690531c87e
8 changed files with 164 additions and 129 deletions

View File

@ -1,34 +1,15 @@
# Copyright 2009 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
include $(GOROOT)/src/Make.inc include $(GOROOT)/src/Make.inc
# target package name
TARG=iconv TARG=iconv
# regular go files
GOFILES=\ GOFILES=\
reader.go reader.go\
# files that must be processed by cgo
CGOFILES=\ CGOFILES=\
converter.go\
iconv.go\ iconv.go\
converter.go
ifeq ($(GOOS),windows)
CGO_LDFLAGS=-liconv
endif
# To add flags necessary for locating the library or its include files,
# set CGO_CFLAGS or CGO_LDFLAGS. For example, to use an
# alternate installation of the library:
# CGO_CFLAGS=-I/home/rsc/gmp32/include
# CGO_LDFLAGS+=-L/home/rsc/gmp32/lib
# Note the += on the second line.
CLEANFILES+=sample
include $(GOROOT)/src/Make.pkg include $(GOROOT)/src/Make.pkg
# simple test program to test iconv conversion
sample: install sample.go
$(GC) $@.go
$(LD) -o $@ $@.$O

72
README.md Normal file
View File

@ -0,0 +1,72 @@
Install
=======
The goinstall command can be used:
goinstall github.com/djimenez/iconv.go
Or, you can clone the repository and use gomake instead:
git clone git://github.com/djimenez/iconv.go.git iconv
cd iconv
gomake install
Usage
=====
To use the package, you'll need the appropriate import statement:
import (
// if you used goinstall, you'll want this import
iconv "github.com/djimenez/iconv.go"
// if you used gomake install directly, you'll want this import
"iconv"
)
Converting string Values
------------------------
Converting a string can be done in two ways. First, there's iconv.ConvertString(input, fromEncoding, toEncoding string):
output,_ := iconv.ConvertString("Hello World!", "utf-8", "windows-1252")
Alternatively, you can create a converter and use its ConvertString method. This mostly just saves having to parse the from and to encodings when converting many strings in the same way.
converter,_ := iconv.NewConverter("utf-8", "windows-1252")
output,_ := converter.ConvertString("Hello World!")
Converting []byte Values
------------------------
Converting a []byte can similarly be done in two ways. First, there's iconv.Convert(input, output []byte, fromEncoding, toEncoding string). You'll immediately notice this requires you to provide both the input and output buffers. Ideally, the output buffer should be sized so that it can hold all converted bytes from the input, but if it cannot, Convert will put as many bytes as it can into the buffer without creating an invalid sequence. For example, if iconv has only a single byte left in the output buffer but needs two or more for the complete character in a multibyte encoding, it will stop writing to the buffer and return an iconv.E2BIG error.
input := []byte("Hello World!")
output := make([]byte, len(input))
bytesRead, bytesWritten, error := iconv.Convert(input, output, "utf-8", "windows-1252")
Just like with ConvertString, there is also a Convert method on Converter that can be used.
...
converter,_ := iconv.NewConverter("utf-8", "windows-1252")
bytesRead, bytesWritten, error := converter.Convert(input, output)
Converting an io.Reader
-----------------------
The iconv.Reader allows any other io.Reader to be wrapped and have its bytes transcoded as they are read.
// We're wrapping stdin for simplicity, but a File or network reader could be wrapped as well
reader,_ := iconv.NewReader(os.Stdin, "utf-8", "windows-1252")
Converting an io.Writer
------------------------
To be written.
Piping a Conversion
-------------------
To be written.

View File

@ -13,7 +13,7 @@ type Converter struct {
open bool open bool
} }
func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err os.Error) { func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err Error) {
converter = new(Converter) converter = new(Converter)
converter.context, err = C.iconv_open(C.CString(toEncoding), C.CString(fromEncoding)) converter.context, err = C.iconv_open(C.CString(toEncoding), C.CString(fromEncoding))
@ -47,21 +47,21 @@ func (this *Converter) Close() (err os.Error) {
// //
// NOTE: not all bytes may be consumed from the input. This can be because the output // NOTE: not all bytes may be consumed from the input. This can be because the output
// buffer is too small or because there were iconv errors // buffer is too small or because there were iconv errors
func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err os.Error) { func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err Error) {
inputLeft := C.size_t(len(input)) inputLeft := C.size_t(len(input))
outputLeft := C.size_t(len(output)) outputLeft := C.size_t(len(output))
// we're going to give iconv the pointers to the underlying
// storage of each byte slice - so far this is the simplest
// way i've found to do that in Go, but it seems ugly
inputFirstElementPointer := &input[0]
inputPointer := (**C.char)(unsafe.Pointer(&inputFirstElementPointer))
outputFirstElementPointer := &output[0]
outputPointer := (**C.char)(unsafe.Pointer(&outputFirstElementPointer))
// we're only going to make one call to iconv
if inputLeft > 0 && outputLeft > 0 { if inputLeft > 0 && outputLeft > 0 {
// we're going to give iconv the pointers to the underlying
// storage of each byte slice - so far this is the simplest
// way i've found to do that in Go, but it seems ugly
inputFirstElementPointer := &input[0]
inputPointer := (**C.char)(unsafe.Pointer(&inputFirstElementPointer))
outputFirstElementPointer := &output[0]
outputPointer := (**C.char)(unsafe.Pointer(&outputFirstElementPointer))
// we're only going to make one call to iconv
_,err = C.iconv(this.context, inputPointer, &inputLeft, outputPointer, &outputLeft) _,err = C.iconv(this.context, inputPointer, &inputLeft, outputPointer, &outputLeft)
// update byte counters // update byte counters
@ -72,59 +72,42 @@ func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, byte
return bytesRead, bytesWritten, err return bytesRead, bytesWritten, err
} }
// convert the bytes of a string and return the resulting string // convert a string value, returning a new string value
// func (this *Converter) ConvertString(input string) (output string, err Error) {
// TODO: can we do this in terms of Convert function
func (this *Converter) ConvertString(input string) (output string, err os.Error) {
// both our input buffer and output buffer will be the same size
// but we'll reuse our output buffer each time its filled
bufferSize := len(input)
sourceLeft := C.size_t(bufferSize)
outputLeft := sourceLeft
outputReset := outputLeft
// our input buffer is the source string, but iconv will track // construct the buffers
// how many bytes has left to process inputBuffer := []byte(input)
sourceBuffer := C.CString(input) outputBuffer := make([]byte, len(inputBuffer) * 2) // we use a larger buffer to help avoid resizing later
sourcePointer := &sourceBuffer
outputBuffer := make([]byte, bufferSize) // call Convert until all input bytes are read or an error occurs
outputFirstPointer := &outputBuffer[0] var bytesRead, totalBytesRead, bytesWritten, totalBytesWritten int
outputPointer := (**C.char)(unsafe.Pointer(&outputFirstPointer))
// process the source with iconv in a loop for totalBytesRead < len(inputBuffer) && err == nil {
for sourceLeft > 0 { bytesRead, bytesWritten, err = this.Convert(inputBuffer, outputBuffer)
//fmt.Println("calling to iconv")
_,err := C.iconv(this.context, sourcePointer, &sourceLeft, outputPointer, &outputLeft)
//fmt.Println("sourceLeft: ", int(sourceLeft), " outputLeft: ", int(outputLeft)) totalBytesRead += bytesRead
totalBytesWritten += bytesWritten
// check the err - most interested if we need to expand the output buffer // check for the E2BIG error specifically, we can add to the output
if err != nil { // buffer to correct for it and then continue
//fmt.Println("got error value: ", err) if err == E2BIG {
// increase the size of the output buffer by another input length
// first, create a new buffer
tempBuffer := make([]byte, len(outputBuffer) + len(inputBuffer))
if err == E2BIG { // copy the existing data
// we need more output buffer to continue copy(tempBuffer, outputBuffer)
// instead of resizing, lets pull what we got so far
// and set outputLeft back to the buffer size // switch the buffers
output += string(outputBuffer[0:bufferSize - int(outputLeft)]) outputBuffer = tempBuffer
outputLeft = outputReset
} else { // forget the error
// we got an error we can't continue with err = nil
break
}
} }
} }
// free our sourceBuffer, no longer needed // construct the final output string
//C.free(unsafe.Pointer(&sourceBuffer)) output = string(outputBuffer[:totalBytesWritten])
// convert output buffer a go string
output += string(outputBuffer[0:bufferSize - int(outputLeft)])
// free our outputBuffer, no longer needed
//C.free(unsafe.Pointer(&outputBuffer))
// return result and any err
return output, err return output, err
} }

View File

@ -15,7 +15,7 @@ var (
E2BIG Error = os.Errno(int(C.E2BIG)) E2BIG Error = os.Errno(int(C.E2BIG))
) )
func Convert(input []byte, output []byte, fromEncoding string, toEncoding string) (bytesRead int, bytesWritten int, err os.Error) { func Convert(input []byte, output []byte, fromEncoding string, toEncoding string) (bytesRead int, bytesWritten int, err Error) {
// create a new converter // create a new converter
converter, err := NewConverter(fromEncoding, toEncoding) converter, err := NewConverter(fromEncoding, toEncoding)
@ -30,7 +30,7 @@ func Convert(input []byte, output []byte, fromEncoding string, toEncoding string
return return
} }
func ConvertString(input string, fromEncoding string, toEncoding string) (output string, err os.Error) { func ConvertString(input string, fromEncoding string, toEncoding string) (output string, err Error) {
// create a new converter // create a new converter
converter, err := NewConverter(fromEncoding, toEncoding) converter, err := NewConverter(fromEncoding, toEncoding)

View File

@ -8,10 +8,9 @@ import (
type Reader struct { type Reader struct {
source io.Reader source io.Reader
converter *Converter converter *Converter
rawBuffer []byte buffer []byte
rawReadPos, rawWritePos int readPos, writePos int
convertedBuffer []byte err os.Error
convertedReadPos, convertedWritePos int
} }
func NewReader(source io.Reader, fromEncoding string, toEncoding string) (*Reader, os.Error) { func NewReader(source io.Reader, fromEncoding string, toEncoding string) (*Reader, os.Error) {
@ -34,68 +33,68 @@ func NewReaderFromConverter(source io.Reader, converter *Converter) (reader *Rea
reader.converter = converter reader.converter = converter
// create 8K buffers // create 8K buffers
reader.rawBuffer = make([]byte, 8 * 1024) reader.buffer = make([]byte, 8 * 1024)
reader.convertedBuffer = make([]byte, 8 * 1024)
return reader return reader
} }
func (this *Reader) fillRawBuffer() { func (this *Reader) fillBuffer() {
// slide existing data to beginning // slide existing data to beginning
if this.rawReadPos > 0 { if this.readPos > 0 {
// copy current bytes // copy current bytes - is this guaranteed safe?
copy(this.rawBuffer, this.rawBuffer[this.rawReadPos:this.rawWritePos]) copy(this.buffer, this.buffer[this.readPos:this.writePos])
// adjust positions // adjust positions
this.rawWritePos -= this.rawReadPos this.writePos -= this.readPos
this.rawReadPos = 0 this.readPos = 0
} }
// read new data into buffer at write position // read new data into buffer at write position
bytesRead, err := this.source.Read(this.rawBuffer[this.rawWritePos:]) bytesRead, err := this.source.Read(this.buffer[this.writePos:])
// adjust write position // adjust write position
this.rawWritePos += bytesRead this.writePos += bytesRead
// track source reader errors // track any reader error / EOF
if err != nil { if err != nil {
// not sure where to put this for now this.err = err
}
}
func (this *Reader) fillConvertedBuffer() {
// slide existing data to beginning
if this.convertedReadPos > 0 {
// copy current bytes
copy(this.convertedBuffer, this.convertedBuffer[this.convertedReadPos:this.convertedWritePos])
// adjust positions
this.convertedWritePos -= this.convertedReadPos
this.convertedReadPos = 0
}
// use iconv to fill the converted buffer from the raw buffer
bytesRead, bytesWritten, err := this.converter.Convert(this.rawBuffer[this.rawReadPos:this.rawWritePos], this.convertedBuffer[this.convertedWritePos:])
// adjust read and write positions
this.rawReadPos += bytesRead
this.convertedWritePos += bytesWritten
// track iconv convert errors
if err != nil {
// not sure where to put this for now
} }
} }
// implement the io.Reader interface // implement the io.Reader interface
func (this *Reader) Read(p []byte) (n int, err os.Error) { func (this *Reader) Read(p []byte) (n int, err os.Error) {
this.fillRawBuffer() // checks for when we have no data
this.fillConvertedBuffer() for this.writePos == 0 || this.readPos == this.writePos {
// if we have an error / EOF, just return it
if this.err != nil {
return n, this.err
}
if this.convertedWritePos - 1 > this.convertedReadPos { // else, fill our buffer
// copy converted bytes into p this.fillBuffer()
n = copy(p, this.convertedBuffer[this.convertedReadPos:this.convertedWritePos])
} }
return // TODO: checks for when we have less data than len(p)
// we should have an appropriate amount of data, convert it into the given buffer
bytesRead, bytesWritten, err := this.converter.Convert(this.buffer[this.readPos:this.writePos], p)
// adjust byte counters
this.readPos += bytesRead
n += bytesWritten
// if we experienced an iconv error, check it
if err != nil {
// E2BIG errors can be ignored (we'll get them often) as long
// as at least 1 byte was written. If we experienced an E2BIG
// and no bytes were written then the buffer is too small for
// even the next character
if err != E2BIG || bytesWritten == 0 {
// track anything else
this.err = err
}
}
// return our results
return n, this.err
} }