Browse Source

* Added README.md

* moved sample programs to examples directory
 * cleaned up the Makefile
 * converter.go: ConvertString now uses Convert under the hood, removes some code duplication
 * reader.go: No need to have two separate buffers; Read can convert directly into the buffer it is given. Simplifies the code greatly
tags/v1.0.1
Donovan Jimenez 13 years ago
parent
commit
690531c87e
8 changed files with 167 additions and 132 deletions
  1. +5
    -24
      Makefile
  2. +72
    -0
      README.md
  3. +45
    -62
      converter.go
  4. +0
    -0
      examples/sample.ebcdic-us
  5. +0
    -0
      examples/sample.go
  6. +0
    -0
      examples/sample.utf8
  7. +2
    -2
      iconv.go
  8. +43
    -44
      reader.go

+ 5
- 24
Makefile View File

@@ -1,34 +1,15 @@
# Copyright 2009 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

include $(GOROOT)/src/Make.inc

# target package name
TARG=iconv

# regular go files
GOFILES=\
reader.go
reader.go\

# files that must be processed by cgo
CGOFILES=\
converter.go\
iconv.go\
converter.go

ifeq ($(GOOS),windows)
CGO_LDFLAGS=-liconv
endif

# To add flags necessary for locating the library or its include files,
# set CGO_CFLAGS or CGO_LDFLAGS. For example, to use an
# alternate installation of the library:
# CGO_CFLAGS=-I/home/rsc/gmp32/include
# CGO_LDFLAGS+=-L/home/rsc/gmp32/lib
# Note the += on the second line.

CLEANFILES+=sample

include $(GOROOT)/src/Make.pkg

# simple test program to test iconv conversion
sample: install sample.go
$(GC) $@.go
$(LD) -o $@ $@.$O

+ 72
- 0
README.md View File

@@ -0,0 +1,72 @@
Install
=======

The goinstall command can be used:

goinstall github.com/djimenez/iconv.go

Or, you can clone the repository and use gomake instead:

git clone git://github.com/djimenez/iconv.go.git iconv
cd iconv
gomake install

Usage
=====

To use the package, you'll need the appropriate import statement:

import (
// if you used goinstall, you'll want this import
iconv "github.com/djimenez/iconv.go"

// if you used gomake install directly, you'll want this import
"iconv"
)

Converting string Values
------------------------

Converting a string can be done with two methods. First, there's iconv.ConvertString(input, fromEncoding, toEncoding string)

output,_ := iconv.ConvertString("Hello World!", "utf-8", "windows-1252")

Alternatively, you can create a converter and use its ConvertString method. This mostly just saves having to parse the from and to encodings when converting many strings in the same way.

converter, _ := iconv.NewConverter("utf-8", "windows-1252")
output,_ := converter.ConvertString("Hello World!")

Converting []byte Values
------------------------

Converting a []byte can similarly be done with two methods. First, there's iconv.Convert(input, output []byte, fromEncoding, toEncoding string). You'll immediately notice this requires you to give it both the input and the output buffer. Ideally, the output buffer should be sized so that it can hold all converted bytes from the input, but if it cannot, then Convert will put as many bytes as it can into the buffer without creating an invalid sequence. For example, if iconv has only a single byte left in the output buffer but needs 2 or more for the complete character in a multibyte encoding, it will stop writing to the buffer and return with an iconv.E2BIG error:

input := []byte("Hello World!")
output := make([]byte, len(input))
bytesRead, bytesWritten, error := iconv.Convert(input, output, "utf-8", "windows-1252")

Just like with ConvertString, there is also a Convert method on Converter that can be used.

...
converter, _ := iconv.NewConverter("utf-8", "windows-1252")
bytesRead, bytesWritten, error := converter.Convert(input, output)

Converting an io.Reader
------------------------

The iconv.Reader allows any other io.Reader to be wrapped and have its bytes transcoded as they are read.

// We're wrapping stdin for simplicity, but a File or network reader could be wrapped as well
reader,_ := iconv.NewReader(os.Stdin, "utf-8", "windows-1252")

Converting an io.Writer
------------------------

To be written.

Piping a Conversion
-------------------

To be written.

+ 45
- 62
converter.go View File

@@ -13,7 +13,7 @@ type Converter struct {
open bool
}

func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err os.Error) {
func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err Error) {
converter = new(Converter)

converter.context, err = C.iconv_open(C.CString(toEncoding), C.CString(fromEncoding))
@@ -47,21 +47,21 @@ func (this *Converter) Close() (err os.Error) {
//
// NOTE: not all bytes may be consumed from the input. This can be because the output
// buffer is too small or because there were iconv errors
func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err os.Error) {
func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err Error) {
inputLeft := C.size_t(len(input))
outputLeft := C.size_t(len(output))
if inputLeft > 0 && outputLeft > 0 {
// we're going to give iconv the pointers to the underlying
// storage of each byte slice - so far this is the simplest
// way i've found to do that in Go, but it seems ugly
inputFirstElementPointer := &input[0]
inputPointer := (**C.char)(unsafe.Pointer(&inputFirstElementPointer))

// we're going to give iconv the pointers to the underlying
// storage of each byte slice - so far this is the simplest
// way i've found to do that in Go, but it seems ugly
inputFirstElementPointer := &input[0]
inputPointer := (**C.char)(unsafe.Pointer(&inputFirstElementPointer))

outputFirstElementPointer := &output[0]
outputPointer := (**C.char)(unsafe.Pointer(&outputFirstElementPointer))
outputFirstElementPointer := &output[0]
outputPointer := (**C.char)(unsafe.Pointer(&outputFirstElementPointer))

// we're only going to make one call to iconv
if inputLeft > 0 && outputLeft > 0 {
// we're only going to make one call to iconv
_,err = C.iconv(this.context, inputPointer, &inputLeft, outputPointer, &outputLeft)

// update byte counters
@@ -72,59 +72,42 @@ func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, byte
return bytesRead, bytesWritten, err
}

// convert the bytes of a string and return the resulting string
//
// TODO: can we do this in terms of Convert function
func (this *Converter) ConvertString(input string) (output string, err os.Error) {
// both our input buffer and output buffer will be the same size
// but we'll reuse our output buffer each time its filled
bufferSize := len(input)
sourceLeft := C.size_t(bufferSize)
outputLeft := sourceLeft
outputReset := outputLeft

// our input buffer is the source string, but iconv will track
// how many bytes has left to process
sourceBuffer := C.CString(input)
sourcePointer := &sourceBuffer

outputBuffer := make([]byte, bufferSize)
outputFirstPointer := &outputBuffer[0]
outputPointer := (**C.char)(unsafe.Pointer(&outputFirstPointer))

// process the source with iconv in a loop
for sourceLeft > 0 {
//fmt.Println("calling to iconv")
_,err := C.iconv(this.context, sourcePointer, &sourceLeft, outputPointer, &outputLeft)

//fmt.Println("sourceLeft: ", int(sourceLeft), " outputLeft: ", int(outputLeft))

// check the err - most interested if we need to expand the output buffer
if err != nil {
//fmt.Println("got error value: ", err)

if err == E2BIG {
// we need more output buffer to continue
// instead of resizing, lets pull what we got so far
// and set outputLeft back to the buffer size
output += string(outputBuffer[0:bufferSize - int(outputLeft)])
outputLeft = outputReset
} else {
// we got an error we can't continue with
break
}
// convert a string value, returning a new string value
func (this *Converter) ConvertString(input string) (output string, err Error) {

// construct the buffers
inputBuffer := []byte(input)
outputBuffer := make([]byte, len(inputBuffer) * 2) // we use a larger buffer to help avoid resizing later

// call Convert until all input bytes are read or an error occurs
var bytesRead, totalBytesRead, bytesWritten, totalBytesWritten int

for totalBytesRead < len(inputBuffer) && err == nil {
bytesRead, bytesWritten, err = this.Convert(inputBuffer, outputBuffer)

totalBytesRead += bytesRead
totalBytesWritten += bytesWritten

// check for the E2BIG error specifically, we can add to the output
// buffer to correct for it and then continue
if err == E2BIG {
// increase the size of the output buffer by another input length
// first, create a new buffer
tempBuffer := make([]byte, len(outputBuffer) + len(inputBuffer))
// copy the existing data
copy(tempBuffer, outputBuffer)

// switch the buffers
outputBuffer = tempBuffer

// forget the error
err = nil
}
}

// free our sourceBuffer, no longer needed
//C.free(unsafe.Pointer(&sourceBuffer))

// convert output buffer a go string
output += string(outputBuffer[0:bufferSize - int(outputLeft)])
// construct the final output string
output = string(outputBuffer[:totalBytesWritten])

// free our outputBuffer, no longer needed
//C.free(unsafe.Pointer(&outputBuffer))
// return result and any err
return output, err
}

sample.ebcdic-us → examples/sample.ebcdic-us View File


sample.go → examples/sample.go View File


sample.utf8 → examples/sample.utf8 View File


+ 2
- 2
iconv.go View File

@@ -15,7 +15,7 @@ var (
E2BIG Error = os.Errno(int(C.E2BIG))
)

func Convert(input []byte, output []byte, fromEncoding string, toEncoding string) (bytesRead int, bytesWritten int, err os.Error) {
func Convert(input []byte, output []byte, fromEncoding string, toEncoding string) (bytesRead int, bytesWritten int, err Error) {
// create a new converter
converter, err := NewConverter(fromEncoding, toEncoding)

@@ -30,7 +30,7 @@ func Convert(input []byte, output []byte, fromEncoding string, toEncoding string
return
}

func ConvertString(input string, fromEncoding string, toEncoding string) (output string, err os.Error) {
func ConvertString(input string, fromEncoding string, toEncoding string) (output string, err Error) {
// create a new converter
converter, err := NewConverter(fromEncoding, toEncoding)



+ 43
- 44
reader.go View File

@@ -8,10 +8,9 @@ import (
type Reader struct {
source io.Reader
converter *Converter
rawBuffer []byte
rawReadPos, rawWritePos int
convertedBuffer []byte
convertedReadPos, convertedWritePos int
buffer []byte
readPos, writePos int
err os.Error
}

func NewReader(source io.Reader, fromEncoding string, toEncoding string) (*Reader, os.Error) {
@@ -34,68 +33,68 @@ func NewReaderFromConverter(source io.Reader, converter *Converter) (reader *Rea
reader.converter = converter

// create 8K buffers
reader.rawBuffer = make([]byte, 8 * 1024)
reader.convertedBuffer = make([]byte, 8 * 1024)
reader.buffer = make([]byte, 8 * 1024)

return reader
}

func (this *Reader) fillRawBuffer() {
func (this *Reader) fillBuffer() {
// slide existing data to beginning
if this.rawReadPos > 0 {
// copy current bytes
copy(this.rawBuffer, this.rawBuffer[this.rawReadPos:this.rawWritePos])
if this.readPos > 0 {
// copy current bytes - is this guaranteed safe?
copy(this.buffer, this.buffer[this.readPos:this.writePos])

// adjust positions
this.rawWritePos -= this.rawReadPos
this.rawReadPos = 0
this.writePos -= this.readPos
this.readPos = 0
}

// read new data into buffer at write position
bytesRead, err := this.source.Read(this.rawBuffer[this.rawWritePos:])
bytesRead, err := this.source.Read(this.buffer[this.writePos:])

// adjust write position
this.rawWritePos += bytesRead
this.writePos += bytesRead

// track source reader errors
// track any reader error / EOF
if err != nil {
// not sure where to put this for now
this.err = err
}
}

func (this *Reader) fillConvertedBuffer() {
// slide existing data to beginning
if this.convertedReadPos > 0 {
// copy current bytes
copy(this.convertedBuffer, this.convertedBuffer[this.convertedReadPos:this.convertedWritePos])

// adjust positions
this.convertedWritePos -= this.convertedReadPos
this.convertedReadPos = 0
}
// implement the io.Reader interface
func (this *Reader) Read(p []byte) (n int, err os.Error) {
// checks for when we have no data
for this.writePos == 0 || this.readPos == this.writePos {
// if we have an error / EOF, just return it
if this.err != nil {
return n, this.err
}

// use iconv to fill the converted buffer from the raw buffer
bytesRead, bytesWritten, err := this.converter.Convert(this.rawBuffer[this.rawReadPos:this.rawWritePos], this.convertedBuffer[this.convertedWritePos:])
// else, fill our buffer
this.fillBuffer()
}

// adjust read and write positions
this.rawReadPos += bytesRead
this.convertedWritePos += bytesWritten
// TODO: checks for when we have less data than len(p)

// track iconv convert errors
if err != nil {
// not sure where to put this for now
}
}
// we should have an appropriate amount of data, convert it into the given buffer
bytesRead, bytesWritten, err := this.converter.Convert(this.buffer[this.readPos:this.writePos], p)

// implement the io.Reader interface
func (this *Reader) Read(p []byte) (n int, err os.Error) {
this.fillRawBuffer()
this.fillConvertedBuffer()
// adjust byte counters
this.readPos += bytesRead
n += bytesWritten

if this.convertedWritePos - 1 > this.convertedReadPos {
// copy converted bytes into p
n = copy(p, this.convertedBuffer[this.convertedReadPos:this.convertedWritePos])
// if we experienced an iconv error, check it
if err != nil {
// E2BIG errors can be ignored (we'll get them often) as long
// as at least 1 byte was written. If we experienced an E2BIG
// and no bytes were written then the buffer is too small for
// even the next character
if err != E2BIG || bytesWritten == 0 {
// track anything else
this.err = err
}
}

return
// return our results
return n, this.err
}

Loading…
Cancel
Save