iconv-go/converter.go

163 lines
5.0 KiB
Go
Raw Normal View History

package iconv
/*
#cgo darwin LDFLAGS: -liconv
#cgo freebsd LDFLAGS: -liconv
#cgo linux LDFLAGS: -liconv
#cgo windows LDFLAGS: -liconv
#include <stdlib.h>
#include <iconv.h>
*/
import "C"
import "syscall"
import "unsafe"
type Converter struct {
context C.iconv_t
open bool
}
// Initialize a new Converter. If fromEncoding or toEncoding are not supported by
// iconv then an EINVAL error will be returned. An ENOMEM error maybe returned if
// there is not enough memory to initialize an iconv descriptor
func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err error) {
converter = new(Converter)
// convert to C strings
toEncodingC := C.CString(toEncoding)
fromEncodingC := C.CString(fromEncoding)
// open an iconv descriptor
converter.context, err = C.iconv_open(toEncodingC, fromEncodingC)
// free the C Strings
C.free(unsafe.Pointer(toEncodingC))
C.free(unsafe.Pointer(fromEncodingC))
// check err
if err == nil {
// no error, mark the context as open
converter.open = true
}
return
}
// destroy is called during garbage collection
func (this *Converter) destroy() {
this.Close()
}
// Close a Converter's iconv description explicitly
func (this *Converter) Close() (err error) {
if this.open {
_, err = C.iconv_close(this.context)
}
return
}
// Convert bytes from an input byte slice into a give output byte slice
//
// As many bytes that can converted and fit into the size of output will be
// processed and the number of bytes read for input as well as the number of
// bytes written to output will be returned. If not all converted bytes can fit
// into output and E2BIG error will also be returned. If input contains an invalid
// sequence of bytes for the Converter's fromEncoding an EILSEQ error will be returned
//
// For shift based output encodings, any end shift byte sequences can be generated by
// passing a 0 length byte slice as input. Also passing a 0 length byte slice for output
// will simply reset the iconv descriptor shift state without writing any bytes.
func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err error) {
// make sure we are still open
if this.open {
inputLeft := C.size_t(len(input))
outputLeft := C.size_t(len(output))
if inputLeft > 0 && outputLeft > 0 {
// we have to give iconv a pointer to a pointer of the underlying
// storage of each byte slice - so far this is the simplest
// way i've found to do that in Go, but it seems ugly
inputPointer := (*C.char)(unsafe.Pointer(&input[0]))
outputPointer := (*C.char)(unsafe.Pointer(&output[0]))
_, err = C.iconv(this.context, &inputPointer, &inputLeft, &outputPointer, &outputLeft)
// update byte counters
bytesRead = len(input) - int(inputLeft)
bytesWritten = len(output) - int(outputLeft)
} else if inputLeft == 0 && outputLeft > 0 {
// inputPointer will be nil, outputPointer is generated as above
outputPointer := (*C.char)(unsafe.Pointer(&output[0]))
_, err = C.iconv(this.context, nil, &inputLeft, &outputPointer, &outputLeft)
// update write byte counter
bytesWritten = len(output) - int(outputLeft)
} else {
// both input and output are zero length, do a shift state reset
_, err = C.iconv(this.context, nil, &inputLeft, nil, &outputLeft)
}
} else {
err = syscall.EBADF
}
return bytesRead, bytesWritten, err
}
// Convert an input string
//
// EILSEQ error may be returned if input contains invalid bytes for the
// Converter's fromEncoding.
func (this *Converter) ConvertString(input string) (output string, err error) {
// make sure we are still open
if this.open {
// construct the buffers
inputBuffer := []byte(input)
outputBuffer := make([]byte, len(inputBuffer)*2) // we use a larger buffer to help avoid resizing later
// call Convert until all input bytes are read or an error occurs
var bytesRead, totalBytesRead, bytesWritten, totalBytesWritten int
for totalBytesRead < len(inputBuffer) && err == nil {
// use the totals to create buffer slices
bytesRead, bytesWritten, err = this.Convert(inputBuffer[totalBytesRead:], outputBuffer[totalBytesWritten:])
totalBytesRead += bytesRead
totalBytesWritten += bytesWritten
// check for the E2BIG error specifically, we can add to the output
// buffer to correct for it and then continue
if err == syscall.E2BIG {
// increase the size of the output buffer by another input length
// first, create a new buffer
tempBuffer := make([]byte, len(outputBuffer)+len(inputBuffer))
// copy the existing data
copy(tempBuffer, outputBuffer)
// switch the buffers
outputBuffer = tempBuffer
// forget the error
err = nil
}
}
if err == nil {
// perform a final shift state reset
_, bytesWritten, err = this.Convert([]byte{}, outputBuffer[totalBytesWritten:])
// update total count
totalBytesWritten += bytesWritten
}
// construct the final output string
output = string(outputBuffer[:totalBytesWritten])
} else {
err = syscall.EBADF
}
return output, err
}