iconv-go/converter.go
Donovan Jimenez a84994e6e9 Rework Reader and Writer
* add tests that cover same behaviors as Convert and ConvertString
 * align read and write behaviors with bufio to play nice
 * add methods that allow to customize buffer size
 * add methods to reset, allowing reuse
2017-04-29 23:19:49 -04:00

150 lines
4.5 KiB
Go

package iconv
/*
#cgo darwin LDFLAGS: -liconv
#cgo freebsd LDFLAGS: -liconv
#cgo windows LDFLAGS: -liconv
#include <stdlib.h>
#include <iconv.h>
#include <locale.h>
// As of Go 1.6, passing C a pointer to a Go pointer will lead to a panic.
// Therefore we use this wrapper function to avoid passing a **char directly from Go.
//
// The buffer pointers are taken by value and their addresses are handed to
// iconv, so any advancement iconv makes to in/out stays local to this call.
// The caller observes progress only through the remaining-byte counters
// *size_in and *size_out, which iconv updates in place.
size_t call_iconv(iconv_t ctx, char *in, size_t *size_in, char *out, size_t *size_out){
return iconv(ctx, &in, size_in, &out, size_out);
}
*/
import "C"
import "syscall"
import "unsafe"
// Converter wraps an iconv conversion descriptor and exposes conversion
// operations on it.
type Converter struct {
// context is the iconv descriptor passed to the C iconv functions.
context C.iconv_t
}
// NewConverter initializes a new Converter that converts from fromEncoding
// to toEncoding.
//
// If fromEncoding or toEncoding are not supported by iconv then an EINVAL
// error will be returned. An ENOMEM error may be returned if there is not
// enough memory to initialize an iconv descriptor.
func NewConverter(fromEncoding string, toEncoding string) (*Converter, error) {
	// convert to C strings; they are only needed for the duration of the call
	toEncodingC := C.CString(toEncoding)
	defer C.free(unsafe.Pointer(toEncodingC))

	fromEncodingC := C.CString(fromEncoding)
	defer C.free(unsafe.Pointer(fromEncodingC))

	// open an iconv descriptor
	context, err := C.iconv_open(toEncodingC, fromEncodingC)

	// iconv_open signals failure by returning (iconv_t)-1. The error cgo
	// reports is merely errno after the call, and a successful iconv_open may
	// leave errno non-zero, so the return value decides success, not err.
	if uintptr(unsafe.Pointer(context)) == ^uintptr(0) {
		if err == nil {
			// failure without errno set; report the documented generic cause
			err = syscall.EINVAL
		}
		return nil, err
	}

	return &Converter{context}, nil
}
// Close closes the Converter's iconv descriptor explicitly.
//
// The Converter must not be used after Close has been called. An error is
// returned only when iconv_close itself reports failure.
func (converter *Converter) Close() error {
	status, err := C.iconv_close(converter.context)

	// iconv_close returns 0 on success. The error cgo reports is just errno
	// after the call, which may be non-zero even though the call succeeded,
	// so only consult it when the return value indicates failure.
	if status == 0 {
		return nil
	}

	if err == nil {
		// failure without errno set; EBADF is the documented failure cause
		return syscall.EBADF
	}

	return err
}
// Reset puts the iconv context back into its initial state.
//
// It does so by calling Convert with neither an input nor an output buffer,
// and returns whatever error that call reports.
func (converter *Converter) Reset() error {
	var noInput, noOutput []byte

	_, _, resetErr := converter.Convert(noInput, noOutput)

	return resetErr
}
// Convert converts bytes from an input byte slice into a given output byte slice.
//
// As many bytes as can be converted and fit into output will be processed.
// The number of bytes read from input and the number of bytes written to
// output are always returned, including when err is non-nil. If not all
// converted bytes can fit into output, an E2BIG error will be returned. If
// input contains an invalid sequence of bytes for the Converter's
// fromEncoding, an EILSEQ error will be returned.
//
// For shift based output encodings, any end shift byte sequences can be
// generated by passing a 0 length byte slice as input. Passing a 0 length byte
// slice for output as well will simply reset the iconv descriptor shift state
// without writing any bytes.
func (converter *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err error) {
	inputLeft := C.size_t(len(input))
	outputLeft := C.size_t(len(output))

	// Empty slices have no addressable first element; leave the pointer nil,
	// which iconv treats as "no buffer".
	var inputPointer, outputPointer *C.char
	if inputLeft > 0 {
		inputPointer = (*C.char)(unsafe.Pointer(&input[0]))
	}
	if outputLeft > 0 {
		outputPointer = (*C.char)(unsafe.Pointer(&output[0]))
	}

	result, errno := C.call_iconv(converter.context, inputPointer, &inputLeft, outputPointer, &outputLeft)

	// iconv signals failure by returning (size_t)-1. The error cgo reports is
	// just errno after the call, which may be non-zero after a successful
	// call, so it is only meaningful when the return value indicates failure.
	if result == ^C.size_t(0) {
		err = errno
	}

	// iconv decrements the counters by the amount it consumed and produced
	bytesRead = len(input) - int(inputLeft)
	bytesWritten = len(output) - int(outputLeft)

	return bytesRead, bytesWritten, err
}
// ConvertString converts an input string and returns the converted string.
//
// The output buffer is grown as needed, so E2BIG is not normally returned.
// An EILSEQ error may be returned if input contains invalid bytes for the
// Converter's fromEncoding. When an error is returned, output holds whatever
// was converted before the error occurred.
func (converter *Converter) ConvertString(input string) (output string, err error) {
	// Bounds on the spare room reserved for the closing shift sequence.
	const (
		minFlushSpace = 16
		maxFlushSpace = 4096
	)

	// construct the buffers
	inputBuffer := []byte(input)
	outputBuffer := make([]byte, len(inputBuffer)*2) // we use a larger buffer to help avoid resizing later

	// call Convert until all input bytes are read or an error occurs
	var bytesRead, totalBytesRead, bytesWritten, totalBytesWritten int

	for totalBytesRead < len(inputBuffer) && err == nil {
		// use the totals to create buffer slices
		bytesRead, bytesWritten, err = converter.Convert(inputBuffer[totalBytesRead:], outputBuffer[totalBytesWritten:])

		totalBytesRead += bytesRead
		totalBytesWritten += bytesWritten

		switch err {
		case syscall.E2BIG:
			// increase the size of the output buffer by another input length,
			// carrying over what has been written so far
			tempBuffer := make([]byte, len(outputBuffer)+len(inputBuffer))
			copy(tempBuffer, outputBuffer[:totalBytesWritten])
			outputBuffer = tempBuffer

			// forget the error
			err = nil

		case syscall.EILSEQ, syscall.EINVAL:
			// iconv can still return these in cases where it still can proceed such as //IGNORE
			if bytesRead > 0 || bytesWritten > 0 {
				err = nil
			}
		}
	}

	if err == nil {
		// Perform a final shift state reset. The output slice must not be
		// empty here: Convert treats an empty output as "reset without
		// writing", which would silently drop the closing shift sequence when
		// the buffer happens to be exactly full. So guarantee some spare room
		// first, and retry with more if iconv still reports E2BIG.
		for spare := minFlushSpace; ; spare *= 2 {
			if len(outputBuffer)-totalBytesWritten < spare {
				grown := make([]byte, totalBytesWritten+spare)
				copy(grown, outputBuffer[:totalBytesWritten])
				outputBuffer = grown
			}

			_, bytesWritten, err = converter.Convert(nil, outputBuffer[totalBytesWritten:])

			// update total count
			totalBytesWritten += bytesWritten

			// stop on success, on any other error, or once the spare room is
			// far beyond any plausible shift sequence (avoids looping forever)
			if err != syscall.E2BIG || spare >= maxFlushSpace {
				break
			}
		}
	}

	// construct the final output string
	output = string(outputBuffer[:totalBytesWritten])

	return output, err
}
// finalizeConverter closes the given Converter's iconv descriptor.
//
// NOTE(review): judging by the name this is meant to be registered as a
// finalizer, but no registration is visible in this part of the file; confirm
// against the callers.
func finalizeConverter(converter *Converter) {
	// The function has no return value, so there is nowhere to report a
	// Close error; it is deliberately discarded.
	_ = converter.Close()
}