2011-01-15 07:34:30 +08:00
|
|
|
package iconv
|
|
|
|
|
2011-01-17 03:40:30 +08:00
|
|
|
/*
|
2013-06-28 04:56:58 +08:00
|
|
|
#cgo darwin LDFLAGS: -liconv
|
2012-06-23 09:33:59 +08:00
|
|
|
#cgo freebsd LDFLAGS: -liconv
|
|
|
|
#cgo windows LDFLAGS: -liconv
|
2011-01-17 03:40:30 +08:00
|
|
|
#include <stdlib.h>
|
2011-01-29 14:31:00 +08:00
|
|
|
#include <iconv.h>
|
2017-04-25 12:37:24 +08:00
|
|
|
#include <locale.h>
|
2016-02-21 03:24:08 +08:00
|
|
|
|
|
|
|
// As of Go 1.6, handing C a pointer to a Go pointer leads to a panic.
// Therefore this wrapper takes plain char* arguments and forms the char**
// values on the C side, so Go never passes a **char directly.
//
// The return value is whatever iconv() returns. size_in and size_out are
// updated in place by iconv(); the advanced in/out cursors are local to this
// function and are not reported back to the caller.
size_t call_iconv(iconv_t ctx, char *in, size_t *size_in, char *out, size_t *size_out)
{
	char **in_cursor = &in;
	char **out_cursor = &out;

	return iconv(ctx, in_cursor, size_in, out_cursor, size_out);
}
|
|
|
|
|
2011-01-17 03:40:30 +08:00
|
|
|
*/
|
2011-01-15 07:34:30 +08:00
|
|
|
import "C"
|
2012-04-11 06:30:42 +08:00
|
|
|
import "syscall"
|
2011-01-29 14:31:00 +08:00
|
|
|
import "unsafe"
|
2011-01-15 07:34:30 +08:00
|
|
|
|
|
|
|
type Converter struct {
|
|
|
|
context C.iconv_t
|
|
|
|
}
|
|
|
|
|
2011-01-29 14:31:00 +08:00
|
|
|
// Initialize a new Converter. If fromEncoding or toEncoding are not supported by
|
|
|
|
// iconv then an EINVAL error will be returned. An ENOMEM error maybe returned if
|
|
|
|
// there is not enough memory to initialize an iconv descriptor
|
2017-04-25 12:37:24 +08:00
|
|
|
func NewConverter(fromEncoding string, toEncoding string) (*Converter, error) {
|
2011-01-29 14:31:00 +08:00
|
|
|
// convert to C strings
|
2011-01-17 03:40:30 +08:00
|
|
|
toEncodingC := C.CString(toEncoding)
|
|
|
|
fromEncodingC := C.CString(fromEncoding)
|
|
|
|
|
|
|
|
// open an iconv descriptor
|
2017-04-25 12:37:24 +08:00
|
|
|
context, err := C.iconv_open(toEncodingC, fromEncodingC)
|
2011-01-17 03:40:30 +08:00
|
|
|
|
|
|
|
// free the C Strings
|
|
|
|
C.free(unsafe.Pointer(toEncodingC))
|
|
|
|
C.free(unsafe.Pointer(fromEncodingC))
|
2011-01-15 07:34:30 +08:00
|
|
|
|
2017-04-25 12:37:24 +08:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2011-01-15 07:34:30 +08:00
|
|
|
}
|
|
|
|
|
2017-04-25 12:37:24 +08:00
|
|
|
return &Converter{context}, nil
|
2011-01-15 07:34:30 +08:00
|
|
|
}
|
|
|
|
|
2017-04-25 12:37:24 +08:00
|
|
|
// Close a Converter's iconv descriptor explicitly
|
|
|
|
func (converter *Converter) Close() error {
|
|
|
|
_, err := C.iconv_close(converter.context)
|
|
|
|
return err
|
2011-01-15 07:34:30 +08:00
|
|
|
}
|
|
|
|
|
2017-04-25 12:37:24 +08:00
|
|
|
// Reset state of iconv context
|
|
|
|
func (converter *Converter) Reset() error {
|
|
|
|
_, _, err := converter.Convert(nil, nil)
|
|
|
|
return err
|
2011-01-15 07:34:30 +08:00
|
|
|
}
|
|
|
|
|
2011-01-29 14:31:00 +08:00
|
|
|
// Convert bytes from an input byte slice into a give output byte slice
|
2011-01-15 07:34:30 +08:00
|
|
|
//
|
2011-01-29 14:31:00 +08:00
|
|
|
// As many bytes that can converted and fit into the size of output will be
|
|
|
|
// processed and the number of bytes read for input as well as the number of
|
|
|
|
// bytes written to output will be returned. If not all converted bytes can fit
|
|
|
|
// into output and E2BIG error will also be returned. If input contains an invalid
|
|
|
|
// sequence of bytes for the Converter's fromEncoding an EILSEQ error will be returned
|
|
|
|
//
|
|
|
|
// For shift based output encodings, any end shift byte sequences can be generated by
|
|
|
|
// passing a 0 length byte slice as input. Also passing a 0 length byte slice for output
|
|
|
|
// will simply reset the iconv descriptor shift state without writing any bytes.
|
2017-04-25 12:37:24 +08:00
|
|
|
func (converter *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err error) {
|
|
|
|
inputLeft := C.size_t(len(input))
|
|
|
|
outputLeft := C.size_t(len(output))
|
|
|
|
|
|
|
|
var inputPointer, outputPointer *C.char
|
|
|
|
|
|
|
|
if inputLeft > 0 {
|
|
|
|
inputPointer = (*C.char)(unsafe.Pointer(&input[0]))
|
|
|
|
}
|
|
|
|
|
|
|
|
if outputLeft > 0 {
|
|
|
|
outputPointer = (*C.char)(unsafe.Pointer(&output[0]))
|
2011-01-15 07:34:30 +08:00
|
|
|
}
|
2011-01-29 14:31:00 +08:00
|
|
|
|
2017-04-25 12:37:24 +08:00
|
|
|
_, err = C.call_iconv(converter.context, inputPointer, &inputLeft, outputPointer, &outputLeft)
|
|
|
|
|
|
|
|
bytesRead = len(input) - int(inputLeft)
|
|
|
|
bytesWritten = len(output) - int(outputLeft)
|
|
|
|
|
2011-01-15 07:34:30 +08:00
|
|
|
return bytesRead, bytesWritten, err
|
|
|
|
}
|
|
|
|
|
2011-01-29 14:31:00 +08:00
|
|
|
// Convert an input string
|
|
|
|
//
|
2017-04-25 12:37:24 +08:00
|
|
|
// EILSEQ error may be returned if input contains invalid bytes for the Converter's fromEncoding
|
|
|
|
func (converter *Converter) ConvertString(input string) (output string, err error) {
|
|
|
|
// construct the buffers
|
|
|
|
inputBuffer := []byte(input)
|
|
|
|
outputBuffer := make([]byte, len(inputBuffer)*2) // we use a larger buffer to help avoid resizing later
|
|
|
|
|
|
|
|
// call Convert until all input bytes are read or an error occurs
|
|
|
|
var bytesRead, totalBytesRead, bytesWritten, totalBytesWritten int
|
|
|
|
|
|
|
|
for totalBytesRead < len(inputBuffer) && err == nil {
|
|
|
|
// use the totals to create buffer slices
|
|
|
|
bytesRead, bytesWritten, err = converter.Convert(inputBuffer[totalBytesRead:], outputBuffer[totalBytesWritten:])
|
|
|
|
|
|
|
|
totalBytesRead += bytesRead
|
|
|
|
totalBytesWritten += bytesWritten
|
|
|
|
|
|
|
|
switch err {
|
|
|
|
case syscall.E2BIG:
|
|
|
|
// increase the size of the output buffer by another input length
|
|
|
|
// first, create a new buffer
|
|
|
|
tempBuffer := make([]byte, len(outputBuffer)+len(inputBuffer))
|
|
|
|
|
|
|
|
// copy the existing data
|
|
|
|
copy(tempBuffer, outputBuffer)
|
|
|
|
|
|
|
|
// switch the buffers
|
|
|
|
outputBuffer = tempBuffer
|
|
|
|
|
|
|
|
// forget the error
|
|
|
|
err = nil
|
|
|
|
case syscall.EILSEQ, syscall.EINVAL:
|
|
|
|
// iconv can still return these in cases where it still can proceed such as //IGNORE
|
|
|
|
if bytesRead > 0 || bytesWritten > 0 {
|
2011-01-29 14:31:00 +08:00
|
|
|
err = nil
|
|
|
|
}
|
|
|
|
}
|
2017-04-25 12:37:24 +08:00
|
|
|
}
|
2012-04-11 06:30:42 +08:00
|
|
|
|
2017-04-25 12:37:24 +08:00
|
|
|
if err == nil {
|
|
|
|
// perform a final shift state reset
|
|
|
|
_, bytesWritten, err = converter.Convert(nil, outputBuffer[totalBytesWritten:])
|
2012-04-11 06:30:42 +08:00
|
|
|
|
2017-04-25 12:37:24 +08:00
|
|
|
// update total count
|
|
|
|
totalBytesWritten += bytesWritten
|
2011-01-15 07:34:30 +08:00
|
|
|
}
|
|
|
|
|
2017-04-25 12:37:24 +08:00
|
|
|
// construct the final output string
|
|
|
|
output = string(outputBuffer[:totalBytesWritten])
|
|
|
|
|
2011-01-15 07:34:30 +08:00
|
|
|
return output, err
|
|
|
|
}
|
2017-04-25 12:37:24 +08:00
|
|
|
|
|
|
|
func finalizeConverter(converter *Converter) {
|
|
|
|
converter.Close()
|
|
|
|
}
|