You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

160 lines
4.9 KiB

  1. package iconv
/*
#cgo darwin LDFLAGS: -liconv
#include <stdlib.h>
#include <iconv.h>

// cgo forbids passing C a pointer to Go memory that itself contains a Go
// pointer, so Go code cannot build the char** arguments iconv expects from
// byte slices. This wrapper receives the buffer pointers by value and forms
// the char** on the C side instead.
static size_t call_iconv(iconv_t cd, char *in, size_t *in_left, char *out, size_t *out_left) {
	return iconv(cd, &in, in_left, &out, out_left);
}
*/
import "C"

import (
	"syscall"
	"unsafe"
)
  10. type Converter struct {
  11. context C.iconv_t
  12. open bool
  13. }
  14. // Initialize a new Converter. If fromEncoding or toEncoding are not supported by
  15. // iconv then an EINVAL error will be returned. An ENOMEM error maybe returned if
  16. // there is not enough memory to initialize an iconv descriptor
  17. func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err error) {
  18. converter = new(Converter)
  19. // convert to C strings
  20. toEncodingC := C.CString(toEncoding)
  21. fromEncodingC := C.CString(fromEncoding)
  22. // open an iconv descriptor
  23. converter.context, err = C.iconv_open(toEncodingC, fromEncodingC)
  24. // free the C Strings
  25. C.free(unsafe.Pointer(toEncodingC))
  26. C.free(unsafe.Pointer(fromEncodingC))
  27. // check err
  28. if err == nil {
  29. // no error, mark the context as open
  30. converter.open = true
  31. }
  32. return
  33. }
  34. // destroy is called during garbage collection
  35. func (this *Converter) destroy() {
  36. this.Close()
  37. }
  38. // Close a Converter's iconv description explicitly
  39. func (this *Converter) Close() (err error) {
  40. if this.open {
  41. _, err = C.iconv_close(this.context)
  42. }
  43. return
  44. }
  45. // Convert bytes from an input byte slice into a give output byte slice
  46. //
  47. // As many bytes that can converted and fit into the size of output will be
  48. // processed and the number of bytes read for input as well as the number of
  49. // bytes written to output will be returned. If not all converted bytes can fit
  50. // into output and E2BIG error will also be returned. If input contains an invalid
  51. // sequence of bytes for the Converter's fromEncoding an EILSEQ error will be returned
  52. //
  53. // For shift based output encodings, any end shift byte sequences can be generated by
  54. // passing a 0 length byte slice as input. Also passing a 0 length byte slice for output
  55. // will simply reset the iconv descriptor shift state without writing any bytes.
  56. func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err error) {
  57. // make sure we are still open
  58. if this.open {
  59. inputLeft := C.size_t(len(input))
  60. outputLeft := C.size_t(len(output))
  61. if inputLeft > 0 && outputLeft > 0 {
  62. // we have to give iconv a pointer to a pointer of the underlying
  63. // storage of each byte slice - so far this is the simplest
  64. // way i've found to do that in Go, but it seems ugly
  65. inputPointer := (*C.char)(unsafe.Pointer(&input[0]))
  66. outputPointer := (*C.char)(unsafe.Pointer(&output[0]))
  67. _, err = C.iconv(this.context, &inputPointer, &inputLeft, &outputPointer, &outputLeft)
  68. // update byte counters
  69. bytesRead = len(input) - int(inputLeft)
  70. bytesWritten = len(output) - int(outputLeft)
  71. } else if inputLeft == 0 && outputLeft > 0 {
  72. // inputPointer will be nil, outputPointer is generated as above
  73. outputPointer := (*C.char)(unsafe.Pointer(&output[0]))
  74. _, err = C.iconv(this.context, nil, &inputLeft, &outputPointer, &outputLeft)
  75. // update write byte counter
  76. bytesWritten = len(output) - int(outputLeft)
  77. } else {
  78. // both input and output are zero length, do a shift state reset
  79. _, err = C.iconv(this.context, nil, &inputLeft, nil, &outputLeft)
  80. }
  81. } else {
  82. err = syscall.EBADF
  83. }
  84. return bytesRead, bytesWritten, err
  85. }
  86. // Convert an input string
  87. //
  88. // EILSEQ error may be returned if input contains invalid bytes for the
  89. // Converter's fromEncoding.
  90. func (this *Converter) ConvertString(input string) (output string, err error) {
  91. // make sure we are still open
  92. if this.open {
  93. // construct the buffers
  94. inputBuffer := []byte(input)
  95. outputBuffer := make([]byte, len(inputBuffer)*2) // we use a larger buffer to help avoid resizing later
  96. // call Convert until all input bytes are read or an error occurs
  97. var bytesRead, totalBytesRead, bytesWritten, totalBytesWritten int
  98. for totalBytesRead < len(inputBuffer) && err == nil {
  99. // use the totals to create buffer slices
  100. bytesRead, bytesWritten, err = this.Convert(inputBuffer[totalBytesRead:], outputBuffer[totalBytesWritten:])
  101. totalBytesRead += bytesRead
  102. totalBytesWritten += bytesWritten
  103. // check for the E2BIG error specifically, we can add to the output
  104. // buffer to correct for it and then continue
  105. if err == syscall.E2BIG {
  106. // increase the size of the output buffer by another input length
  107. // first, create a new buffer
  108. tempBuffer := make([]byte, len(outputBuffer)+len(inputBuffer))
  109. // copy the existing data
  110. copy(tempBuffer, outputBuffer)
  111. // switch the buffers
  112. outputBuffer = tempBuffer
  113. // forget the error
  114. err = nil
  115. }
  116. }
  117. if err == nil {
  118. // perform a final shift state reset
  119. _, bytesWritten, err = this.Convert([]byte{}, outputBuffer[totalBytesWritten:])
  120. // update total count
  121. totalBytesWritten += bytesWritten
  122. }
  123. // construct the final output string
  124. output = string(outputBuffer[:totalBytesWritten])
  125. } else {
  126. err = syscall.EBADF
  127. }
  128. return output, err
  129. }