You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

162 lines
4.9 KiB

  1. package iconv
/*
#cgo darwin LDFLAGS: -liconv
#cgo freebsd LDFLAGS: -liconv
#cgo windows LDFLAGS: -liconv
#include <stdlib.h>
#include <iconv.h>

// call_iconv forwards to iconv() while keeping the char** arguments on the C
// side, so Go never has to pass a pointer to a Go pointer across the cgo
// boundary. The (void *) cast accepts both the `char **` and the older
// `const char **` prototypes of iconv().
static size_t call_iconv(iconv_t cd, char *in, size_t *inleft, char *out, size_t *outleft) {
	return iconv(cd, (void *)&in, inleft, &out, outleft);
}
*/
import "C"

import (
	"syscall"
	"unsafe"
)
  12. type Converter struct {
  13. context C.iconv_t
  14. open bool
  15. }
  16. // Initialize a new Converter. If fromEncoding or toEncoding are not supported by
  17. // iconv then an EINVAL error will be returned. An ENOMEM error maybe returned if
  18. // there is not enough memory to initialize an iconv descriptor
  19. func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err error) {
  20. converter = new(Converter)
  21. // convert to C strings
  22. toEncodingC := C.CString(toEncoding)
  23. fromEncodingC := C.CString(fromEncoding)
  24. // open an iconv descriptor
  25. converter.context, err = C.iconv_open(toEncodingC, fromEncodingC)
  26. // free the C Strings
  27. C.free(unsafe.Pointer(toEncodingC))
  28. C.free(unsafe.Pointer(fromEncodingC))
  29. // check err
  30. if err == nil {
  31. // no error, mark the context as open
  32. converter.open = true
  33. }
  34. return
  35. }
  36. // destroy is called during garbage collection
  37. func (this *Converter) destroy() {
  38. this.Close()
  39. }
  40. // Close a Converter's iconv description explicitly
  41. func (this *Converter) Close() (err error) {
  42. if this.open {
  43. _, err = C.iconv_close(this.context)
  44. }
  45. return
  46. }
  47. // Convert bytes from an input byte slice into a give output byte slice
  48. //
  49. // As many bytes that can converted and fit into the size of output will be
  50. // processed and the number of bytes read for input as well as the number of
  51. // bytes written to output will be returned. If not all converted bytes can fit
  52. // into output and E2BIG error will also be returned. If input contains an invalid
  53. // sequence of bytes for the Converter's fromEncoding an EILSEQ error will be returned
  54. //
  55. // For shift based output encodings, any end shift byte sequences can be generated by
  56. // passing a 0 length byte slice as input. Also passing a 0 length byte slice for output
  57. // will simply reset the iconv descriptor shift state without writing any bytes.
  58. func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err error) {
  59. // make sure we are still open
  60. if this.open {
  61. inputLeft := C.size_t(len(input))
  62. outputLeft := C.size_t(len(output))
  63. if inputLeft > 0 && outputLeft > 0 {
  64. // we have to give iconv a pointer to a pointer of the underlying
  65. // storage of each byte slice - so far this is the simplest
  66. // way i've found to do that in Go, but it seems ugly
  67. inputPointer := (*C.char)(unsafe.Pointer(&input[0]))
  68. outputPointer := (*C.char)(unsafe.Pointer(&output[0]))
  69. _, err = C.iconv(this.context, &inputPointer, &inputLeft, &outputPointer, &outputLeft)
  70. // update byte counters
  71. bytesRead = len(input) - int(inputLeft)
  72. bytesWritten = len(output) - int(outputLeft)
  73. } else if inputLeft == 0 && outputLeft > 0 {
  74. // inputPointer will be nil, outputPointer is generated as above
  75. outputPointer := (*C.char)(unsafe.Pointer(&output[0]))
  76. _, err = C.iconv(this.context, nil, &inputLeft, &outputPointer, &outputLeft)
  77. // update write byte counter
  78. bytesWritten = len(output) - int(outputLeft)
  79. } else {
  80. // both input and output are zero length, do a shift state reset
  81. _, err = C.iconv(this.context, nil, &inputLeft, nil, &outputLeft)
  82. }
  83. } else {
  84. err = syscall.EBADF
  85. }
  86. return bytesRead, bytesWritten, err
  87. }
  88. // Convert an input string
  89. //
  90. // EILSEQ error may be returned if input contains invalid bytes for the
  91. // Converter's fromEncoding.
  92. func (this *Converter) ConvertString(input string) (output string, err error) {
  93. // make sure we are still open
  94. if this.open {
  95. // construct the buffers
  96. inputBuffer := []byte(input)
  97. outputBuffer := make([]byte, len(inputBuffer)*2) // we use a larger buffer to help avoid resizing later
  98. // call Convert until all input bytes are read or an error occurs
  99. var bytesRead, totalBytesRead, bytesWritten, totalBytesWritten int
  100. for totalBytesRead < len(inputBuffer) && err == nil {
  101. // use the totals to create buffer slices
  102. bytesRead, bytesWritten, err = this.Convert(inputBuffer[totalBytesRead:], outputBuffer[totalBytesWritten:])
  103. totalBytesRead += bytesRead
  104. totalBytesWritten += bytesWritten
  105. // check for the E2BIG error specifically, we can add to the output
  106. // buffer to correct for it and then continue
  107. if err == syscall.E2BIG {
  108. // increase the size of the output buffer by another input length
  109. // first, create a new buffer
  110. tempBuffer := make([]byte, len(outputBuffer)+len(inputBuffer))
  111. // copy the existing data
  112. copy(tempBuffer, outputBuffer)
  113. // switch the buffers
  114. outputBuffer = tempBuffer
  115. // forget the error
  116. err = nil
  117. }
  118. }
  119. if err == nil {
  120. // perform a final shift state reset
  121. _, bytesWritten, err = this.Convert([]byte{}, outputBuffer[totalBytesWritten:])
  122. // update total count
  123. totalBytesWritten += bytesWritten
  124. }
  125. // construct the final output string
  126. output = string(outputBuffer[:totalBytesWritten])
  127. } else {
  128. err = syscall.EBADF
  129. }
  130. return output, err
  131. }