選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

172 行
5.1 KiB

  1. package iconv
  2. /*
  3. #cgo darwin LDFLAGS: -liconv
  4. #cgo freebsd LDFLAGS: -liconv
  5. #cgo windows LDFLAGS: -liconv
  6. #include <stdlib.h>
  7. #include <iconv.h>
  8. #include <locale.h>
  9. // called by init, seems to be necessary for TRANSLIT to work
  10. void initLocale() {
  11. setlocale(LC_ALL, "");
  12. }
  13. */
  14. import "C"
  15. import "syscall"
  16. import "unsafe"
  17. func init() {
  18. C.initLocale()
  19. }
  20. type Converter struct {
  21. context C.iconv_t
  22. open bool
  23. }
  24. // Initialize a new Converter. If fromEncoding or toEncoding are not supported by
  25. // iconv then an EINVAL error will be returned. An ENOMEM error maybe returned if
  26. // there is not enough memory to initialize an iconv descriptor
  27. func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err error) {
  28. converter = new(Converter)
  29. // convert to C strings
  30. toEncodingC := C.CString(toEncoding)
  31. fromEncodingC := C.CString(fromEncoding)
  32. // open an iconv descriptor
  33. converter.context, err = C.iconv_open(toEncodingC, fromEncodingC)
  34. // free the C Strings
  35. C.free(unsafe.Pointer(toEncodingC))
  36. C.free(unsafe.Pointer(fromEncodingC))
  37. // check err
  38. if err == nil {
  39. // no error, mark the context as open
  40. converter.open = true
  41. }
  42. return
  43. }
  44. // destroy is called during garbage collection
  45. func (this *Converter) destroy() {
  46. this.Close()
  47. }
  48. // Close a Converter's iconv description explicitly
  49. func (this *Converter) Close() (err error) {
  50. if this.open {
  51. _, err = C.iconv_close(this.context)
  52. }
  53. return
  54. }
  55. // Convert bytes from an input byte slice into a give output byte slice
  56. //
  57. // As many bytes that can converted and fit into the size of output will be
  58. // processed and the number of bytes read for input as well as the number of
  59. // bytes written to output will be returned. If not all converted bytes can fit
  60. // into output and E2BIG error will also be returned. If input contains an invalid
  61. // sequence of bytes for the Converter's fromEncoding an EILSEQ error will be returned
  62. //
  63. // For shift based output encodings, any end shift byte sequences can be generated by
  64. // passing a 0 length byte slice as input. Also passing a 0 length byte slice for output
  65. // will simply reset the iconv descriptor shift state without writing any bytes.
  66. func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err error) {
  67. // make sure we are still open
  68. if this.open {
  69. inputLeft := C.size_t(len(input))
  70. outputLeft := C.size_t(len(output))
  71. if inputLeft > 0 && outputLeft > 0 {
  72. // we have to give iconv a pointer to a pointer of the underlying
  73. // storage of each byte slice - so far this is the simplest
  74. // way i've found to do that in Go, but it seems ugly
  75. inputPointer := (*C.char)(unsafe.Pointer(&input[0]))
  76. outputPointer := (*C.char)(unsafe.Pointer(&output[0]))
  77. _, err = C.iconv(this.context, &inputPointer, &inputLeft, &outputPointer, &outputLeft)
  78. // update byte counters
  79. bytesRead = len(input) - int(inputLeft)
  80. bytesWritten = len(output) - int(outputLeft)
  81. } else if inputLeft == 0 && outputLeft > 0 {
  82. // inputPointer will be nil, outputPointer is generated as above
  83. outputPointer := (*C.char)(unsafe.Pointer(&output[0]))
  84. _, err = C.iconv(this.context, nil, &inputLeft, &outputPointer, &outputLeft)
  85. // update write byte counter
  86. bytesWritten = len(output) - int(outputLeft)
  87. } else {
  88. // both input and output are zero length, do a shift state reset
  89. _, err = C.iconv(this.context, nil, &inputLeft, nil, &outputLeft)
  90. }
  91. } else {
  92. err = syscall.EBADF
  93. }
  94. return bytesRead, bytesWritten, err
  95. }
  96. // Convert an input string
  97. //
  98. // EILSEQ error may be returned if input contains invalid bytes for the
  99. // Converter's fromEncoding.
  100. func (this *Converter) ConvertString(input string) (output string, err error) {
  101. // make sure we are still open
  102. if this.open {
  103. // construct the buffers
  104. inputBuffer := []byte(input)
  105. outputBuffer := make([]byte, len(inputBuffer)*2) // we use a larger buffer to help avoid resizing later
  106. // call Convert until all input bytes are read or an error occurs
  107. var bytesRead, totalBytesRead, bytesWritten, totalBytesWritten int
  108. for totalBytesRead < len(inputBuffer) && err == nil {
  109. // use the totals to create buffer slices
  110. bytesRead, bytesWritten, err = this.Convert(inputBuffer[totalBytesRead:], outputBuffer[totalBytesWritten:])
  111. totalBytesRead += bytesRead
  112. totalBytesWritten += bytesWritten
  113. // check for the E2BIG error specifically, we can add to the output
  114. // buffer to correct for it and then continue
  115. if err == syscall.E2BIG {
  116. // increase the size of the output buffer by another input length
  117. // first, create a new buffer
  118. tempBuffer := make([]byte, len(outputBuffer)+len(inputBuffer))
  119. // copy the existing data
  120. copy(tempBuffer, outputBuffer)
  121. // switch the buffers
  122. outputBuffer = tempBuffer
  123. // forget the error
  124. err = nil
  125. }
  126. }
  127. if err == nil {
  128. // perform a final shift state reset
  129. _, bytesWritten, err = this.Convert([]byte{}, outputBuffer[totalBytesWritten:])
  130. // update total count
  131. totalBytesWritten += bytesWritten
  132. }
  133. // construct the final output string
  134. output = string(outputBuffer[:totalBytesWritten])
  135. } else {
  136. err = syscall.EBADF
  137. }
  138. return output, err
  139. }