You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

502 lines
13 KiB

  1. package iconv
  2. import (
  3. "bytes"
  4. "io"
  5. "strings"
  6. "syscall"
  7. "testing"
  8. )
  9. type iconvTest struct {
  10. description string
  11. input string
  12. inputEncoding string
  13. output string
  14. outputEncoding string
  15. bytesRead int
  16. bytesWritten int
  17. convertErr error // err from Convert (raw iconv)
  18. err error // err from CovertString, Reader, Writer
  19. }
  20. var (
  21. iconvTests = []iconvTest{
  22. iconvTest{
  23. "simple utf-8 to latin1 conversion success",
  24. "Hello World!", "utf-8",
  25. "Hello World!", "latin1",
  26. 12, 12, nil, nil,
  27. },
  28. iconvTest{
  29. "invalid source encoding causes EINVAL",
  30. "", "doesnotexist",
  31. "", "utf-8",
  32. 0, 0, syscall.EINVAL, syscall.EINVAL,
  33. },
  34. iconvTest{
  35. "invalid destination encoding causes EINVAL",
  36. "", "utf-8",
  37. "", "doesnotexist",
  38. 0, 0, syscall.EINVAL, syscall.EINVAL,
  39. },
  40. iconvTest{
  41. "utf-8 to utf-8 passthrough",
  42. "Hello world!", "utf-8",
  43. "Hello world!", "utf-8",
  44. 12, 12, nil, nil,
  45. },
  46. iconvTest{
  47. "utf-8 to utf-8 partial",
  48. "Hello\xFFWorld!", "utf-8",
  49. "Hello", "utf-8",
  50. 5, 5, syscall.EILSEQ, syscall.EILSEQ,
  51. },
  52. iconvTest{
  53. "utf-8 to utf-8 ignored",
  54. "Hello \xFFWorld!", "utf-8",
  55. "Hello World!", "utf-8//IGNORE",
  56. 13, 12, syscall.EILSEQ, nil,
  57. },
  58. iconvTest{
  59. "invalid input sequence causes EILSEQ",
  60. "\xFF", "utf-8",
  61. "", "latin1",
  62. 0, 0, syscall.EILSEQ, syscall.EILSEQ,
  63. },
  64. iconvTest{
  65. "incomplete input sequence causes EINVAL",
  66. "\xC2", "utf-8",
  67. "", "latin1",
  68. 0, 0, syscall.EINVAL, syscall.EINVAL,
  69. },
  70. iconvTest{
  71. "invalid input causes partial output and EILSEQ",
  72. "Hello\xFF", "utf-8",
  73. "Hello", "latin1",
  74. 5, 5, syscall.EILSEQ, syscall.EILSEQ,
  75. },
  76. iconvTest{
  77. "incomplete input causes partial output and EILSEQ",
  78. "Hello\xC2", "utf-8",
  79. "Hello", "latin1",
  80. 5, 5, syscall.EINVAL, syscall.EINVAL,
  81. },
  82. /* this is only true for glibc / iconv
  83. iconvTest{
  84. "valid input but no conversion causes EILSEQ",
  85. "你好世界 Hello World", "utf-8",
  86. "", "latin1",
  87. 0, 0, syscall.EILSEQ, syscall.EILSEQ,
  88. },*/
  89. iconvTest{
  90. "invalid input with ignore",
  91. "Hello\xFF World!", "utf-8",
  92. "Hello World!", "latin1//IGNORE",
  93. 13, 12, syscall.EILSEQ, nil,
  94. },
  95. iconvTest{
  96. "valid input but no conversion with IGNORE",
  97. "你好世界 Hello World", "utf-8",
  98. " Hello World", "latin1//IGNORE",
  99. 24, 12, syscall.EILSEQ, nil,
  100. },
  101. iconvTest{
  102. "valid input but no conversion with TRANSLIT",
  103. "你好世界 Hello World", "utf-8",
  104. "???? Hello World", "latin1//TRANSLIT",
  105. 24, 16, nil, nil,
  106. },
  107. }
  108. ignoreDetected, translitDetected bool
  109. )
  110. func init() {
  111. // detect if IGNORE / TRANSLIT is supported (glic / libiconv)
  112. conv, err := NewConverter("utf-8", "ascii//IGNORE")
  113. if err == nil {
  114. ignoreDetected = true
  115. conv.Close()
  116. }
  117. conv, err = NewConverter("utf-8", "ascii//TRANSLIT")
  118. if err == nil {
  119. translitDetected = true
  120. conv.Close()
  121. }
  122. }
  123. func runTests(t *testing.T, f func(iconvTest, *testing.T) (int, int, string, error)) {
  124. for _, test := range iconvTests {
  125. t.Run(test.description, func(t *testing.T) {
  126. if !ignoreDetected && strings.HasSuffix(test.outputEncoding, "//IGNORE") {
  127. t.Skip("//IGNORE not supported")
  128. }
  129. if !translitDetected && strings.HasSuffix(test.outputEncoding, "//TRANSLIT") {
  130. t.Skip("//TRANSLIT not supported")
  131. }
  132. bytesRead, bytesWritten, output, err := f(test, t)
  133. // check that bytesRead is same as expected
  134. if bytesRead != test.bytesRead {
  135. t.Errorf("bytesRead: %d expected: %d", bytesRead, test.bytesRead)
  136. }
  137. // check that bytesWritten is same as expected
  138. if bytesWritten != test.bytesWritten {
  139. t.Errorf("bytesWritten: %d expected: %d", bytesWritten, test.bytesWritten)
  140. }
  141. // check output bytes against expected
  142. if output != test.output {
  143. t.Errorf("output: %x expected: %x", output, test.output)
  144. }
  145. // check that err is same as expected
  146. if err != test.err {
  147. if test.err != nil {
  148. if err != nil {
  149. t.Errorf("err: %q expected: %q", err, test.err)
  150. } else {
  151. t.Errorf("err: nil expected %q", test.err)
  152. }
  153. } else {
  154. t.Errorf("unexpected error: %q", err)
  155. }
  156. }
  157. })
  158. }
  159. }
  160. func TestConvert(t *testing.T) {
  161. runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
  162. input := []byte(test.input)
  163. output := make([]byte, 50)
  164. // peform the conversion
  165. bytesRead, bytesWritten, err := Convert(input, output, test.inputEncoding, test.outputEncoding)
  166. // HACK Convert has different erorrs, so check ourselves, and then fake out later check
  167. if err != test.convertErr {
  168. if test.convertErr != nil {
  169. if err != nil {
  170. t.Errorf("err: %q expected: %q", err, test.convertErr)
  171. } else {
  172. t.Errorf("err: nil expected %q", test.convertErr)
  173. }
  174. } else {
  175. t.Errorf("unexpected error: %q", err)
  176. }
  177. }
  178. err = test.err
  179. return bytesRead, bytesWritten, string(output[:bytesWritten]), err
  180. })
  181. }
  182. func TestConvertString(t *testing.T) {
  183. runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
  184. // perform the conversion
  185. output, err := ConvertString(test.input, test.inputEncoding, test.outputEncoding)
  186. // bytesRead and bytesWritten are spoofed a little
  187. return test.bytesRead, len(output), output, err
  188. })
  189. }
  190. func TestReader(t *testing.T) {
  191. runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
  192. var bytesRead, bytesWritten, finalBytesWritten int
  193. var err error
  194. input := bytes.NewBufferString(test.input)
  195. output := make([]byte, 50)
  196. reader, err := NewReader(input, test.inputEncoding, test.outputEncoding)
  197. if err == nil {
  198. bytesWritten, err = reader.Read(output)
  199. // we can compute how many bytes iconv read by inspecting the reader state
  200. bytesRead = len([]byte(test.input)) - input.Len() - (reader.writePos - reader.readPos)
  201. // with current tests and buffer sizes, we'd expect all input to be buffered if we called read
  202. if input.Len() != 0 {
  203. t.Error("not all bytes from input were buffered")
  204. }
  205. // do final read test if we can - either get EOF or same test error
  206. if err == nil {
  207. finalBytesWritten, err = reader.Read(output[bytesWritten:])
  208. if finalBytesWritten != 0 {
  209. t.Errorf("finalBytesWritten: %d expected: 0", finalBytesWritten)
  210. }
  211. if err == io.EOF {
  212. err = nil
  213. }
  214. }
  215. }
  216. return bytesRead, bytesWritten, string(output[:bytesWritten]), err
  217. })
  218. }
  219. func TestWriter(t *testing.T) {
  220. runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
  221. var bytesRead, bytesWritten int
  222. var err error
  223. input := []byte(test.input)
  224. output := new(bytes.Buffer)
  225. writer, err := NewWriter(output, test.inputEncoding, test.outputEncoding)
  226. if err == nil {
  227. bytesRead, err = writer.Write(input)
  228. bytesRead -= writer.readPos
  229. writer.Close()
  230. bytesWritten = output.Len()
  231. }
  232. return bytesRead, bytesWritten, output.String(), err
  233. })
  234. }
  235. func TestReaderWithCopy(t *testing.T) {
  236. runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
  237. input := bytes.NewBufferString(test.input)
  238. output := new(bytes.Buffer)
  239. reader, err := NewReader(input, test.inputEncoding, test.outputEncoding)
  240. if err == nil {
  241. _, err := io.Copy(output, reader)
  242. bytesRead := len(test.input) - input.Len() - reader.writePos
  243. bytesWritten := output.Len()
  244. return bytesRead, bytesWritten, output.String(), err
  245. }
  246. return 0, 0, output.String(), err
  247. })
  248. }
  249. func TestWriterWithCopy(t *testing.T) {
  250. runTests(t, func(test iconvTest, t *testing.T) (int, int, string, error) {
  251. input := bytes.NewBufferString(test.input)
  252. output := new(bytes.Buffer)
  253. writer, err := NewWriter(output, test.inputEncoding, test.outputEncoding)
  254. if err == nil {
  255. bytesCopied, err := io.Copy(writer, input)
  256. bytesRead := int(bytesCopied) - writer.readPos
  257. writer.Close()
  258. bytesWritten := output.Len()
  259. return bytesRead, bytesWritten, output.String(), err
  260. }
  261. return 0, 0, output.String(), err
  262. })
  263. }
  264. func TestReaderMultipleReads(t *testing.T) {
  265. // setup a source reader and our expected output string
  266. source := bytes.NewBufferString("\x80\x8A\x99\x95\x8B\x86\x87")
  267. expected := "€Š™•‹†‡"
  268. // setup reader - use our minimum buffer size so we can force it to shuffle the buffer around
  269. reader, err := NewReaderSized(source, "cp1252", "utf-8", minReadBufferSize)
  270. if err != nil {
  271. if err == syscall.EINVAL {
  272. t.Skip("Either cp1252 or utf-8 isn't supported by iconv on your system")
  273. } else {
  274. t.Fatalf("Unexpected error when creating reader: %s", err)
  275. }
  276. }
  277. // setup a read buffer - we'll slice it to different sizes in our tests
  278. buffer := make([]byte, 64)
  279. // first read should fill internal buffer, but we'll only read part of it
  280. bytesRead, err := reader.Read(buffer[:5])
  281. if bytesRead != 5 || err != nil {
  282. t.Fatalf("first read did not give expected 5, nil: %d, %s", bytesRead, err)
  283. }
  284. // because of how small teh source is and our minimum buffer size, source shoudl be fully read
  285. if source.Len() != 0 {
  286. t.Fatalf("first read did not buffer all of source like expected: %d bytes remain", source.Len())
  287. }
  288. // Buffer doesn't return EOF with last bytes, reader shouldn't know its EOF yet
  289. if reader.eof {
  290. t.Fatalf("first read was not expected to receive EOF")
  291. }
  292. // second read should shift internal buffer, and fill again - make buffer too small for last utf-8 character
  293. // E2BIG from iconv should be ignored because we wrote at least 1 byte
  294. bytesRead, err = reader.Read(buffer[5:18])
  295. if bytesRead != 12 || err != nil {
  296. t.Fatalf("second read did not give expected 15, nil: %d, %s", bytesRead, err)
  297. }
  298. if !reader.eof {
  299. t.Fatalf("second read did not put reader into eof state")
  300. }
  301. // try to read the last 3 byte character with only a buffer of 2 bytes - this time we should see the E2BIG
  302. bytesRead, err = reader.Read(buffer[17:19])
  303. if bytesRead != 0 || err != syscall.E2BIG {
  304. t.Fatalf("third read did not give expected 0, E2BIG: %d, %s", bytesRead, err)
  305. }
  306. // fourth read should finish last character
  307. bytesRead, err = reader.Read(buffer[17:])
  308. if bytesRead != 3 || err != nil {
  309. t.Fatalf("fourth read did not give expected 3, nil: %d, %s", bytesRead, err)
  310. }
  311. // last read should be EOF
  312. bytesRead, err = reader.Read(buffer[20:])
  313. if bytesRead != 0 || err != io.EOF {
  314. t.Fatalf("final read did not give expected 0, EOF: %d, %s", bytesRead, err)
  315. }
  316. // check full utf-8 output
  317. if string(buffer[:20]) != expected {
  318. t.Fatalf("output did not match expected %q: %q", expected, string(buffer[:20]))
  319. }
  320. }
  321. func TestWriteWithIncompleteSequence(t *testing.T) {
  322. expected := "\x80\x8A\x99\x95\x8B\x86\x87"
  323. input := []byte("€Š™•‹†‡")
  324. output := new(bytes.Buffer)
  325. writer, err := NewWriter(output, "utf-8", "cp1252")
  326. if err != nil {
  327. t.Fatalf("unexpected error while creating writer %q", err)
  328. }
  329. // the input string is made of 3 byte characters, for the test we want to only write part of the last character
  330. bytesFromBuffer := len(input) - 2
  331. bytesRead, err := writer.Write(input[:bytesFromBuffer])
  332. if bytesRead != bytesFromBuffer {
  333. t.Fatalf("did a short write on first write: %d, %s", bytesRead, err)
  334. }
  335. // finish the rest
  336. bytesRead, err = writer.Write(input[bytesFromBuffer:])
  337. if bytesRead != 2 {
  338. t.Fatalf("did a short write on second write: %d, %s", bytesRead, err)
  339. }
  340. err = writer.Close()
  341. actual := output.String()
  342. if err != nil {
  343. t.Errorf("got an error on close: %s", err)
  344. }
  345. if actual != expected {
  346. t.Errorf("output %x did not match expected %x", actual, expected)
  347. }
  348. }
  349. func TestWriteWithIncompleteSequenceAndIgnore(t *testing.T) {
  350. if !ignoreDetected {
  351. t.Skip("//IGNORE not supported")
  352. }
  353. expected := "\x80\x8A\x99\x95\x8B\x86\x87"
  354. input := []byte("€Š™•‹†‡")
  355. output := new(bytes.Buffer)
  356. writer, err := NewWriter(output, "utf-8", "cp1252//IGNORE")
  357. if err != nil {
  358. t.Fatalf("unexpected error while creating writer %q", err)
  359. }
  360. // the input string is made of 3 byte characters, for the test we want to only write part of the last character
  361. bytesFromBuffer := len(input) - 2
  362. bytesRead, err := writer.Write(input[:bytesFromBuffer])
  363. if bytesRead != bytesFromBuffer {
  364. t.Fatalf("did a short write on first write: %d, %s", bytesRead, err)
  365. }
  366. // finish the rest
  367. bytesRead, err = writer.Write(input[bytesFromBuffer:])
  368. if bytesRead != 2 {
  369. t.Fatalf("did a short write on second write: %d, %s", bytesRead, err)
  370. }
  371. err = writer.Close()
  372. actual := output.String()
  373. if err != nil {
  374. t.Errorf("got an error on close: %s", err)
  375. }
  376. if actual != expected {
  377. t.Errorf("output %x did not match expected %x", actual, expected)
  378. }
  379. }
  380. func TestWriteWithIncompleteSequenceAtEOF(t *testing.T) {
  381. expected := "\x80\x8A\x99\x95\x8B\x86"
  382. input := []byte("€Š™•‹†‡")
  383. output := new(bytes.Buffer)
  384. writer, err := NewWriter(output, "utf-8", "cp1252")
  385. if err != nil {
  386. t.Fatalf("unexpected error while creating writer %q", err)
  387. }
  388. // the input string is made of 3 byte characters, for the test we want to only write part of the last character
  389. bytesFromBuffer := len(input) - 2
  390. bytesRead, err := writer.Write(input[:bytesFromBuffer])
  391. if bytesRead != bytesFromBuffer {
  392. t.Fatalf("did a short write on first write: %d, %s", bytesRead, err)
  393. }
  394. err = writer.Close()
  395. actual := output.String()
  396. if err != nil {
  397. t.Errorf("got an error on close: %s", err)
  398. }
  399. if actual != expected {
  400. t.Errorf("output %x did not match expected %x", actual, expected)
  401. }
  402. }