diff options
Diffstat (limited to '')
| -rwxr-xr-x | tests/genutf8.pl | 20 |
1 files changed, 13 insertions, 7 deletions
diff --git a/tests/genutf8.pl b/tests/genutf8.pl
index 6a522dd..bbef91f 100755
--- a/tests/genutf8.pl
+++ b/tests/genutf8.pl
| @@ -2,25 +2,31 @@ | |||
| 2 | 2 | ||
| 3 | # Create test comparison data using a different UTF-8 implementation. | 3 | # Create test comparison data using a different UTF-8 implementation. |
| 4 | 4 | ||
| 5 | # The generated utf8.dat file must have the following MD5 sum: | ||
| 6 | # cff03b039d850f370a7362f3313e5268 | ||
| 7 | |||
| 5 | use strict; | 8 | use strict; |
| 6 | use warnings; | 9 | use warnings; |
| 7 | use Text::Iconv; | ||
| 8 | use FileHandle; | 10 | use FileHandle; |
| 9 | 11 | ||
| 10 | # 0xD800 - 0xDFFF are used to encode supplementary codepoints | 12 | # 0xD800 - 0xDFFF are used to encode supplementary codepoints |
| 11 | # 0x10000 - 0x10FFFF are supplementary codepoints | 13 | # 0x10000 - 0x10FFFF are supplementary codepoints |
| 12 | my (@codepoints) = (0 .. 0xD7FF, 0xE000 .. 0x10FFFF); | 14 | my (@codepoints) = (0 .. 0xD7FF, 0xE000 .. 0x10FFFF); |
| 13 | 15 | ||
| 14 | my ($utf32be) = pack("N*", @codepoints); | 16 | my ($utf8); |
| 15 | my $iconv = Text::Iconv->new("UTF-32BE", "UTF-8"); | 17 | { |
| 16 | my ($utf8) = $iconv->convert($utf32be); | 18 | # Hide "Unicode character X is illegal" warnings. |
| 19 | # We want all the codes to test the UTF-8 escape decoder. | ||
| 20 | no warnings; | ||
| 21 | $utf8 = pack("U*", @codepoints); | ||
| 22 | } | ||
| 17 | defined($utf8) or die "Unable create UTF-8 string\n"; | 23 | defined($utf8) or die "Unable create UTF-8 string\n"; |
| 18 | 24 | ||
| 19 | my $fh = FileHandle->new(); | 25 | my $fh = FileHandle->new(); |
| 20 | $fh->open("utf8.dat", ">") | 26 | $fh->open("utf8.dat", ">:utf8") |
| 21 | or die "Unable to open utf8.dat: $!\n"; | 27 | or die "Unable to open utf8.dat: $!\n"; |
| 22 | $fh->print($utf8) | 28 | $fh->write($utf8) |
| 23 | or die "Unable to write utf.dat\n"; | 29 | or die "Unable to write utf8.dat\n"; |
| 24 | $fh->close(); | 30 | $fh->close(); |
| 25 | 31 | ||
| 26 | # vi:ai et sw=4 ts=4: | 32 | # vi:ai et sw=4 ts=4: |
