| author | Mark Pulford <mark@kyne.com.au> | 2011-05-08 20:26:09 +0930 |
|---|---|---|
| committer | Mark Pulford <mark@kyne.com.au> | 2011-05-08 20:26:09 +0930 |
| commit | 4dc56c6d362f2cd8a79d83369f0b852df07dae3f (patch) | |
| tree | d51d3470a396c7981871b4f6fe4fd331e180db83 /tests/test.lua | |
| parent | eeebeda88e62fefa87c71d616d5719782bdaa45a (diff) | |
Add UTF-16 surrogate pair decode support
- Add tests for UTF-16 decoding and failures
- Add getutf8.pl to assist with UTF-16 decode testing
- Re-add test_decode_cycle() which was accidentally removed earlier
- Rename bytestring.dat to octets-escaped.dat
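The surrogate-pair handling being tested works, in outline, like this: a supplementary codepoint (U+10000 and above) appears in JSON as a high surrogate escape (\uD800–\uDBFF) followed by a low surrogate escape (\uDC00–\uDFFF). The Lua sketch below is not taken from the lua-cjson C decoder; it is a minimal illustration of that arithmetic, with hypothetical helper names, to show what the new tests exercise.

```lua
-- Minimal sketch (not the lua-cjson implementation): combine a UTF-16
-- surrogate pair into a supplementary codepoint and encode it as UTF-8.
local function surrogate_pair_to_codepoint(high, low)
    -- high must be 0xD800-0xDBFF, low must be 0xDC00-0xDFFF
    return 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00)
end

local function codepoint_to_utf8(cp)
    -- Supplementary codepoints always need a 4 octet UTF-8 sequence
    local f = math.floor
    return string.char(0xF0 + f(cp / 0x40000),
                       0x80 + f(cp / 0x1000) % 0x40,
                       0x80 + f(cp / 0x40) % 0x40,
                       0x80 + cp % 0x40)
end

-- '"\uD83D\uDE00"' should decode to U+1F600 (4 UTF-8 octets)
local cp = surrogate_pair_to_codepoint(0xD83D, 0xDE00)
print(string.format("U+%X", cp))        --> U+1F600
print(#codepoint_to_utf8(cp))           --> 4
```

The gen_utf16_escaped() helper added in this patch generates every such high/low combination (1024 × 1024 = 1,048,576 pairs), so the final test decodes the full supplementary range in a single string.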
Diffstat (limited to 'tests/test.lua')

| mode | path | lines changed |
|---|---|---|
| -rwxr-xr-x | tests/test.lua | 66 |

1 file changed, 64 insertions, 2 deletions
```diff
diff --git a/tests/test.lua b/tests/test.lua
index 9075bab..0e0aad8 100755
--- a/tests/test.lua
+++ b/tests/test.lua
@@ -3,6 +3,8 @@
 -- CJSON tests
 --
 -- Mark Pulford <mark@kyne.com.au>
+--
+-- Note: The output of this script is easier to read with "less -S"
 
 require "common"
 local json = require "cjson"
@@ -95,13 +97,73 @@ local function gen_ascii()
     return table.concat(chars)
 end
 
+-- Generate every UTF-16 codepoint, including supplementary codes
+local function gen_utf16_escaped()
+    -- Create raw table escapes
+    local utf16_escaped = {}
+    local count = 0
+
+    local function append_escape(code)
+        local esc = string.format('\\u%04X', code)
+        table.insert(utf16_escaped, esc)
+    end
+
+    table.insert(utf16_escaped, '"')
+    for i = 0, 0xD7FF do
+        append_escape(i)
+    end
+    -- Skip 0xD800 - 0xDFFF since they are used to encode supplementary
+    -- codepoints
+    for i = 0xE000, 0xFFFF do
+        append_escape(i)
+    end
+    -- Append surrogate pair for each supplementary codepoint
+    for high = 0xD800, 0xDBFF do
+        for low = 0xDC00, 0xDFFF do
+            append_escape(high)
+            append_escape(low)
+        end
+    end
+    table.insert(utf16_escaped, '"')
+
+    return table.concat(utf16_escaped)
+end
+
 local octets_raw = gen_ascii()
-local octets_escaped = file_load("bytestring.dat")
+local octets_escaped = file_load("octets-escaped.dat")
+local utf8_loaded, utf8_raw = pcall(file_load, "utf8.dat")
+if not utf8_loaded then
+    utf8_raw = "Failed to load utf8.dat"
+end
+local utf16_escaped = gen_utf16_escaped()
+
 local escape_tests = {
+    -- Test 8bit clean
     { json.encode, { octets_raw }, true, { octets_escaped } },
-    { json.decode, { octets_escaped }, true, { octets_raw } }
+    { json.decode, { octets_escaped }, true, { octets_raw } },
+    -- Ensure high bits are removed from surrogate codes
+    { json.decode, { '"\\uF800"' }, true, { "\239\160\128" } },
+    -- Test inverted surrogate pairs
+    { json.decode, { '"\\uDB00\\uD800"' },
+      false, { "Expected value but found invalid unicode escape code at character 2" } },
+    -- Test 2x high surrogate code units
+    { json.decode, { '"\\uDB00\\uDB00"' },
+      false, { "Expected value but found invalid unicode escape code at character 2" } },
+    -- Test invalid 2nd escape
+    { json.decode, { '"\\uDB00\\"' },
+      false, { "Expected value but found invalid unicode escape code at character 2" } },
+    { json.decode, { '"\\uDB00\\uD"' },
+      false, { "Expected value but found invalid unicode escape code at character 2" } },
+    -- Test decoding of all UTF-16 escapes
+    { json.decode, { utf16_escaped }, true, { utf8_raw } }
 }
 
+function test_decode_cycle(filename)
+    local obj1 = json.decode(file_load(filename))
+    local obj2 = json.decode(json.encode(obj1))
+    return compare_values(obj1, obj2)
+end
+
 run_test_group("decode simple value", simple_value_tests)
 run_test_group("decode numeric", numeric_tests)
 
```
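Assuming a build of the module that includes this commit, the new behaviour can be exercised directly from Lua; the expected error text in the comment below is the one the failure cases in escape_tests check for.

```lua
local json = require "cjson"

-- A well-formed surrogate pair decodes to one supplementary codepoint,
-- emitted as four UTF-8 octets (here U+1F600).
local s = json.decode('"\\uD83D\\uDE00"')
print(#s)   --> 4

-- Inverted, repeated or truncated surrogates are rejected, matching
-- the failure cases in escape_tests.
local ok, err = pcall(json.decode, '"\\uDB00\\uD800"')
print(ok, err)
--> false   Expected value but found invalid unicode escape code at character 2
```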
