From 2416b145073211b840781da6abf4b6d97f4657a6 Mon Sep 17 00:00:00 2001 From: Mark Pulford Date: Fri, 30 Dec 2011 14:17:44 +1030 Subject: Add fpconv to work around comma decimal points Create a separate buffer and translate comma <> dot before calling strtod(), and after calling sprintf() as required. - Add "update_locale" Lua API call and init locale on module load. - Move sprintf format string to fpconv --- CMakeLists.txt | 2 +- Makefile | 2 +- fpconv.c | 155 ++++++++++++++++++++++++++++++++++++++++++ fpconv.h | 11 +++ lua-cjson-1.0devel-1.rockspec | 4 +- lua_cjson.c | 36 ++++++---- strbuf.h | 14 +++- tests/test.lua | 2 + 8 files changed, 206 insertions(+), 20 deletions(-) create mode 100644 fpconv.c create mode 100644 fpconv.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 349342e..8d8a420 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,7 +30,7 @@ else() set(_lua_module_dir "${_lua_lib_dir}/lua/5.1") endif() -add_library(cjson MODULE lua_cjson.c strbuf.c) +add_library(cjson MODULE lua_cjson.c strbuf.c fpconv.c) set_target_properties(cjson PROPERTIES PREFIX "") install(TARGETS cjson DESTINATION "${_lua_module_dir}") diff --git a/Makefile b/Makefile index 7685363..fb63d99 100644 --- a/Makefile +++ b/Makefile @@ -35,7 +35,7 @@ INSTALL_CMD = install ## End platform specific section BUILD_CFLAGS = -fpic -I$(LUA_INCLUDE_DIR) $(CJSON_CFLAGS) -OBJS := lua_cjson.o strbuf.o +OBJS := lua_cjson.o strbuf.o fpconv.o .PHONY: all clean install package doc diff --git a/fpconv.c b/fpconv.c new file mode 100644 index 0000000..3ff79dc --- /dev/null +++ b/fpconv.c @@ -0,0 +1,155 @@ +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include <string.h> + +#include "fpconv.h" + +static char locale_decimal_point = '.'; + +/* In theory multibyte decimal_points are possible, but + * Lua CJSON only supports UTF-8 and known locales only have + * single byte decimal points ([.,]). + * + * localeconv() may not be thread safe, and nl_langinfo() is not + * supported on some platforms. Use sprintf() instead. 
*/ +void fpconv_update_locale() +{ + char buf[8]; + + snprintf(buf, sizeof(buf), "%g", 0.5); + + /* Failing this test might imply the platform has a buggy dtoa + * implementation or wide characters */ + if (buf[0] != '0' || buf[2] != '5' || buf[3] != 0) { + fprintf(stderr, "Error: wide characters found or printf() bug."); + abort(); + } + + locale_decimal_point = buf[1]; +} + +/* Check for a valid number character: [-+0-9a-yA-Y.] + * It doesn't matter if actual invalid characters are counted - strtod() + * will find the valid number if it exists. The risk is that slightly more + * memory might be allocated before a parse error occurs. */ +static int valid_number_character(char ch) +{ + char lower_ch; + + if ('0' <= ch && ch <= '9') + return 1; + if (ch == '-' || ch == '+' || ch == '.') + return 1; + + /* Hex digits, exponent (e), base (p), "infinity",.. + * The main purpose is to not include a "comma". If any other invalid + * characters are included, they will only generate a parse error later. */ + lower_ch = ch | 0x20; + if ('a' <= lower_ch && lower_ch <= 'y') + return 1; + + return 0; +} + +/* Calculate the size of the buffer required for a locale + * conversion. Returns 0 if no valid number characters are found */ +static int strtod_buffer_size(const char *s) +{ + const char *p = s; + + while (valid_number_character(*p)) + p++; + + return p - s; +} + +/* Similar to strtod(), but accepts '.' regardless of the current locale's decimal point + * character. Guaranteed to be called at the start of any valid number in a string */ +double fpconv_strtod(const char *nptr, char **endptr) +{ + char *num, *endnum, *dp; + int numlen; + double value; + + /* System strtod() is fine when decimal point is '.' 
*/ + if (locale_decimal_point == '.') + return strtod(nptr, endptr); + + numlen = strtod_buffer_size(nptr); + if (!numlen) { + /* No valid characters found, standard strtod() return */ + *endptr = (char *)nptr; + return 0; + } + + /* Duplicate number into buffer */ + num = malloc(numlen + 1); + if (!num) { + fprintf(stderr, "Out of memory"); + abort(); + } + memcpy(num, nptr, numlen); + num[numlen] = 0; + + /* Update decimal point character if found */ + dp = strchr(num, '.'); + if (dp) + *dp = locale_decimal_point; + + value = strtod(num, &endnum); + *endptr = (char *)&nptr[endnum - num]; + free(num); + + return value; +} + +/* "fmt" must point to a buffer of at least 6 characters */ +static void set_number_format(char *fmt, int precision) +{ + int d1, d2, i; + + assert(1 <= precision && precision <= 14); + + /* Create printf format (%.14g) from precision */ + d1 = precision / 10; + d2 = precision % 10; + fmt[0] = '%'; + fmt[1] = '.'; + i = 2; + if (d1) { + fmt[i++] = '0' + d1; + } + fmt[i++] = '0' + d2; + fmt[i++] = 'g'; + fmt[i++] = 0; +} + +/* Assumes there are always at least 32 characters available in the target buffer */ +int fpconv_g_fmt(char *str, double num, int precision) +{ + char buf[FPCONV_G_FMT_BUFSIZE]; + char fmt[6]; + int len; + char *b; + + set_number_format(fmt, precision); + + /* Pass through when decimal point character is dot. */ + if (locale_decimal_point == '.') + return snprintf(str, FPCONV_G_FMT_BUFSIZE, fmt, num); + + /* snprintf() to a buffer then translate for other decimal point characters */ + len = snprintf(buf, FPCONV_G_FMT_BUFSIZE, fmt, num); + + /* The copy below includes the null terminator; the returned 'len' does not */ + b = buf; + do { + *str++ = (*b == locale_decimal_point ? '.' 
: *b); + } while(*b++); + + return len; +} + +/* vi:ai et sw=4 ts=4: + */ diff --git a/fpconv.h b/fpconv.h new file mode 100644 index 0000000..b8a6469 --- /dev/null +++ b/fpconv.h @@ -0,0 +1,11 @@ +/* Lua CJSON floating point conversion routines */ + +/* Buffer larger than required to store the largest %.14g number */ +# define FPCONV_G_FMT_BUFSIZE 32 + +extern void fpconv_update_locale(); +extern int fpconv_g_fmt(char*, double, int); +extern double fpconv_strtod(const char*, char**); + +/* vi:ai et sw=4 ts=4: + */ diff --git a/lua-cjson-1.0devel-1.rockspec b/lua-cjson-1.0devel-1.rockspec index 3890ec1..fa20c53 100644 --- a/lua-cjson-1.0devel-1.rockspec +++ b/lua-cjson-1.0devel-1.rockspec @@ -23,11 +23,11 @@ build = { type = "builtin", modules = { cjson = { - sources = { "lua_cjson.c", "strbuf.c" }, + sources = { "lua_cjson.c", "strbuf.c", "fpconv.c" }, + defines = { -- Optional workaround: -- USE_INTERNAL_ISINF: Provide internal isinf() implementation. Required -- on some Solaris platforms. - defines = { -- LuaRocks does not support platform specific configuration for Solaris. -- Uncomment the line below on Solaris platforms. 
-- "USE_INTERNAL_ISINF" diff --git a/lua_cjson.c b/lua_cjson.c index f5ea0dd..8e9b237 100644 --- a/lua_cjson.c +++ b/lua_cjson.c @@ -43,6 +43,7 @@ #include #include "strbuf.h" +#include "fpconv.h" #ifndef CJSON_VERSION #define CJSON_VERSION "1.0devel" @@ -60,6 +61,7 @@ #define DEFAULT_ENCODE_REFUSE_BADNUM 1 #define DEFAULT_DECODE_REFUSE_BADNUM 0 #define DEFAULT_ENCODE_KEEP_BUFFER 1 +#define DEFAULT_ENCODE_NUMBER_PRECISION 14 typedef enum { T_OBJ_BEGIN, @@ -104,7 +106,6 @@ typedef struct { char *char2escape[256]; /* Encoding */ #endif strbuf_t encode_buf; - char number_fmt[8]; /* "%.XXg\0" */ int current_depth; int encode_sparse_convert; @@ -253,12 +254,6 @@ static int json_cfg_encode_max_depth(lua_State *l) return 1; } -static void json_set_number_precision(json_config_t *cfg, int prec) -{ - cfg->encode_number_precision = prec; - sprintf(cfg->number_fmt, "%%.%dg", prec); -} - /* Configures number precision when converting doubles to text */ static int json_cfg_encode_number_precision(lua_State *l) { @@ -272,7 +267,7 @@ static int json_cfg_encode_number_precision(lua_State *l) precision = luaL_checkinteger(l, 1); luaL_argcheck(l, 1 <= precision && precision <= 14, 1, "expected integer between 1 and 14"); - json_set_number_precision(cfg, precision); + cfg->encode_number_precision = precision; } lua_pushinteger(l, cfg->encode_number_precision); @@ -342,6 +337,13 @@ static int json_cfg_refuse_invalid_numbers(lua_State *l) return 1; } +static int json_update_locale(lua_State *l) +{ + fpconv_update_locale(); + + return 0; +} + static int json_destroy_config(lua_State *l) { json_config_t *cfg; @@ -376,7 +378,7 @@ static void json_create_config(lua_State *l) cfg->encode_refuse_badnum = DEFAULT_ENCODE_REFUSE_BADNUM; cfg->decode_refuse_badnum = DEFAULT_DECODE_REFUSE_BADNUM; cfg->encode_keep_buffer = DEFAULT_ENCODE_KEEP_BUFFER; - json_set_number_precision(cfg, 14); + cfg->encode_number_precision = DEFAULT_ENCODE_NUMBER_PRECISION; /* Decoding init */ @@ -562,6 +564,7 @@ static 
void json_append_number(lua_State *l, strbuf_t *json, int index, json_config_t *cfg) { double num = lua_tonumber(l, index); + int len; if (cfg->encode_refuse_badnum && (isinf(num) || isnan(num))) json_encode_exception(l, cfg, index, "must not be NaN or Inf"); @@ -571,11 +574,10 @@ static void json_append_number(lua_State *l, strbuf_t *json, int index, strbuf_append_mem(json, "nan", 3); } else { /* Longest double printed with %.14g is 21 characters long: - * -1.7976931348623e+308 - * - * Use 32 to include the \0, and a few extra just in case.. - */ - strbuf_append_fmt(json, 32, cfg->number_fmt, num); + * -1.7976931348623e+308 */ + strbuf_ensure_empty_length(json, FPCONV_G_FMT_BUFSIZE); + len = fpconv_g_fmt(strbuf_empty_ptr(json), num, cfg->encode_number_precision); + strbuf_extend_length(json, len); } } @@ -963,7 +965,7 @@ static void json_next_number_token(json_parse_t *json, json_token_t *token) token->type = T_NUMBER; startptr = &json->data[json->index]; - token->value.number = strtod(&json->data[json->index], &endptr); + token->value.number = fpconv_strtod(&json->data[json->index], &endptr); if (startptr == endptr) json_set_token_error(token, json, "invalid number"); else @@ -1254,9 +1256,13 @@ int luaopen_cjson(lua_State *l) { "encode_number_precision", json_cfg_encode_number_precision }, { "encode_keep_buffer", json_cfg_encode_keep_buffer }, { "refuse_invalid_numbers", json_cfg_refuse_invalid_numbers }, + { "update_locale", json_update_locale }, { NULL, NULL } }; + /* Update the current locale for g_fmt/strtod */ + fpconv_update_locale(); + /* Use json_config_key as a pointer. 
* It's faster than using a config string, and more unique */ lua_pushlightuserdata(l, &json_config_key); diff --git a/strbuf.h b/strbuf.h index f856543..fbc8651 100644 --- a/strbuf.h +++ b/strbuf.h @@ -62,7 +62,9 @@ extern void strbuf_resize(strbuf_t *s, int len); static int strbuf_empty_length(strbuf_t *s); static int strbuf_length(strbuf_t *s); static char *strbuf_string(strbuf_t *s, int *len); -static void strbuf_ensure_empty_length(strbuf_t *s, int len); +static void strbuf_ensure_empty_length(strbuf_t *s, int len); +static char *strbuf_empty_ptr(strbuf_t *s); +static void strbuf_extend_length(strbuf_t *s, int len); /* Update */ extern void strbuf_append_fmt(strbuf_t *s, int len, const char *fmt, ...); @@ -96,6 +98,16 @@ static inline void strbuf_ensure_empty_length(strbuf_t *s, int len) strbuf_resize(s, s->length + len); } +static inline char *strbuf_empty_ptr(strbuf_t *s) +{ + return s->buf + s->length; +} + +static inline void strbuf_extend_length(strbuf_t *s, int len) +{ + s->length += len; +} + static inline int strbuf_length(strbuf_t *s) { return s->length; diff --git a/tests/test.lua b/tests/test.lua index bb696a2..bdae6ea 100755 --- a/tests/test.lua +++ b/tests/test.lua @@ -211,12 +211,14 @@ local escape_tests = { local locale_tests = { function () os.setlocale("cs_CZ") + cjson.update_locale() return "Setting locale to cs_CZ (comma separator)" end, { json.encode, { 1.5 }, true, { '1.5' } }, { json.decode, { "[ 10, \"test\" ]" }, true, { { 10, "test" } } }, function () os.setlocale("C") + cjson.update_locale() return "Reverting locale to POSIX" end } -- cgit v1.2.3-55-g6feb