diff options
Diffstat (limited to 'src/lib_string.c')
-rw-r--r-- | src/lib_string.c | 178 |
1 files changed, 91 insertions, 87 deletions
diff --git a/src/lib_string.c b/src/lib_string.c index ada0bf6d..09010b15 100644 --- a/src/lib_string.c +++ b/src/lib_string.c | |||
@@ -702,76 +702,81 @@ LJLIB_CF(string_gsub) | |||
702 | 702 | ||
703 | /* ------------------------------------------------------------------------ */ | 703 | /* ------------------------------------------------------------------------ */ |
704 | 704 | ||
705 | /* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */ | 705 | /* Max. buffer size needed (at least #string.format("%99.99f", -1e308)). */ |
706 | #define MAX_FMTITEM 512 | 706 | #define STRING_FMT_MAXBUF 512 |
707 | /* valid flags in a format specification */ | 707 | /* Valid format specifier flags. */ |
708 | #define FMT_FLAGS "-+ #0" | 708 | #define STRING_FMT_FLAGS "-+ #0" |
709 | /* | 709 | /* Max. format specifier size. */ |
710 | ** maximum size of each format specification (such as '%-099.99d') | 710 | #define STRING_FMT_MAXSPEC \ |
711 | ** (+10 accounts for %99.99x plus margin of error) | 711 | (sizeof(STRING_FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10) |
712 | */ | ||
713 | #define MAX_FMTSPEC (sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10) | ||
714 | 712 | ||
715 | static void addquoted(lua_State *L, luaL_Buffer *b, int arg) | 713 | /* Add quoted string to buffer. */ |
714 | static void string_fmt_quoted(SBuf *sb, GCstr *str) | ||
716 | { | 715 | { |
717 | GCstr *str = lj_lib_checkstr(L, arg); | ||
718 | int32_t len = (int32_t)str->len; | ||
719 | const char *s = strdata(str); | 716 | const char *s = strdata(str); |
720 | luaL_addchar(b, '"'); | 717 | MSize len = str->len; |
718 | lj_buf_putb(sb, '"'); | ||
721 | while (len--) { | 719 | while (len--) { |
722 | uint32_t c = uchar(*s); | 720 | uint32_t c = (uint32_t)(uint8_t)*s++; |
721 | char *p = lj_buf_more(sb, 4); | ||
723 | if (c == '"' || c == '\\' || c == '\n') { | 722 | if (c == '"' || c == '\\' || c == '\n') { |
724 | luaL_addchar(b, '\\'); | 723 | *p++ = '\\'; |
725 | } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */ | 724 | } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */ |
726 | uint32_t d; | 725 | uint32_t d; |
727 | luaL_addchar(b, '\\'); | 726 | *p++ = '\\'; |
728 | if (c >= 100 || lj_char_isdigit(uchar(s[1]))) { | 727 | if (c >= 100 || lj_char_isdigit((uint8_t)*s)) { |
729 | luaL_addchar(b, '0'+(c >= 100)); if (c >= 100) c -= 100; | 728 | *p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100; |
730 | goto tens; | 729 | goto tens; |
731 | } else if (c >= 10) { | 730 | } else if (c >= 10) { |
732 | tens: | 731 | tens: |
733 | d = (c * 205) >> 11; c -= d * 10; luaL_addchar(b, '0'+d); | 732 | d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d); |
734 | } | 733 | } |
735 | c += '0'; | 734 | c += '0'; |
736 | } | 735 | } |
737 | luaL_addchar(b, c); | 736 | *p++ = (char)c; |
738 | s++; | 737 | setsbufP(sb, p); |
739 | } | 738 | } |
740 | luaL_addchar(b, '"'); | 739 | lj_buf_putb(sb, '"'); |
741 | } | 740 | } |
742 | 741 | ||
743 | static const char *scanformat(lua_State *L, const char *strfrmt, char *form) | 742 | /* Scan format and generate format specifier. */ |
743 | static const char *string_fmt_scan(lua_State *L, char *spec, const char *fmt) | ||
744 | { | 744 | { |
745 | const char *p = strfrmt; | 745 | const char *p = fmt; |
746 | while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++; /* skip flags */ | 746 | while (*p && strchr(STRING_FMT_FLAGS, *p) != NULL) p++; /* Skip flags. */ |
747 | if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS)) | 747 | if ((size_t)(p - fmt) >= sizeof(STRING_FMT_FLAGS)) |
748 | lj_err_caller(L, LJ_ERR_STRFMTR); | 748 | lj_err_caller(L, LJ_ERR_STRFMTR); |
749 | if (lj_char_isdigit(uchar(*p))) p++; /* skip width */ | 749 | if (lj_char_isdigit((uint8_t)*p)) p++; /* Skip max. 2 digits for width. */ |
750 | if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */ | 750 | if (lj_char_isdigit((uint8_t)*p)) p++; |
751 | if (*p == '.') { | 751 | if (*p == '.') { |
752 | p++; | 752 | p++; |
753 | if (lj_char_isdigit(uchar(*p))) p++; /* skip precision */ | 753 | if (lj_char_isdigit((uint8_t)*p)) p++; /* Skip max. 2 digits for prec. */ |
754 | if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */ | 754 | if (lj_char_isdigit((uint8_t)*p)) p++; |
755 | } | 755 | } |
756 | if (lj_char_isdigit(uchar(*p))) | 756 | if (lj_char_isdigit((uint8_t)*p)) |
757 | lj_err_caller(L, LJ_ERR_STRFMTW); | 757 | lj_err_caller(L, LJ_ERR_STRFMTW); |
758 | *(form++) = '%'; | 758 | *spec++ = '%'; |
759 | strncpy(form, strfrmt, (size_t)(p - strfrmt + 1)); | 759 | strncpy(spec, fmt, (size_t)(p - fmt + 1)); |
760 | form += p - strfrmt + 1; | 760 | spec += p - fmt + 1; |
761 | *form = '\0'; | 761 | *spec = '\0'; |
762 | return p; | 762 | return p; |
763 | } | 763 | } |
764 | 764 | ||
765 | static void addintlen(char *form) | 765 | /* Patch LUA_INTFRMLEN into integer format specifier. */ |
766 | static void string_fmt_intfmt(char *spec) | ||
766 | { | 767 | { |
767 | size_t l = strlen(form); | 768 | char c; |
768 | char spec = form[l - 1]; | 769 | do { |
769 | strcpy(form + l - 1, LUA_INTFRMLEN); | 770 | c = *spec++; |
770 | form[l + sizeof(LUA_INTFRMLEN) - 2] = spec; | 771 | } while (*spec); |
771 | form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0'; | 772 | *--spec = (LUA_INTFRMLEN)[0]; |
773 | if ((LUA_INTFRMLEN)[1]) *++spec = (LUA_INTFRMLEN)[1]; | ||
774 | *++spec = c; | ||
775 | *++spec = '\0'; | ||
772 | } | 776 | } |
773 | 777 | ||
774 | static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg) | 778 | /* Derive sprintf argument for integer format. Ugly. */ |
779 | static LUA_INTFRM_T string_fmt_intarg(lua_State *L, int arg) | ||
775 | { | 780 | { |
776 | if (sizeof(LUA_INTFRM_T) == 4) { | 781 | if (sizeof(LUA_INTFRM_T) == 4) { |
777 | return (LUA_INTFRM_T)lj_lib_checkbit(L, arg); | 782 | return (LUA_INTFRM_T)lj_lib_checkbit(L, arg); |
@@ -786,7 +791,8 @@ static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg) | |||
786 | } | 791 | } |
787 | } | 792 | } |
788 | 793 | ||
789 | static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg) | 794 | /* Derive sprintf argument for unsigned integer format. Ugly. */ |
795 | static unsigned LUA_INTFRM_T string_fmt_uintarg(lua_State *L, int arg) | ||
790 | { | 796 | { |
791 | if (sizeof(LUA_INTFRM_T) == 4) { | 797 | if (sizeof(LUA_INTFRM_T) == 4) { |
792 | return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg); | 798 | return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg); |
@@ -803,7 +809,8 @@ static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg) | |||
803 | } | 809 | } |
804 | } | 810 | } |
805 | 811 | ||
806 | static GCstr *meta_tostring(lua_State *L, int arg) | 812 | /* Emulate tostring() inline. */ |
813 | static GCstr *string_fmt_tostring(lua_State *L, int arg) | ||
807 | { | 814 | { |
808 | TValue *o = L->base+arg-1; | 815 | TValue *o = L->base+arg-1; |
809 | cTValue *mo; | 816 | cTValue *mo; |
@@ -841,33 +848,33 @@ static GCstr *meta_tostring(lua_State *L, int arg) | |||
841 | LJLIB_CF(string_format) | 848 | LJLIB_CF(string_format) |
842 | { | 849 | { |
843 | int arg = 1, top = (int)(L->top - L->base); | 850 | int arg = 1, top = (int)(L->top - L->base); |
844 | GCstr *fmt = lj_lib_checkstr(L, arg); | 851 | GCstr *sfmt = lj_lib_checkstr(L, arg); |
845 | const char *strfrmt = strdata(fmt); | 852 | const char *fmt = strdata(sfmt); |
846 | const char *strfrmt_end = strfrmt + fmt->len; | 853 | const char *efmt = fmt + sfmt->len; |
847 | luaL_Buffer b; | 854 | SBuf *sb = &G(L)->tmpbuf; |
848 | luaL_buffinit(L, &b); | 855 | setmref(sb->L, L); |
849 | while (strfrmt < strfrmt_end) { | 856 | lj_buf_reset(sb); |
850 | if (*strfrmt != L_ESC) { | 857 | while (fmt < efmt) { |
851 | luaL_addchar(&b, *strfrmt++); | 858 | if (*fmt != L_ESC || *++fmt == L_ESC) { |
852 | } else if (*++strfrmt == L_ESC) { | 859 | lj_buf_putb(sb, *fmt++); |
853 | luaL_addchar(&b, *strfrmt++); /* %% */ | 860 | } else { |
854 | } else { /* format item */ | 861 | char buf[STRING_FMT_MAXBUF]; |
855 | char form[MAX_FMTSPEC]; /* to store the format (`%...') */ | 862 | char spec[STRING_FMT_MAXSPEC]; |
856 | char buff[MAX_FMTITEM]; /* to store the formatted item */ | 863 | MSize len = 0; |
857 | if (++arg > top) | 864 | if (++arg > top) |
858 | luaL_argerror(L, arg, lj_obj_typename[0]); | 865 | luaL_argerror(L, arg, lj_obj_typename[0]); |
859 | strfrmt = scanformat(L, strfrmt, form); | 866 | fmt = string_fmt_scan(L, spec, fmt); |
860 | switch (*strfrmt++) { | 867 | switch (*fmt++) { |
861 | case 'c': | 868 | case 'c': |
862 | sprintf(buff, form, lj_lib_checkint(L, arg)); | 869 | len = (MSize)sprintf(buf, spec, lj_lib_checkint(L, arg)); |
863 | break; | 870 | break; |
864 | case 'd': case 'i': | 871 | case 'd': case 'i': |
865 | addintlen(form); | 872 | string_fmt_intfmt(spec); |
866 | sprintf(buff, form, num2intfrm(L, arg)); | 873 | len = (MSize)sprintf(buf, spec, string_fmt_intarg(L, arg)); |
867 | break; | 874 | break; |
868 | case 'o': case 'u': case 'x': case 'X': | 875 | case 'o': case 'u': case 'x': case 'X': |
869 | addintlen(form); | 876 | string_fmt_intfmt(spec); |
870 | sprintf(buff, form, num2uintfrm(L, arg)); | 877 | len = (MSize)sprintf(buf, spec, string_fmt_uintarg(L, arg)); |
871 | break; | 878 | break; |
872 | case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': { | 879 | case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': { |
873 | TValue tv; | 880 | TValue tv; |
@@ -875,48 +882,45 @@ LJLIB_CF(string_format) | |||
875 | if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) { | 882 | if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) { |
876 | /* Canonicalize output of non-finite values. */ | 883 | /* Canonicalize output of non-finite values. */ |
877 | char *p, nbuf[LJ_STR_NUMBUF]; | 884 | char *p, nbuf[LJ_STR_NUMBUF]; |
878 | MSize len = lj_str_bufnum(nbuf, &tv); | 885 | MSize n = lj_str_bufnum(nbuf, &tv); |
879 | if (strfrmt[-1] < 'a') { | 886 | if (fmt[-1] < 'a') { |
880 | nbuf[len-3] = nbuf[len-3] - 0x20; | 887 | nbuf[n-3] = nbuf[n-3] - 0x20; |
881 | nbuf[len-2] = nbuf[len-2] - 0x20; | 888 | nbuf[n-2] = nbuf[n-2] - 0x20; |
882 | nbuf[len-1] = nbuf[len-1] - 0x20; | 889 | nbuf[n-1] = nbuf[n-1] - 0x20; |
883 | } | 890 | } |
884 | nbuf[len] = '\0'; | 891 | nbuf[n] = '\0'; |
885 | for (p = form; *p < 'A' && *p != '.'; p++) ; | 892 | for (p = spec; *p < 'A' && *p != '.'; p++) ; |
886 | *p++ = 's'; *p = '\0'; | 893 | *p++ = 's'; *p = '\0'; |
887 | sprintf(buff, form, nbuf); | 894 | len = (MSize)sprintf(buf, spec, nbuf); |
888 | break; | 895 | break; |
889 | } | 896 | } |
890 | sprintf(buff, form, (double)tv.n); | 897 | len = (MSize)sprintf(buf, spec, (double)tv.n); |
891 | break; | 898 | break; |
892 | } | 899 | } |
893 | case 'q': | 900 | case 'q': |
894 | addquoted(L, &b, arg); | 901 | string_fmt_quoted(sb, lj_lib_checkstr(L, arg)); |
895 | continue; | 902 | continue; |
896 | case 'p': | 903 | case 'p': |
897 | lj_str_pushf(L, "%p", lua_topointer(L, arg)); | 904 | len = lj_str_bufptr(buf, lua_topointer(L, arg)); |
898 | luaL_addvalue(&b); | 905 | break; |
899 | continue; | ||
900 | case 's': { | 906 | case 's': { |
901 | GCstr *str = meta_tostring(L, arg); | 907 | GCstr *str = string_fmt_tostring(L, arg); |
902 | if (!strchr(form, '.') && str->len >= 100) { | 908 | if (!strchr(spec, '.') && str->len >= 100) { /* Format overflow? */ |
903 | /* no precision and string is too long to be formatted; | 909 | lj_buf_putmem(sb, strdata(str), str->len); /* Use orig string. */ |
904 | keep original string */ | ||
905 | setstrV(L, L->top++, str); | ||
906 | luaL_addvalue(&b); | ||
907 | continue; | 910 | continue; |
908 | } | 911 | } |
909 | sprintf(buff, form, strdata(str)); | 912 | len = (MSize)sprintf(buf, spec, strdata(str)); |
910 | break; | 913 | break; |
911 | } | 914 | } |
912 | default: | 915 | default: |
913 | lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1)); | 916 | lj_err_callerv(L, LJ_ERR_STRFMTO, fmt[-1] ? fmt[-1] : ' '); |
914 | break; | 917 | break; |
915 | } | 918 | } |
916 | luaL_addlstring(&b, buff, strlen(buff)); | 919 | lj_buf_putmem(sb, buf, len); |
917 | } | 920 | } |
918 | } | 921 | } |
919 | luaL_pushresult(&b); | 922 | setstrV(L, L->top-1, lj_buf_str(L, sb)); |
923 | lj_gc_check(L); | ||
920 | return 1; | 924 | return 1; |
921 | } | 925 | } |
922 | 926 | ||