diff options
Diffstat (limited to 'src/lib_string.c')
-rw-r--r-- | src/lib_string.c | 234 |
1 file changed, 121 insertions, 113 deletions
diff --git a/src/lib_string.c b/src/lib_string.c index 9e8ab900..09010b15 100644 --- a/src/lib_string.c +++ b/src/lib_string.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include "lj_obj.h" | 18 | #include "lj_obj.h" |
19 | #include "lj_gc.h" | 19 | #include "lj_gc.h" |
20 | #include "lj_err.h" | 20 | #include "lj_err.h" |
21 | #include "lj_buf.h" | ||
21 | #include "lj_str.h" | 22 | #include "lj_str.h" |
22 | #include "lj_tab.h" | 23 | #include "lj_tab.h" |
23 | #include "lj_meta.h" | 24 | #include "lj_meta.h" |
@@ -64,7 +65,7 @@ LJLIB_ASM(string_byte) LJLIB_REC(string_range 0) | |||
64 | LJLIB_ASM(string_char) | 65 | LJLIB_ASM(string_char) |
65 | { | 66 | { |
66 | int i, nargs = (int)(L->top - L->base); | 67 | int i, nargs = (int)(L->top - L->base); |
67 | char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (size_t)nargs); | 68 | char *buf = lj_buf_tmp(L, (size_t)nargs); |
68 | for (i = 1; i <= nargs; i++) { | 69 | for (i = 1; i <= nargs; i++) { |
69 | int32_t k = lj_lib_checkint(L, i); | 70 | int32_t k = lj_lib_checkint(L, i); |
70 | if (!checku8(k)) | 71 | if (!checku8(k)) |
@@ -91,8 +92,6 @@ LJLIB_ASM(string_rep) | |||
91 | int32_t len = (int32_t)s->len; | 92 | int32_t len = (int32_t)s->len; |
92 | global_State *g = G(L); | 93 | global_State *g = G(L); |
93 | int64_t tlen; | 94 | int64_t tlen; |
94 | const char *src; | ||
95 | char *buf; | ||
96 | if (k <= 0) { | 95 | if (k <= 0) { |
97 | empty: | 96 | empty: |
98 | setstrV(L, L->base-1, &g->strempty); | 97 | setstrV(L, L->base-1, &g->strempty); |
@@ -110,31 +109,34 @@ LJLIB_ASM(string_rep) | |||
110 | if (tlen > LJ_MAX_STR) | 109 | if (tlen > LJ_MAX_STR) |
111 | lj_err_caller(L, LJ_ERR_STROV); | 110 | lj_err_caller(L, LJ_ERR_STROV); |
112 | } | 111 | } |
113 | if (tlen == 0) goto empty; | 112 | if (tlen == 0) { |
114 | buf = lj_str_needbuf(L, &g->tmpbuf, (MSize)tlen); | 113 | goto empty; |
115 | src = strdata(s); | 114 | } else { |
116 | if (sep) { | 115 | char *buf = lj_buf_tmp(L, (MSize)tlen), *p = buf; |
117 | tlen -= sep->len; /* Ignore trailing separator. */ | 116 | const char *src = strdata(s); |
118 | if (k > 1) { /* Paste one string and one separator. */ | 117 | if (sep) { |
119 | int32_t i; | 118 | tlen -= sep->len; /* Ignore trailing separator. */ |
120 | i = 0; while (i < len) *buf++ = src[i++]; | 119 | if (k > 1) { /* Paste one string and one separator. */ |
121 | src = strdata(sep); len = sep->len; | 120 | int32_t i; |
122 | i = 0; while (i < len) *buf++ = src[i++]; | 121 | i = 0; while (i < len) *p++ = src[i++]; |
123 | src = g->tmpbuf.buf; len += s->len; k--; /* Now copy that k-1 times. */ | 122 | src = strdata(sep); len = sep->len; |
123 | i = 0; while (i < len) *p++ = src[i++]; | ||
124 | src = buf; len += s->len; k--; /* Now copy that k-1 times. */ | ||
125 | } | ||
124 | } | 126 | } |
127 | do { | ||
128 | int32_t i = 0; | ||
129 | do { *p++ = src[i++]; } while (i < len); | ||
130 | } while (--k > 0); | ||
131 | setstrV(L, L->base-1, lj_str_new(L, buf, (size_t)tlen)); | ||
125 | } | 132 | } |
126 | do { | ||
127 | int32_t i = 0; | ||
128 | do { *buf++ = src[i++]; } while (i < len); | ||
129 | } while (--k > 0); | ||
130 | setstrV(L, L->base-1, lj_str_new(L, g->tmpbuf.buf, (size_t)tlen)); | ||
131 | return FFH_RES(1); | 133 | return FFH_RES(1); |
132 | } | 134 | } |
133 | 135 | ||
134 | LJLIB_ASM(string_reverse) | 136 | LJLIB_ASM(string_reverse) |
135 | { | 137 | { |
136 | GCstr *s = lj_lib_checkstr(L, 1); | 138 | GCstr *s = lj_lib_checkstr(L, 1); |
137 | lj_str_needbuf(L, &G(L)->tmpbuf, s->len); | 139 | lj_buf_tmp(L, s->len); |
138 | return FFH_RETRY; | 140 | return FFH_RETRY; |
139 | } | 141 | } |
140 | LJLIB_ASM_(string_lower) | 142 | LJLIB_ASM_(string_lower) |
@@ -142,9 +144,9 @@ LJLIB_ASM_(string_upper) | |||
142 | 144 | ||
143 | /* ------------------------------------------------------------------------ */ | 145 | /* ------------------------------------------------------------------------ */ |
144 | 146 | ||
145 | static int writer_buf(lua_State *L, const void *p, size_t size, void *b) | 147 | static int writer_buf(lua_State *L, const void *p, size_t size, void *sb) |
146 | { | 148 | { |
147 | luaL_addlstring((luaL_Buffer *)b, (const char *)p, size); | 149 | lj_buf_putmem((SBuf *)sb, p, (MSize)size); |
148 | UNUSED(L); | 150 | UNUSED(L); |
149 | return 0; | 151 | return 0; |
150 | } | 152 | } |
@@ -153,12 +155,14 @@ LJLIB_CF(string_dump) | |||
153 | { | 155 | { |
154 | GCfunc *fn = lj_lib_checkfunc(L, 1); | 156 | GCfunc *fn = lj_lib_checkfunc(L, 1); |
155 | int strip = L->base+1 < L->top && tvistruecond(L->base+1); | 157 | int strip = L->base+1 < L->top && tvistruecond(L->base+1); |
156 | luaL_Buffer b; | 158 | SBuf *sb = &G(L)->tmpbuf; /* Assumes lj_bcwrite() doesn't use tmpbuf. */ |
159 | setmref(sb->L, L); | ||
160 | lj_buf_reset(sb); | ||
157 | L->top = L->base+1; | 161 | L->top = L->base+1; |
158 | luaL_buffinit(L, &b); | 162 | if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip)) |
159 | if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, &b, strip)) | ||
160 | lj_err_caller(L, LJ_ERR_STRDUMP); | 163 | lj_err_caller(L, LJ_ERR_STRDUMP); |
161 | luaL_pushresult(&b); | 164 | setstrV(L, L->top-1, lj_buf_str(L, sb)); |
165 | lj_gc_check(L); | ||
162 | return 1; | 166 | return 1; |
163 | } | 167 | } |
164 | 168 | ||
@@ -698,76 +702,81 @@ LJLIB_CF(string_gsub) | |||
698 | 702 | ||
699 | /* ------------------------------------------------------------------------ */ | 703 | /* ------------------------------------------------------------------------ */ |
700 | 704 | ||
701 | /* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */ | 705 | /* Max. buffer size needed (at least #string.format("%99.99f", -1e308)). */ |
702 | #define MAX_FMTITEM 512 | 706 | #define STRING_FMT_MAXBUF 512 |
703 | /* valid flags in a format specification */ | 707 | /* Valid format specifier flags. */ |
704 | #define FMT_FLAGS "-+ #0" | 708 | #define STRING_FMT_FLAGS "-+ #0" |
705 | /* | 709 | /* Max. format specifier size. */ |
706 | ** maximum size of each format specification (such as '%-099.99d') | 710 | #define STRING_FMT_MAXSPEC \ |
707 | ** (+10 accounts for %99.99x plus margin of error) | 711 | (sizeof(STRING_FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10) |
708 | */ | ||
709 | #define MAX_FMTSPEC (sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10) | ||
710 | 712 | ||
711 | static void addquoted(lua_State *L, luaL_Buffer *b, int arg) | 713 | /* Add quoted string to buffer. */ |
714 | static void string_fmt_quoted(SBuf *sb, GCstr *str) | ||
712 | { | 715 | { |
713 | GCstr *str = lj_lib_checkstr(L, arg); | ||
714 | int32_t len = (int32_t)str->len; | ||
715 | const char *s = strdata(str); | 716 | const char *s = strdata(str); |
716 | luaL_addchar(b, '"'); | 717 | MSize len = str->len; |
718 | lj_buf_putb(sb, '"'); | ||
717 | while (len--) { | 719 | while (len--) { |
718 | uint32_t c = uchar(*s); | 720 | uint32_t c = (uint32_t)(uint8_t)*s++; |
721 | char *p = lj_buf_more(sb, 4); | ||
719 | if (c == '"' || c == '\\' || c == '\n') { | 722 | if (c == '"' || c == '\\' || c == '\n') { |
720 | luaL_addchar(b, '\\'); | 723 | *p++ = '\\'; |
721 | } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */ | 724 | } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */ |
722 | uint32_t d; | 725 | uint32_t d; |
723 | luaL_addchar(b, '\\'); | 726 | *p++ = '\\'; |
724 | if (c >= 100 || lj_char_isdigit(uchar(s[1]))) { | 727 | if (c >= 100 || lj_char_isdigit((uint8_t)*s)) { |
725 | luaL_addchar(b, '0'+(c >= 100)); if (c >= 100) c -= 100; | 728 | *p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100; |
726 | goto tens; | 729 | goto tens; |
727 | } else if (c >= 10) { | 730 | } else if (c >= 10) { |
728 | tens: | 731 | tens: |
729 | d = (c * 205) >> 11; c -= d * 10; luaL_addchar(b, '0'+d); | 732 | d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d); |
730 | } | 733 | } |
731 | c += '0'; | 734 | c += '0'; |
732 | } | 735 | } |
733 | luaL_addchar(b, c); | 736 | *p++ = (char)c; |
734 | s++; | 737 | setsbufP(sb, p); |
735 | } | 738 | } |
736 | luaL_addchar(b, '"'); | 739 | lj_buf_putb(sb, '"'); |
737 | } | 740 | } |
738 | 741 | ||
739 | static const char *scanformat(lua_State *L, const char *strfrmt, char *form) | 742 | /* Scan format and generate format specifier. */ |
743 | static const char *string_fmt_scan(lua_State *L, char *spec, const char *fmt) | ||
740 | { | 744 | { |
741 | const char *p = strfrmt; | 745 | const char *p = fmt; |
742 | while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++; /* skip flags */ | 746 | while (*p && strchr(STRING_FMT_FLAGS, *p) != NULL) p++; /* Skip flags. */ |
743 | if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS)) | 747 | if ((size_t)(p - fmt) >= sizeof(STRING_FMT_FLAGS)) |
744 | lj_err_caller(L, LJ_ERR_STRFMTR); | 748 | lj_err_caller(L, LJ_ERR_STRFMTR); |
745 | if (lj_char_isdigit(uchar(*p))) p++; /* skip width */ | 749 | if (lj_char_isdigit((uint8_t)*p)) p++; /* Skip max. 2 digits for width. */ |
746 | if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */ | 750 | if (lj_char_isdigit((uint8_t)*p)) p++; |
747 | if (*p == '.') { | 751 | if (*p == '.') { |
748 | p++; | 752 | p++; |
749 | if (lj_char_isdigit(uchar(*p))) p++; /* skip precision */ | 753 | if (lj_char_isdigit((uint8_t)*p)) p++; /* Skip max. 2 digits for prec. */ |
750 | if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */ | 754 | if (lj_char_isdigit((uint8_t)*p)) p++; |
751 | } | 755 | } |
752 | if (lj_char_isdigit(uchar(*p))) | 756 | if (lj_char_isdigit((uint8_t)*p)) |
753 | lj_err_caller(L, LJ_ERR_STRFMTW); | 757 | lj_err_caller(L, LJ_ERR_STRFMTW); |
754 | *(form++) = '%'; | 758 | *spec++ = '%'; |
755 | strncpy(form, strfrmt, (size_t)(p - strfrmt + 1)); | 759 | strncpy(spec, fmt, (size_t)(p - fmt + 1)); |
756 | form += p - strfrmt + 1; | 760 | spec += p - fmt + 1; |
757 | *form = '\0'; | 761 | *spec = '\0'; |
758 | return p; | 762 | return p; |
759 | } | 763 | } |
760 | 764 | ||
761 | static void addintlen(char *form) | 765 | /* Patch LUA_INTFRMLEN into integer format specifier. */ |
766 | static void string_fmt_intfmt(char *spec) | ||
762 | { | 767 | { |
763 | size_t l = strlen(form); | 768 | char c; |
764 | char spec = form[l - 1]; | 769 | do { |
765 | strcpy(form + l - 1, LUA_INTFRMLEN); | 770 | c = *spec++; |
766 | form[l + sizeof(LUA_INTFRMLEN) - 2] = spec; | 771 | } while (*spec); |
767 | form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0'; | 772 | *--spec = (LUA_INTFRMLEN)[0]; |
773 | if ((LUA_INTFRMLEN)[1]) *++spec = (LUA_INTFRMLEN)[1]; | ||
774 | *++spec = c; | ||
775 | *++spec = '\0'; | ||
768 | } | 776 | } |
769 | 777 | ||
770 | static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg) | 778 | /* Derive sprintf argument for integer format. Ugly. */ |
779 | static LUA_INTFRM_T string_fmt_intarg(lua_State *L, int arg) | ||
771 | { | 780 | { |
772 | if (sizeof(LUA_INTFRM_T) == 4) { | 781 | if (sizeof(LUA_INTFRM_T) == 4) { |
773 | return (LUA_INTFRM_T)lj_lib_checkbit(L, arg); | 782 | return (LUA_INTFRM_T)lj_lib_checkbit(L, arg); |
@@ -782,7 +791,8 @@ static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg) | |||
782 | } | 791 | } |
783 | } | 792 | } |
784 | 793 | ||
785 | static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg) | 794 | /* Derive sprintf argument for unsigned integer format. Ugly. */ |
795 | static unsigned LUA_INTFRM_T string_fmt_uintarg(lua_State *L, int arg) | ||
786 | { | 796 | { |
787 | if (sizeof(LUA_INTFRM_T) == 4) { | 797 | if (sizeof(LUA_INTFRM_T) == 4) { |
788 | return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg); | 798 | return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg); |
@@ -799,7 +809,8 @@ static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg) | |||
799 | } | 809 | } |
800 | } | 810 | } |
801 | 811 | ||
802 | static GCstr *meta_tostring(lua_State *L, int arg) | 812 | /* Emulate tostring() inline. */ |
813 | static GCstr *string_fmt_tostring(lua_State *L, int arg) | ||
803 | { | 814 | { |
804 | TValue *o = L->base+arg-1; | 815 | TValue *o = L->base+arg-1; |
805 | cTValue *mo; | 816 | cTValue *mo; |
@@ -837,33 +848,33 @@ static GCstr *meta_tostring(lua_State *L, int arg) | |||
837 | LJLIB_CF(string_format) | 848 | LJLIB_CF(string_format) |
838 | { | 849 | { |
839 | int arg = 1, top = (int)(L->top - L->base); | 850 | int arg = 1, top = (int)(L->top - L->base); |
840 | GCstr *fmt = lj_lib_checkstr(L, arg); | 851 | GCstr *sfmt = lj_lib_checkstr(L, arg); |
841 | const char *strfrmt = strdata(fmt); | 852 | const char *fmt = strdata(sfmt); |
842 | const char *strfrmt_end = strfrmt + fmt->len; | 853 | const char *efmt = fmt + sfmt->len; |
843 | luaL_Buffer b; | 854 | SBuf *sb = &G(L)->tmpbuf; |
844 | luaL_buffinit(L, &b); | 855 | setmref(sb->L, L); |
845 | while (strfrmt < strfrmt_end) { | 856 | lj_buf_reset(sb); |
846 | if (*strfrmt != L_ESC) { | 857 | while (fmt < efmt) { |
847 | luaL_addchar(&b, *strfrmt++); | 858 | if (*fmt != L_ESC || *++fmt == L_ESC) { |
848 | } else if (*++strfrmt == L_ESC) { | 859 | lj_buf_putb(sb, *fmt++); |
849 | luaL_addchar(&b, *strfrmt++); /* %% */ | 860 | } else { |
850 | } else { /* format item */ | 861 | char buf[STRING_FMT_MAXBUF]; |
851 | char form[MAX_FMTSPEC]; /* to store the format (`%...') */ | 862 | char spec[STRING_FMT_MAXSPEC]; |
852 | char buff[MAX_FMTITEM]; /* to store the formatted item */ | 863 | MSize len = 0; |
853 | if (++arg > top) | 864 | if (++arg > top) |
854 | luaL_argerror(L, arg, lj_obj_typename[0]); | 865 | luaL_argerror(L, arg, lj_obj_typename[0]); |
855 | strfrmt = scanformat(L, strfrmt, form); | 866 | fmt = string_fmt_scan(L, spec, fmt); |
856 | switch (*strfrmt++) { | 867 | switch (*fmt++) { |
857 | case 'c': | 868 | case 'c': |
858 | sprintf(buff, form, lj_lib_checkint(L, arg)); | 869 | len = (MSize)sprintf(buf, spec, lj_lib_checkint(L, arg)); |
859 | break; | 870 | break; |
860 | case 'd': case 'i': | 871 | case 'd': case 'i': |
861 | addintlen(form); | 872 | string_fmt_intfmt(spec); |
862 | sprintf(buff, form, num2intfrm(L, arg)); | 873 | len = (MSize)sprintf(buf, spec, string_fmt_intarg(L, arg)); |
863 | break; | 874 | break; |
864 | case 'o': case 'u': case 'x': case 'X': | 875 | case 'o': case 'u': case 'x': case 'X': |
865 | addintlen(form); | 876 | string_fmt_intfmt(spec); |
866 | sprintf(buff, form, num2uintfrm(L, arg)); | 877 | len = (MSize)sprintf(buf, spec, string_fmt_uintarg(L, arg)); |
867 | break; | 878 | break; |
868 | case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': { | 879 | case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': { |
869 | TValue tv; | 880 | TValue tv; |
@@ -871,48 +882,45 @@ LJLIB_CF(string_format) | |||
871 | if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) { | 882 | if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) { |
872 | /* Canonicalize output of non-finite values. */ | 883 | /* Canonicalize output of non-finite values. */ |
873 | char *p, nbuf[LJ_STR_NUMBUF]; | 884 | char *p, nbuf[LJ_STR_NUMBUF]; |
874 | size_t len = lj_str_bufnum(nbuf, &tv); | 885 | MSize n = lj_str_bufnum(nbuf, &tv); |
875 | if (strfrmt[-1] < 'a') { | 886 | if (fmt[-1] < 'a') { |
876 | nbuf[len-3] = nbuf[len-3] - 0x20; | 887 | nbuf[n-3] = nbuf[n-3] - 0x20; |
877 | nbuf[len-2] = nbuf[len-2] - 0x20; | 888 | nbuf[n-2] = nbuf[n-2] - 0x20; |
878 | nbuf[len-1] = nbuf[len-1] - 0x20; | 889 | nbuf[n-1] = nbuf[n-1] - 0x20; |
879 | } | 890 | } |
880 | nbuf[len] = '\0'; | 891 | nbuf[n] = '\0'; |
881 | for (p = form; *p < 'A' && *p != '.'; p++) ; | 892 | for (p = spec; *p < 'A' && *p != '.'; p++) ; |
882 | *p++ = 's'; *p = '\0'; | 893 | *p++ = 's'; *p = '\0'; |
883 | sprintf(buff, form, nbuf); | 894 | len = (MSize)sprintf(buf, spec, nbuf); |
884 | break; | 895 | break; |
885 | } | 896 | } |
886 | sprintf(buff, form, (double)tv.n); | 897 | len = (MSize)sprintf(buf, spec, (double)tv.n); |
887 | break; | 898 | break; |
888 | } | 899 | } |
889 | case 'q': | 900 | case 'q': |
890 | addquoted(L, &b, arg); | 901 | string_fmt_quoted(sb, lj_lib_checkstr(L, arg)); |
891 | continue; | 902 | continue; |
892 | case 'p': | 903 | case 'p': |
893 | lj_str_pushf(L, "%p", lua_topointer(L, arg)); | 904 | len = lj_str_bufptr(buf, lua_topointer(L, arg)); |
894 | luaL_addvalue(&b); | 905 | break; |
895 | continue; | ||
896 | case 's': { | 906 | case 's': { |
897 | GCstr *str = meta_tostring(L, arg); | 907 | GCstr *str = string_fmt_tostring(L, arg); |
898 | if (!strchr(form, '.') && str->len >= 100) { | 908 | if (!strchr(spec, '.') && str->len >= 100) { /* Format overflow? */ |
899 | /* no precision and string is too long to be formatted; | 909 | lj_buf_putmem(sb, strdata(str), str->len); /* Use orig string. */ |
900 | keep original string */ | ||
901 | setstrV(L, L->top++, str); | ||
902 | luaL_addvalue(&b); | ||
903 | continue; | 910 | continue; |
904 | } | 911 | } |
905 | sprintf(buff, form, strdata(str)); | 912 | len = (MSize)sprintf(buf, spec, strdata(str)); |
906 | break; | 913 | break; |
907 | } | 914 | } |
908 | default: | 915 | default: |
909 | lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1)); | 916 | lj_err_callerv(L, LJ_ERR_STRFMTO, fmt[-1] ? fmt[-1] : ' '); |
910 | break; | 917 | break; |
911 | } | 918 | } |
912 | luaL_addlstring(&b, buff, strlen(buff)); | 919 | lj_buf_putmem(sb, buf, len); |
913 | } | 920 | } |
914 | } | 921 | } |
915 | luaL_pushresult(&b); | 922 | setstrV(L, L->top-1, lj_buf_str(L, sb)); |
923 | lj_gc_check(L); | ||
916 | return 1; | 924 | return 1; |
917 | } | 925 | } |
918 | 926 | ||