aboutsummaryrefslogtreecommitdiff
path: root/src/lib_string.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib_string.c')
-rw-r--r--src/lib_string.c234
1 files changed, 121 insertions, 113 deletions
diff --git a/src/lib_string.c b/src/lib_string.c
index 9e8ab900..09010b15 100644
--- a/src/lib_string.c
+++ b/src/lib_string.c
@@ -18,6 +18,7 @@
18#include "lj_obj.h" 18#include "lj_obj.h"
19#include "lj_gc.h" 19#include "lj_gc.h"
20#include "lj_err.h" 20#include "lj_err.h"
21#include "lj_buf.h"
21#include "lj_str.h" 22#include "lj_str.h"
22#include "lj_tab.h" 23#include "lj_tab.h"
23#include "lj_meta.h" 24#include "lj_meta.h"
@@ -64,7 +65,7 @@ LJLIB_ASM(string_byte) LJLIB_REC(string_range 0)
64LJLIB_ASM(string_char) 65LJLIB_ASM(string_char)
65{ 66{
66 int i, nargs = (int)(L->top - L->base); 67 int i, nargs = (int)(L->top - L->base);
67 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (size_t)nargs); 68 char *buf = lj_buf_tmp(L, (size_t)nargs);
68 for (i = 1; i <= nargs; i++) { 69 for (i = 1; i <= nargs; i++) {
69 int32_t k = lj_lib_checkint(L, i); 70 int32_t k = lj_lib_checkint(L, i);
70 if (!checku8(k)) 71 if (!checku8(k))
@@ -91,8 +92,6 @@ LJLIB_ASM(string_rep)
91 int32_t len = (int32_t)s->len; 92 int32_t len = (int32_t)s->len;
92 global_State *g = G(L); 93 global_State *g = G(L);
93 int64_t tlen; 94 int64_t tlen;
94 const char *src;
95 char *buf;
96 if (k <= 0) { 95 if (k <= 0) {
97 empty: 96 empty:
98 setstrV(L, L->base-1, &g->strempty); 97 setstrV(L, L->base-1, &g->strempty);
@@ -110,31 +109,34 @@ LJLIB_ASM(string_rep)
110 if (tlen > LJ_MAX_STR) 109 if (tlen > LJ_MAX_STR)
111 lj_err_caller(L, LJ_ERR_STROV); 110 lj_err_caller(L, LJ_ERR_STROV);
112 } 111 }
113 if (tlen == 0) goto empty; 112 if (tlen == 0) {
114 buf = lj_str_needbuf(L, &g->tmpbuf, (MSize)tlen); 113 goto empty;
115 src = strdata(s); 114 } else {
116 if (sep) { 115 char *buf = lj_buf_tmp(L, (MSize)tlen), *p = buf;
117 tlen -= sep->len; /* Ignore trailing separator. */ 116 const char *src = strdata(s);
118 if (k > 1) { /* Paste one string and one separator. */ 117 if (sep) {
119 int32_t i; 118 tlen -= sep->len; /* Ignore trailing separator. */
120 i = 0; while (i < len) *buf++ = src[i++]; 119 if (k > 1) { /* Paste one string and one separator. */
121 src = strdata(sep); len = sep->len; 120 int32_t i;
122 i = 0; while (i < len) *buf++ = src[i++]; 121 i = 0; while (i < len) *p++ = src[i++];
123 src = g->tmpbuf.buf; len += s->len; k--; /* Now copy that k-1 times. */ 122 src = strdata(sep); len = sep->len;
123 i = 0; while (i < len) *p++ = src[i++];
124 src = buf; len += s->len; k--; /* Now copy that k-1 times. */
125 }
124 } 126 }
127 do {
128 int32_t i = 0;
129 do { *p++ = src[i++]; } while (i < len);
130 } while (--k > 0);
131 setstrV(L, L->base-1, lj_str_new(L, buf, (size_t)tlen));
125 } 132 }
126 do {
127 int32_t i = 0;
128 do { *buf++ = src[i++]; } while (i < len);
129 } while (--k > 0);
130 setstrV(L, L->base-1, lj_str_new(L, g->tmpbuf.buf, (size_t)tlen));
131 return FFH_RES(1); 133 return FFH_RES(1);
132} 134}
133 135
134LJLIB_ASM(string_reverse) 136LJLIB_ASM(string_reverse)
135{ 137{
136 GCstr *s = lj_lib_checkstr(L, 1); 138 GCstr *s = lj_lib_checkstr(L, 1);
137 lj_str_needbuf(L, &G(L)->tmpbuf, s->len); 139 lj_buf_tmp(L, s->len);
138 return FFH_RETRY; 140 return FFH_RETRY;
139} 141}
140LJLIB_ASM_(string_lower) 142LJLIB_ASM_(string_lower)
@@ -142,9 +144,9 @@ LJLIB_ASM_(string_upper)
142 144
143/* ------------------------------------------------------------------------ */ 145/* ------------------------------------------------------------------------ */
144 146
145static int writer_buf(lua_State *L, const void *p, size_t size, void *b) 147static int writer_buf(lua_State *L, const void *p, size_t size, void *sb)
146{ 148{
147 luaL_addlstring((luaL_Buffer *)b, (const char *)p, size); 149 lj_buf_putmem((SBuf *)sb, p, (MSize)size);
148 UNUSED(L); 150 UNUSED(L);
149 return 0; 151 return 0;
150} 152}
@@ -153,12 +155,14 @@ LJLIB_CF(string_dump)
153{ 155{
154 GCfunc *fn = lj_lib_checkfunc(L, 1); 156 GCfunc *fn = lj_lib_checkfunc(L, 1);
155 int strip = L->base+1 < L->top && tvistruecond(L->base+1); 157 int strip = L->base+1 < L->top && tvistruecond(L->base+1);
156 luaL_Buffer b; 158 SBuf *sb = &G(L)->tmpbuf; /* Assumes lj_bcwrite() doesn't use tmpbuf. */
159 setmref(sb->L, L);
160 lj_buf_reset(sb);
157 L->top = L->base+1; 161 L->top = L->base+1;
158 luaL_buffinit(L, &b); 162 if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip))
159 if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, &b, strip))
160 lj_err_caller(L, LJ_ERR_STRDUMP); 163 lj_err_caller(L, LJ_ERR_STRDUMP);
161 luaL_pushresult(&b); 164 setstrV(L, L->top-1, lj_buf_str(L, sb));
165 lj_gc_check(L);
162 return 1; 166 return 1;
163} 167}
164 168
@@ -698,76 +702,81 @@ LJLIB_CF(string_gsub)
698 702
699/* ------------------------------------------------------------------------ */ 703/* ------------------------------------------------------------------------ */
700 704
701/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */ 705/* Max. buffer size needed (at least #string.format("%99.99f", -1e308)). */
702#define MAX_FMTITEM 512 706#define STRING_FMT_MAXBUF 512
703/* valid flags in a format specification */ 707/* Valid format specifier flags. */
704#define FMT_FLAGS "-+ #0" 708#define STRING_FMT_FLAGS "-+ #0"
705/* 709/* Max. format specifier size. */
706** maximum size of each format specification (such as '%-099.99d') 710#define STRING_FMT_MAXSPEC \
707** (+10 accounts for %99.99x plus margin of error) 711 (sizeof(STRING_FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
708*/
709#define MAX_FMTSPEC (sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
710 712
711static void addquoted(lua_State *L, luaL_Buffer *b, int arg) 713/* Add quoted string to buffer. */
714static void string_fmt_quoted(SBuf *sb, GCstr *str)
712{ 715{
713 GCstr *str = lj_lib_checkstr(L, arg);
714 int32_t len = (int32_t)str->len;
715 const char *s = strdata(str); 716 const char *s = strdata(str);
716 luaL_addchar(b, '"'); 717 MSize len = str->len;
718 lj_buf_putb(sb, '"');
717 while (len--) { 719 while (len--) {
718 uint32_t c = uchar(*s); 720 uint32_t c = (uint32_t)(uint8_t)*s++;
721 char *p = lj_buf_more(sb, 4);
719 if (c == '"' || c == '\\' || c == '\n') { 722 if (c == '"' || c == '\\' || c == '\n') {
720 luaL_addchar(b, '\\'); 723 *p++ = '\\';
721 } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */ 724 } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
722 uint32_t d; 725 uint32_t d;
723 luaL_addchar(b, '\\'); 726 *p++ = '\\';
724 if (c >= 100 || lj_char_isdigit(uchar(s[1]))) { 727 if (c >= 100 || lj_char_isdigit((uint8_t)*s)) {
725 luaL_addchar(b, '0'+(c >= 100)); if (c >= 100) c -= 100; 728 *p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
726 goto tens; 729 goto tens;
727 } else if (c >= 10) { 730 } else if (c >= 10) {
728 tens: 731 tens:
729 d = (c * 205) >> 11; c -= d * 10; luaL_addchar(b, '0'+d); 732 d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d);
730 } 733 }
731 c += '0'; 734 c += '0';
732 } 735 }
733 luaL_addchar(b, c); 736 *p++ = (char)c;
734 s++; 737 setsbufP(sb, p);
735 } 738 }
736 luaL_addchar(b, '"'); 739 lj_buf_putb(sb, '"');
737} 740}
738 741
739static const char *scanformat(lua_State *L, const char *strfrmt, char *form) 742/* Scan format and generate format specifier. */
743static const char *string_fmt_scan(lua_State *L, char *spec, const char *fmt)
740{ 744{
741 const char *p = strfrmt; 745 const char *p = fmt;
742 while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++; /* skip flags */ 746 while (*p && strchr(STRING_FMT_FLAGS, *p) != NULL) p++; /* Skip flags. */
743 if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS)) 747 if ((size_t)(p - fmt) >= sizeof(STRING_FMT_FLAGS))
744 lj_err_caller(L, LJ_ERR_STRFMTR); 748 lj_err_caller(L, LJ_ERR_STRFMTR);
745 if (lj_char_isdigit(uchar(*p))) p++; /* skip width */ 749 if (lj_char_isdigit((uint8_t)*p)) p++; /* Skip max. 2 digits for width. */
746 if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */ 750 if (lj_char_isdigit((uint8_t)*p)) p++;
747 if (*p == '.') { 751 if (*p == '.') {
748 p++; 752 p++;
749 if (lj_char_isdigit(uchar(*p))) p++; /* skip precision */ 753 if (lj_char_isdigit((uint8_t)*p)) p++; /* Skip max. 2 digits for prec. */
750 if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */ 754 if (lj_char_isdigit((uint8_t)*p)) p++;
751 } 755 }
752 if (lj_char_isdigit(uchar(*p))) 756 if (lj_char_isdigit((uint8_t)*p))
753 lj_err_caller(L, LJ_ERR_STRFMTW); 757 lj_err_caller(L, LJ_ERR_STRFMTW);
754 *(form++) = '%'; 758 *spec++ = '%';
755 strncpy(form, strfrmt, (size_t)(p - strfrmt + 1)); 759 strncpy(spec, fmt, (size_t)(p - fmt + 1));
756 form += p - strfrmt + 1; 760 spec += p - fmt + 1;
757 *form = '\0'; 761 *spec = '\0';
758 return p; 762 return p;
759} 763}
760 764
761static void addintlen(char *form) 765/* Patch LUA_INTRFRMLEN into integer format specifier. */
766static void string_fmt_intfmt(char *spec)
762{ 767{
763 size_t l = strlen(form); 768 char c;
764 char spec = form[l - 1]; 769 do {
765 strcpy(form + l - 1, LUA_INTFRMLEN); 770 c = *spec++;
766 form[l + sizeof(LUA_INTFRMLEN) - 2] = spec; 771 } while (*spec);
767 form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0'; 772 *--spec = (LUA_INTFRMLEN)[0];
773 if ((LUA_INTFRMLEN)[1]) *++spec = (LUA_INTFRMLEN)[1];
774 *++spec = c;
775 *++spec = '\0';
768} 776}
769 777
770static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg) 778/* Derive sprintf argument for integer format. Ugly. */
779static LUA_INTFRM_T string_fmt_intarg(lua_State *L, int arg)
771{ 780{
772 if (sizeof(LUA_INTFRM_T) == 4) { 781 if (sizeof(LUA_INTFRM_T) == 4) {
773 return (LUA_INTFRM_T)lj_lib_checkbit(L, arg); 782 return (LUA_INTFRM_T)lj_lib_checkbit(L, arg);
@@ -782,7 +791,8 @@ static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg)
782 } 791 }
783} 792}
784 793
785static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg) 794/* Derive sprintf argument for unsigned integer format. Ugly. */
795static unsigned LUA_INTFRM_T string_fmt_uintarg(lua_State *L, int arg)
786{ 796{
787 if (sizeof(LUA_INTFRM_T) == 4) { 797 if (sizeof(LUA_INTFRM_T) == 4) {
788 return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg); 798 return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg);
@@ -799,7 +809,8 @@ static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg)
799 } 809 }
800} 810}
801 811
802static GCstr *meta_tostring(lua_State *L, int arg) 812/* Emulate tostring() inline. */
813static GCstr *string_fmt_tostring(lua_State *L, int arg)
803{ 814{
804 TValue *o = L->base+arg-1; 815 TValue *o = L->base+arg-1;
805 cTValue *mo; 816 cTValue *mo;
@@ -837,33 +848,33 @@ static GCstr *meta_tostring(lua_State *L, int arg)
837LJLIB_CF(string_format) 848LJLIB_CF(string_format)
838{ 849{
839 int arg = 1, top = (int)(L->top - L->base); 850 int arg = 1, top = (int)(L->top - L->base);
840 GCstr *fmt = lj_lib_checkstr(L, arg); 851 GCstr *sfmt = lj_lib_checkstr(L, arg);
841 const char *strfrmt = strdata(fmt); 852 const char *fmt = strdata(sfmt);
842 const char *strfrmt_end = strfrmt + fmt->len; 853 const char *efmt = fmt + sfmt->len;
843 luaL_Buffer b; 854 SBuf *sb = &G(L)->tmpbuf;
844 luaL_buffinit(L, &b); 855 setmref(sb->L, L);
845 while (strfrmt < strfrmt_end) { 856 lj_buf_reset(sb);
846 if (*strfrmt != L_ESC) { 857 while (fmt < efmt) {
847 luaL_addchar(&b, *strfrmt++); 858 if (*fmt != L_ESC || *++fmt == L_ESC) {
848 } else if (*++strfrmt == L_ESC) { 859 lj_buf_putb(sb, *fmt++);
849 luaL_addchar(&b, *strfrmt++); /* %% */ 860 } else {
850 } else { /* format item */ 861 char buf[STRING_FMT_MAXBUF];
851 char form[MAX_FMTSPEC]; /* to store the format (`%...') */ 862 char spec[STRING_FMT_MAXSPEC];
852 char buff[MAX_FMTITEM]; /* to store the formatted item */ 863 MSize len = 0;
853 if (++arg > top) 864 if (++arg > top)
854 luaL_argerror(L, arg, lj_obj_typename[0]); 865 luaL_argerror(L, arg, lj_obj_typename[0]);
855 strfrmt = scanformat(L, strfrmt, form); 866 fmt = string_fmt_scan(L, spec, fmt);
856 switch (*strfrmt++) { 867 switch (*fmt++) {
857 case 'c': 868 case 'c':
858 sprintf(buff, form, lj_lib_checkint(L, arg)); 869 len = (MSize)sprintf(buf, spec, lj_lib_checkint(L, arg));
859 break; 870 break;
860 case 'd': case 'i': 871 case 'd': case 'i':
861 addintlen(form); 872 string_fmt_intfmt(spec);
862 sprintf(buff, form, num2intfrm(L, arg)); 873 len = (MSize)sprintf(buf, spec, string_fmt_intarg(L, arg));
863 break; 874 break;
864 case 'o': case 'u': case 'x': case 'X': 875 case 'o': case 'u': case 'x': case 'X':
865 addintlen(form); 876 string_fmt_intfmt(spec);
866 sprintf(buff, form, num2uintfrm(L, arg)); 877 len = (MSize)sprintf(buf, spec, string_fmt_uintarg(L, arg));
867 break; 878 break;
868 case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': { 879 case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': {
869 TValue tv; 880 TValue tv;
@@ -871,48 +882,45 @@ LJLIB_CF(string_format)
871 if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) { 882 if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) {
872 /* Canonicalize output of non-finite values. */ 883 /* Canonicalize output of non-finite values. */
873 char *p, nbuf[LJ_STR_NUMBUF]; 884 char *p, nbuf[LJ_STR_NUMBUF];
874 size_t len = lj_str_bufnum(nbuf, &tv); 885 MSize n = lj_str_bufnum(nbuf, &tv);
875 if (strfrmt[-1] < 'a') { 886 if (fmt[-1] < 'a') {
876 nbuf[len-3] = nbuf[len-3] - 0x20; 887 nbuf[n-3] = nbuf[n-3] - 0x20;
877 nbuf[len-2] = nbuf[len-2] - 0x20; 888 nbuf[n-2] = nbuf[n-2] - 0x20;
878 nbuf[len-1] = nbuf[len-1] - 0x20; 889 nbuf[n-1] = nbuf[n-1] - 0x20;
879 } 890 }
880 nbuf[len] = '\0'; 891 nbuf[n] = '\0';
881 for (p = form; *p < 'A' && *p != '.'; p++) ; 892 for (p = spec; *p < 'A' && *p != '.'; p++) ;
882 *p++ = 's'; *p = '\0'; 893 *p++ = 's'; *p = '\0';
883 sprintf(buff, form, nbuf); 894 len = (MSize)sprintf(buf, spec, nbuf);
884 break; 895 break;
885 } 896 }
886 sprintf(buff, form, (double)tv.n); 897 len = (MSize)sprintf(buf, spec, (double)tv.n);
887 break; 898 break;
888 } 899 }
889 case 'q': 900 case 'q':
890 addquoted(L, &b, arg); 901 string_fmt_quoted(sb, lj_lib_checkstr(L, arg));
891 continue; 902 continue;
892 case 'p': 903 case 'p':
893 lj_str_pushf(L, "%p", lua_topointer(L, arg)); 904 len = lj_str_bufptr(buf, lua_topointer(L, arg));
894 luaL_addvalue(&b); 905 break;
895 continue;
896 case 's': { 906 case 's': {
897 GCstr *str = meta_tostring(L, arg); 907 GCstr *str = string_fmt_tostring(L, arg);
898 if (!strchr(form, '.') && str->len >= 100) { 908 if (!strchr(spec, '.') && str->len >= 100) { /* Format overflow? */
899 /* no precision and string is too long to be formatted; 909 lj_buf_putmem(sb, strdata(str), str->len); /* Use orig string. */
900 keep original string */
901 setstrV(L, L->top++, str);
902 luaL_addvalue(&b);
903 continue; 910 continue;
904 } 911 }
905 sprintf(buff, form, strdata(str)); 912 len = (MSize)sprintf(buf, spec, strdata(str));
906 break; 913 break;
907 } 914 }
908 default: 915 default:
909 lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1)); 916 lj_err_callerv(L, LJ_ERR_STRFMTO, fmt[-1] ? fmt[-1] : ' ');
910 break; 917 break;
911 } 918 }
912 luaL_addlstring(&b, buff, strlen(buff)); 919 lj_buf_putmem(sb, buf, len);
913 } 920 }
914 } 921 }
915 luaL_pushresult(&b); 922 setstrV(L, L->top-1, lj_buf_str(L, sb));
923 lj_gc_check(L);
916 return 1; 924 return 1;
917} 925}
918 926