diff options
Diffstat (limited to '')
-rw-r--r-- | src/lib_string.c | 236 |
1 files changed, 63 insertions, 173 deletions
diff --git a/src/lib_string.c b/src/lib_string.c index 2c86daa4..b955e933 100644 --- a/src/lib_string.c +++ b/src/lib_string.c | |||
@@ -6,8 +6,6 @@ | |||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | 6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <stdio.h> | ||
10 | |||
11 | #define lib_string_c | 9 | #define lib_string_c |
12 | #define LUA_LIB | 10 | #define LUA_LIB |
13 | 11 | ||
@@ -26,6 +24,7 @@ | |||
26 | #include "lj_ff.h" | 24 | #include "lj_ff.h" |
27 | #include "lj_bcdump.h" | 25 | #include "lj_bcdump.h" |
28 | #include "lj_char.h" | 26 | #include "lj_char.h" |
27 | #include "lj_strfmt.h" | ||
29 | #include "lj_lib.h" | 28 | #include "lj_lib.h" |
30 | 29 | ||
31 | /* ------------------------------------------------------------------------ */ | 30 | /* ------------------------------------------------------------------------ */ |
@@ -641,130 +640,20 @@ LJLIB_CF(string_gsub) | |||
641 | 640 | ||
642 | /* ------------------------------------------------------------------------ */ | 641 | /* ------------------------------------------------------------------------ */ |
643 | 642 | ||
644 | /* Max. buffer size needed (at least #string.format("%99.99f", -1e308)). */ | ||
645 | #define STRING_FMT_MAXBUF 512 | ||
646 | /* Valid format specifier flags. */ | ||
647 | #define STRING_FMT_FLAGS "-+ #0" | ||
648 | /* Max. format specifier size. */ | ||
649 | #define STRING_FMT_MAXSPEC \ | ||
650 | (sizeof(STRING_FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10) | ||
651 | |||
652 | /* Add quoted string to buffer. */ | ||
653 | static void string_fmt_quoted(SBuf *sb, GCstr *str) | ||
654 | { | ||
655 | const char *s = strdata(str); | ||
656 | MSize len = str->len; | ||
657 | lj_buf_putb(sb, '"'); | ||
658 | while (len--) { | ||
659 | uint32_t c = (uint32_t)(uint8_t)*s++; | ||
660 | char *p = lj_buf_more(sb, 4); | ||
661 | if (c == '"' || c == '\\' || c == '\n') { | ||
662 | *p++ = '\\'; | ||
663 | } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */ | ||
664 | uint32_t d; | ||
665 | *p++ = '\\'; | ||
666 | if (c >= 100 || lj_char_isdigit((uint8_t)*s)) { | ||
667 | *p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100; | ||
668 | goto tens; | ||
669 | } else if (c >= 10) { | ||
670 | tens: | ||
671 | d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d); | ||
672 | } | ||
673 | c += '0'; | ||
674 | } | ||
675 | *p++ = (char)c; | ||
676 | setsbufP(sb, p); | ||
677 | } | ||
678 | lj_buf_putb(sb, '"'); | ||
679 | } | ||
680 | |||
681 | /* Scan format and generate format specifier. */ | ||
682 | static const char *string_fmt_scan(lua_State *L, char *spec, const char *fmt) | ||
683 | { | ||
684 | const char *p = fmt; | ||
685 | while (*p && strchr(STRING_FMT_FLAGS, *p) != NULL) p++; /* Skip flags. */ | ||
686 | if ((size_t)(p - fmt) >= sizeof(STRING_FMT_FLAGS)) | ||
687 | lj_err_caller(L, LJ_ERR_STRFMTR); | ||
688 | if (lj_char_isdigit((uint8_t)*p)) p++; /* Skip max. 2 digits for width. */ | ||
689 | if (lj_char_isdigit((uint8_t)*p)) p++; | ||
690 | if (*p == '.') { | ||
691 | p++; | ||
692 | if (lj_char_isdigit((uint8_t)*p)) p++; /* Skip max. 2 digits for prec. */ | ||
693 | if (lj_char_isdigit((uint8_t)*p)) p++; | ||
694 | } | ||
695 | if (lj_char_isdigit((uint8_t)*p)) | ||
696 | lj_err_caller(L, LJ_ERR_STRFMTW); | ||
697 | *spec++ = '%'; | ||
698 | strncpy(spec, fmt, (size_t)(p - fmt + 1)); | ||
699 | spec += p - fmt + 1; | ||
700 | *spec = '\0'; | ||
701 | return p; | ||
702 | } | ||
703 | |||
704 | /* Patch LUA_INTRFRMLEN into integer format specifier. */ | ||
705 | static void string_fmt_intfmt(char *spec) | ||
706 | { | ||
707 | char c; | ||
708 | do { | ||
709 | c = *spec++; | ||
710 | } while (*spec); | ||
711 | *--spec = (LUA_INTFRMLEN)[0]; | ||
712 | if ((LUA_INTFRMLEN)[1]) *++spec = (LUA_INTFRMLEN)[1]; | ||
713 | *++spec = c; | ||
714 | *++spec = '\0'; | ||
715 | } | ||
716 | |||
717 | /* Derive sprintf argument for integer format. Ugly. */ | ||
718 | static LUA_INTFRM_T string_fmt_intarg(lua_State *L, int arg) | ||
719 | { | ||
720 | if (sizeof(LUA_INTFRM_T) == 4) { | ||
721 | return (LUA_INTFRM_T)lj_lib_checkbit(L, arg); | ||
722 | } else { | ||
723 | cTValue *o; | ||
724 | lj_lib_checknumber(L, arg); | ||
725 | o = L->base+arg-1; | ||
726 | if (tvisint(o)) | ||
727 | return (LUA_INTFRM_T)intV(o); | ||
728 | else | ||
729 | return (LUA_INTFRM_T)numV(o); | ||
730 | } | ||
731 | } | ||
732 | |||
733 | /* Derive sprintf argument for unsigned integer format. Ugly. */ | ||
734 | static unsigned LUA_INTFRM_T string_fmt_uintarg(lua_State *L, int arg) | ||
735 | { | ||
736 | if (sizeof(LUA_INTFRM_T) == 4) { | ||
737 | return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg); | ||
738 | } else { | ||
739 | cTValue *o; | ||
740 | lj_lib_checknumber(L, arg); | ||
741 | o = L->base+arg-1; | ||
742 | if (tvisint(o)) | ||
743 | return (unsigned LUA_INTFRM_T)intV(o); | ||
744 | else if ((int32_t)o->u32.hi < 0) | ||
745 | return (unsigned LUA_INTFRM_T)(LUA_INTFRM_T)numV(o); | ||
746 | else | ||
747 | return (unsigned LUA_INTFRM_T)numV(o); | ||
748 | } | ||
749 | } | ||
750 | |||
751 | /* Emulate tostring() inline. */ | 643 | /* Emulate tostring() inline. */ |
752 | static GCstr *string_fmt_tostring(lua_State *L, int arg) | 644 | static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry) |
753 | { | 645 | { |
754 | TValue *o = L->base+arg-1; | 646 | TValue *o = L->base+arg-1; |
755 | cTValue *mo; | 647 | cTValue *mo; |
756 | lua_assert(o < L->top); /* Caller already checks for existence. */ | 648 | lua_assert(o < L->top); /* Caller already checks for existence. */ |
757 | if (LJ_LIKELY(tvisstr(o))) | 649 | if (LJ_LIKELY(tvisstr(o))) |
758 | return strV(o); | 650 | return strV(o); |
759 | if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { | 651 | if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { |
760 | copyTV(L, L->top++, mo); | 652 | copyTV(L, L->top++, mo); |
761 | copyTV(L, L->top++, o); | 653 | copyTV(L, L->top++, o); |
762 | lua_call(L, 1, 1); | 654 | lua_call(L, 1, 1); |
763 | L->top--; | 655 | copyTV(L, L->base+arg-1, --L->top); |
764 | if (tvisstr(L->top)) | 656 | return NULL; /* Buffer may be overwritten, retry. */ |
765 | return strV(L->top); | ||
766 | o = L->base+arg-1; | ||
767 | copyTV(L, o, L->top); | ||
768 | } | 657 | } |
769 | if (tvisnumber(o)) { | 658 | if (tvisnumber(o)) { |
770 | return lj_str_fromnumber(L, o); | 659 | return lj_str_fromnumber(L, o); |
@@ -775,84 +664,85 @@ static GCstr *string_fmt_tostring(lua_State *L, int arg) | |||
775 | } else if (tvistrue(o)) { | 664 | } else if (tvistrue(o)) { |
776 | return lj_str_newlit(L, "true"); | 665 | return lj_str_newlit(L, "true"); |
777 | } else { | 666 | } else { |
778 | if (tvisfunc(o) && isffunc(funcV(o))) | 667 | char buf[8+2+2+16], *p = buf; |
779 | lj_str_pushf(L, "function: builtin#%d", funcV(o)->c.ffid); | 668 | if (tvisfunc(o) && isffunc(funcV(o))) { |
780 | else | 669 | p = lj_buf_wmem(p, "function: builtin#", 18); |
781 | lj_str_pushf(L, "%s: %p", lj_typename(o), lua_topointer(L, arg)); | 670 | p = lj_str_bufint(p, funcV(o)->c.ffid); |
782 | L->top--; | 671 | } else { |
783 | return strV(L->top); | 672 | p = lj_buf_wmem(p, lj_typename(o), strlen(lj_typename(o))); |
673 | *p++ = ':'; *p++ = ' '; | ||
674 | p = lj_str_bufptr(p, lua_topointer(L, arg)); | ||
675 | } | ||
676 | return lj_str_new(L, buf, (size_t)(p - buf)); | ||
784 | } | 677 | } |
785 | } | 678 | } |
786 | 679 | ||
787 | LJLIB_CF(string_format) | 680 | LJLIB_CF(string_format) |
788 | { | 681 | { |
789 | int arg = 1, top = (int)(L->top - L->base); | 682 | int arg, top = (int)(L->top - L->base); |
790 | GCstr *sfmt = lj_lib_checkstr(L, arg); | 683 | GCstr *sfmt; |
791 | const char *fmt = strdata(sfmt); | 684 | SBuf *sb; |
792 | const char *efmt = fmt + sfmt->len; | 685 | FormatState fs; |
793 | SBuf *sb = lj_buf_tmp_(L); | 686 | SFormat sf; |
794 | while (fmt < efmt) { | 687 | int retry = 0; |
795 | if (*fmt != L_ESC || *++fmt == L_ESC) { | 688 | again: |
796 | lj_buf_putb(sb, *fmt++); | 689 | arg = 1; |
690 | sb = lj_buf_tmp_(L); | ||
691 | sfmt = lj_lib_checkstr(L, arg); | ||
692 | lj_strfmt_init(&fs, strdata(sfmt), sfmt->len); | ||
693 | while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { | ||
694 | if (sf == STRFMT_LIT) { | ||
695 | lj_buf_putmem(sb, fs.str, fs.len); | ||
696 | } else if (sf == STRFMT_ERR) { | ||
697 | lj_err_callerv(L, LJ_ERR_STRFMT, strdata(lj_str_new(L, fs.str, fs.len))); | ||
797 | } else { | 698 | } else { |
798 | char buf[STRING_FMT_MAXBUF]; | ||
799 | char spec[STRING_FMT_MAXSPEC]; | ||
800 | MSize len = 0; | ||
801 | if (++arg > top) | 699 | if (++arg > top) |
802 | luaL_argerror(L, arg, lj_obj_typename[0]); | 700 | luaL_argerror(L, arg, lj_obj_typename[0]); |
803 | fmt = string_fmt_scan(L, spec, fmt); | 701 | switch (STRFMT_TYPE(sf)) { |
804 | switch (*fmt++) { | 702 | case STRFMT_INT: |
805 | case 'c': | 703 | if (tvisint(L->base+arg-1)) { |
806 | len = (MSize)sprintf(buf, spec, lj_lib_checkint(L, arg)); | 704 | int32_t k = intV(L->base+arg-1); |
705 | if (sf == STRFMT_INT) | ||
706 | lj_buf_putint(sb, k); /* Shortcut for plain %d. */ | ||
707 | else | ||
708 | lj_strfmt_putxint(sb, sf, k); | ||
709 | } else { | ||
710 | lj_strfmt_putnum_int(sb, sf, lj_lib_checknum(L, arg)); | ||
711 | } | ||
807 | break; | 712 | break; |
808 | case 'd': case 'i': | 713 | case STRFMT_UINT: |
809 | string_fmt_intfmt(spec); | 714 | if (tvisint(L->base+arg-1)) |
810 | len = (MSize)sprintf(buf, spec, string_fmt_intarg(L, arg)); | 715 | lj_strfmt_putxint(sb, sf, intV(L->base+arg-1)); |
716 | else | ||
717 | lj_strfmt_putnum_uint(sb, sf, lj_lib_checknum(L, arg)); | ||
811 | break; | 718 | break; |
812 | case 'o': case 'u': case 'x': case 'X': | 719 | case STRFMT_NUM: |
813 | string_fmt_intfmt(spec); | 720 | lj_strfmt_putnum(sb, sf, lj_lib_checknum(L, arg)); |
814 | len = (MSize)sprintf(buf, spec, string_fmt_uintarg(L, arg)); | ||
815 | break; | 721 | break; |
816 | case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': { | 722 | case STRFMT_STR: { |
817 | TValue tv; | 723 | GCstr *str = string_fmt_tostring(L, arg, retry); |
818 | tv.n = lj_lib_checknum(L, arg); | 724 | if (str == NULL) |
819 | if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) { | 725 | retry = 1; |
820 | /* Canonicalize output of non-finite values. */ | 726 | else if ((sf & STRFMT_T_QUOTED)) |
821 | char nbuf[LJ_STR_NUMBUF]; | 727 | lj_strfmt_putquoted(sb, str); |
822 | char *p = lj_str_bufnum(nbuf, &tv); | 728 | else |
823 | if (fmt[-1] < 'a') { *(p-3) -= 0x20; *(p-2) -= 0x20; *(p-1) -= 0x20; } | 729 | lj_strfmt_putstr(sb, sf, str); |
824 | *p = '\0'; | ||
825 | for (p = spec; *p < 'A' && *p != '.'; p++) ; | ||
826 | *p++ = 's'; *p = '\0'; | ||
827 | len = (MSize)sprintf(buf, spec, nbuf); | ||
828 | break; | ||
829 | } | ||
830 | len = (MSize)sprintf(buf, spec, (double)tv.n); | ||
831 | break; | 730 | break; |
832 | } | 731 | } |
833 | case 'q': | 732 | case STRFMT_CHAR: |
834 | string_fmt_quoted(sb, lj_lib_checkstr(L, arg)); | 733 | lj_strfmt_putchar(sb, sf, lj_lib_checkint(L, arg)); |
835 | continue; | 734 | break; |
836 | case 'p': | 735 | case STRFMT_PTR: /* No formatting. */ |
837 | setsbufP(sb, lj_str_bufptr(lj_buf_more(sb, LJ_STR_PTRBUF), | 736 | setsbufP(sb, lj_str_bufptr(lj_buf_more(sb, LJ_STR_PTRBUF), |
838 | lua_topointer(L, arg))); | 737 | lua_topointer(L, arg))); |
839 | continue; | ||
840 | case 's': { | ||
841 | GCstr *str = string_fmt_tostring(L, arg); | ||
842 | if (!strchr(spec, '.') && str->len >= 100) { /* Format overflow? */ | ||
843 | lj_buf_putmem(sb, strdata(str), str->len); /* Use orig string. */ | ||
844 | continue; | ||
845 | } | ||
846 | len = (MSize)sprintf(buf, spec, strdata(str)); | ||
847 | break; | 738 | break; |
848 | } | ||
849 | default: | 739 | default: |
850 | lj_err_callerv(L, LJ_ERR_STRFMTO, fmt[-1] ? fmt[-1] : ' '); | 740 | lua_assert(0); |
851 | break; | 741 | break; |
852 | } | 742 | } |
853 | lj_buf_putmem(sb, buf, len); | ||
854 | } | 743 | } |
855 | } | 744 | } |
745 | if (retry++ == 1) goto again; | ||
856 | setstrV(L, L->top-1, lj_buf_str(L, sb)); | 746 | setstrV(L, L->top-1, lj_buf_str(L, sb)); |
857 | lj_gc_check(L); | 747 | lj_gc_check(L); |
858 | return 1; | 748 | return 1; |