1 files changed, 393 insertions, 0 deletions
diff --git a/src/lj_lex.c b/src/lj_lex.c
new file mode 100644
index 00000000..38b0a7d4
--- /dev/null
+++ b/src/lj_lex.c
@@ -0,0 +1,393 @@
+/*
+** Lexical analyzer.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+#define lj_lex_c
+#define LUA_CORE
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_lex.h"
+#include "lj_parse.h"
+#include "lj_ctype.h"
+/* Lua lexer token names. */
+/*
+** Printable names of all lexer tokens, generated from the TKDEF list.
+** One-argument entries contribute their own name, two-argument entries
+** contribute their symbol. lj_lex_init() reads the first TK_RESERVED entries
+** and lj_lex_token2str() indexes the table with token-TK_OFS-1.
+** A NULL entry terminates the table.
+*/
+static const char *const tokennames[] = {
+#define TOKNAME_WORD(name)      #name,
+#define TOKNAME_SYM(name, sym)  #sym,
+TKDEF(TOKNAME_WORD, TOKNAME_SYM)
+#undef TOKNAME_SYM
+#undef TOKNAME_WORD
+  NULL
+};
+/* -- Buffer handling ----------------------------------------------------- */
+/* Convert a (possibly signed) char to its unsigned value as an int. */
+#define char2int(c)             cast(int, cast(uint8_t, (c)))
+/*
+** Advance to the next input character and store it in ls->current.
+** Takes a byte from the buffered window while ls->n is positive, otherwise
+** asks fillbuf() for more input. The result is END_OF_STREAM at end of input.
+** Note: `ls' is evaluated several times; pass a side-effect free expression.
+*/
+#define next(ls) \
+  ((ls)->current = ((ls)->n--) > 0 ? char2int(*(ls)->p++) : fillbuf(ls))
+/* Append the current character to the token buffer, then advance. */
+#define save_and_next(ls)       (save((ls), (ls)->current), next(ls))
+/* True if the current character is '\n' or '\r'. */
+#define currIsNewline(ls) \
+  ((ls)->current == '\n' || (ls)->current == '\r')
+/* Value of ls->current once the reader has no more data. */
+#define END_OF_STREAM           (-1)
+/*
+** Fetch the next chunk of input from the reader callback ls->rfunc.
+** Returns END_OF_STREAM if the reader yields NULL or an empty chunk; in that
+** case this function does not touch ls->p or ls->n. Otherwise the window is
+** pointed at the new chunk, its first byte is consumed and returned as an
+** unsigned value.
+*/
+static int fillbuf(LexState *ls)
+{
+  size_t len;
+  const char *chunk = ls->rfunc(ls->L, ls->rdata, &len);
+  if (chunk != NULL && len != 0) {
+    ls->n = (MSize)len - 1;  /* The first byte is handed out right away. */
+    ls->p = chunk + 1;
+    return char2int(chunk[0]);
+  }
+  return END_OF_STREAM;
+}
+/*
+** Append character `c' to the token buffer ls->sb.
+** When the buffer is full its size is doubled first; if it has already
+** reached LJ_MAX_STR/2 the LJ_ERR_XELEM lexer error is raised instead.
+*/
+static void save(LexState *ls, int c)
+{
+  if (ls->sb.sz < ls->sb.n + 1) {  /* No room for one more char? */
+    if (ls->sb.sz >= LJ_MAX_STR/2)
+      lj_lex_error(ls, 0, LJ_ERR_XELEM);
+    lj_str_resizebuf(ls->L, &ls->sb, ls->sb.sz * 2);
+  }
+  ls->sb.buf[ls->sb.n] = cast(char, c);
+  ls->sb.n++;
+}
+/*
+** If the current character is a member of `set', save it, advance to the
+** next character and return 1. Otherwise return 0 and consume nothing.
+**
+** A NUL input byte has to be rejected explicitly: strchr() treats the
+** terminating '\0' of `set' as part of the string, so strchr(set, 0) never
+** returns NULL and an embedded NUL would otherwise match every set.
+*/
+static int check_next(LexState *ls, const char *set)
+{
+  if (ls->current == '\0' || !strchr(set, ls->current))
+    return 0;
+  save_and_next(ls);
+  return 1;
+}
+/*
+** Consume one line break and bump the line counter.
+** The current character must be '\n' or '\r'. A following newline character
+** of the other kind is treated as part of the same break ("\r\n" or "\n\r").
+** Raises LJ_ERR_XLINES once the counter reaches LJ_MAX_LINE.
+*/
+static void inclinenumber(LexState *ls)
+{
+  int first = ls->current;
+  lua_assert(currIsNewline(ls));
+  next(ls);  /* Skip the first newline char. */
+  if (ls->current != first && currIsNewline(ls))
+    next(ls);  /* Skip the second half of a two-char line break. */
+  ls->linenumber++;
+  if (ls->linenumber >= LJ_MAX_LINE)
+    lj_lex_error(ls, ls->token, LJ_ERR_XLINES);
+}
+/* -- Scanner for terminals ----------------------------------------------- */
+/*
+** Scan a numeric literal into the token buffer and convert it into *tv.
+** Expects the current character to be a digit. Collects digits and dots, an
+** optional exponent marker with optional sign, and then any further
+** identifier characters, so the whole run is handed to lj_str_numconv().
+** Raises LJ_ERR_XNUMBER if the conversion fails.
+*/
+static void read_numeral(LexState *ls, TValue *tv)
+{
+  lua_assert(lj_ctype_isdigit(ls->current));
+  save_and_next(ls);
+  while (ls->current == '.' || lj_ctype_isdigit(ls->current))
+    save_and_next(ls);
+  if (check_next(ls, "Ee")) {  /* Exponent marker? */
+    check_next(ls, "+-");  /* Optional exponent sign. */
+  }
+  for (; lj_ctype_isident(ls->current); )
+    save_and_next(ls);
+  save(ls, '\0');  /* The converter gets a NUL-terminated string. */
+  if (lj_str_numconv(ls->sb.buf, tv))
+    return;
+  lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER);
+}
+/*
+** Scan a long-bracket separator: a '[' or ']' followed by zero or more '='.
+** All scanned characters are saved to the token buffer.
+** Returns the number of '=' signs if the run is followed by the same bracket
+** character again (which is NOT consumed), otherwise -(count)-1.
+*/
+static int skip_sep(LexState *ls)
+{
+  int bracket = ls->current;
+  int level;
+  lua_assert(bracket == '[' || bracket == ']');
+  save_and_next(ls);
+  for (level = 0; ls->current == '='; level++)
+    save_and_next(ls);
+  if (ls->current == bracket)
+    return level;
+  return (-level) - 1;
+}
+/*
+** Scan the body of a long string or long comment with `sep' equal signs.
+** On entry the current character is the second '[' of the opener.
+** With a non-NULL `tv' the contents (without the delimiters) are stored in
+** *tv as a string; with a NULL `tv' the text is a comment and is discarded.
+** Raises LJ_ERR_XLSTR (string) or LJ_ERR_XLCOM (comment) at end of input.
+*/
+static void read_long_string(LexState *ls, TValue *tv, int sep)
+{
+  save_and_next(ls);  /* Skip the 2nd `['. */
+  if (currIsNewline(ls))
+    inclinenumber(ls);  /* A line break right after the opener is dropped. */
+  for (;;) {
+    int c = ls->current;
+    if (c == END_OF_STREAM) {
+      lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM);
+    } else if (c == ']') {
+      if (skip_sep(ls) == sep) {
+        save_and_next(ls);  /* Skip the 2nd `]'. */
+        break;
+      }
+    } else if (c == '\n' || c == '\r') {
+      save(ls, '\n');  /* Any kind of line break is stored as '\n'. */
+      inclinenumber(ls);
+      if (tv == NULL)
+        lj_str_resetbuf(&ls->sb);  /* Comments: do not let the buffer grow. */
+    } else if (tv != NULL) {
+      save_and_next(ls);
+    } else {
+      next(ls);
+    }
+  }
+  if (tv != NULL) {
+    MSize delim = 2 + (MSize)sep;  /* Length of one bracket sequence. */
+    GCstr *str = lj_parse_keepstr(ls, ls->sb.buf + delim, ls->sb.n - 2*delim);
+    setstrV(ls->L, tv, str);
+  }
+}
+static void read_string(LexState *ls, int delim, TValue *tv)  /* Scan a quoted string ending at `delim'; the unescaped text is stored in *tv. */
+{
+  save_and_next(ls);  /* Buffer the current char (the opening quote when called from llex). */
+  while (ls->current != delim) {
+    switch (ls->current) {
+    case END_OF_STREAM:
+      lj_lex_error(ls, TK_eof, LJ_ERR_XSTR);  /* Input ended before the closing delimiter. */
+      continue;
+    case '\n':
+    case '\r':
+      lj_lex_error(ls, TK_string, LJ_ERR_XSTR);  /* Unescaped line break inside the literal. */
+      continue;
+    case '\\': {
+      int c;  /* Replacement char for the single-letter escapes below. */
+      next(ls);  /* do not save the `\' */
+      switch (ls->current) {
+      case 'a': c = '\a'; break;
+      case 'b': c = '\b'; break;
+      case 'f': c = '\f'; break;
+      case 'n': c = '\n'; break;
+      case 'r': c = '\r'; break;
+      case 't': c = '\t'; break;
+      case 'v': c = '\v'; break;
+      case '\n': case '\r': save(ls, '\n'); inclinenumber(ls); continue;  /* Escaped line break: stored as one '\n'. */
+      case END_OF_STREAM: continue;  /* will raise an error next loop */
+      default:
+        if (!lj_ctype_isdigit(ls->current)) {
+          save_and_next(ls);  /* handles \\, \", \', and \? (any other char is stored as itself) */
+        } else {  /* \xxx: up to three decimal digits */
+          int i = 0;
+          c = 0;
+          do {
+            c = 10*c + (ls->current-'0');
+            next(ls);
+          } while (++i<3 && lj_ctype_isdigit(ls->current));
+          if (c > UCHAR_MAX)  /* Value does not fit into one byte. */
+            lj_lex_error(ls, TK_string, LJ_ERR_XESC);
+          save(ls, c);
+        }
+        continue;  /* Input was already advanced above; skip the common tail. */
+      }
+      save(ls, c);  /* Common tail for the single-letter escapes. */
+      next(ls);
+      continue;
+      }
+    default:
+      save_and_next(ls);
+      break;
+    }
+  }
+  save_and_next(ls);  /* skip delimiter */
+  setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2));  /* +1 / -2 strip the two buffered delimiters. */
+}
+/* -- Main lexical scanner ------------------------------------------------ */
+static int llex(LexState *ls, TValue *tv)  /* Scan one token and return its code; string/number/name payloads are written to *tv. */
+{
+  lj_str_resetbuf(&ls->sb);  /* Every token starts with an empty scratch buffer. */
+  for (;;) {  /* Iterations that do not produce a token (whitespace, line breaks, comments) `continue'. */
+    if (lj_ctype_isident(ls->current)) {  /* Digits are tested inside this branch, so they evidently count as ident chars. */
+      GCstr *s;
+      if (lj_ctype_isdigit(ls->current)) {  /* Numeric literal. */
+        read_numeral(ls, tv);
+        return TK_number;
+      }
+      /* Identifier or reserved word. */
+      do {
+        save_and_next(ls);
+      } while (lj_ctype_isident(ls->current));
+      s = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n);  /* Turn the buffered text into a GCstr. */
+      if (s->reserved > 0)  /* Reserved word? lj_lex_init stores index+1 in this field. */
+        return TK_OFS + s->reserved;
+      setstrV(ls->L, tv, s);
+      return TK_name;
+    }
+    switch (ls->current) {
+    case '\n':
+    case '\r':
+      inclinenumber(ls);  /* Line breaks only advance the line counter. */
+      continue;
+    case ' ':
+    case '\t':
+    case '\v':
+    case '\f':
+      next(ls);  /* Skip whitespace. */
+      continue;
+    case '-':
+      next(ls);
+      if (ls->current != '-') return '-';  /* A single dash is the minus token. */
+      /* else is a comment */
+      next(ls);
+      if (ls->current == '[') {
+        int sep = skip_sep(ls);
+        lj_str_resetbuf(&ls->sb);  /* `skip_sep' may dirty the buffer */
+        if (sep >= 0) {
+          read_long_string(ls, NULL, sep);  /* long comment */
+          lj_str_resetbuf(&ls->sb);
+          continue;
+        }
+      }
+      /* else short comment */
+      while (!currIsNewline(ls) && ls->current != END_OF_STREAM)
+        next(ls);  /* The line break itself is handled by the next iteration. */
+      continue;
+    case '[': {
+      int sep = skip_sep(ls);
+      if (sep >= 0) {  /* `[' `='* `[' opens a long string. */
+        read_long_string(ls, tv, sep);
+        return TK_string;
+      } else if (sep == -1) {  /* Plain `[' with no `=' after it. */
+        return '[';
+      } else {  /* `[=' ... without the second `['. */
+        lj_lex_error(ls, TK_string, LJ_ERR_XLDELIM);
+        continue;
+      }
+      }
+    case '=':
+      next(ls);
+      if (ls->current != '=') return '='; else { next(ls); return TK_eq; }
+    case '<':
+      next(ls);
+      if (ls->current != '=') return '<'; else { next(ls); return TK_le; }
+    case '>':
+      next(ls);
+      if (ls->current != '=') return '>'; else { next(ls); return TK_ge; }
+    case '~':
+      next(ls);
+      if (ls->current != '=') return '~'; else { next(ls); return TK_ne; }
+    case '"':
+    case '\'':
+      read_string(ls, ls->current, tv);  /* The quote char doubles as the closing delimiter. */
+      return TK_string;
+    case '.':
+      save_and_next(ls);  /* Saved because it may be the start of a number like `.5'. */
+      if (check_next(ls, ".")) {
+        if (check_next(ls, "."))
+          return TK_dots;   /* ... */
+        else
+          return TK_concat;   /* .. */
+      } else if (!lj_ctype_isdigit(ls->current)) {
+        return '.';
+      } else {  /* Dot followed by a digit: numeric literal. */
+        read_numeral(ls, tv);
+        return TK_number;
+      }
+    case END_OF_STREAM:
+      return TK_eof;  /* The input is not advanced past the end. */
+    default: {
+      int c = ls->current;
+      next(ls);
+      return c;  /* Single-char tokens (+ - / ...). */
+    }
+    }
+  }
+}
+/* -- Lexer API ----------------------------------------------------------- */
+/*
+** Set up the lexer state for a new chunk and read ahead the first character.
+** Skips a UTF-8 BOM (only if all three bytes are in the first buffer) and a
+** leading `#' header line. Throws LUA_ERRSYNTAX with LJ_ERR_XBCLOAD if the
+** chunk then starts with the first byte of LUA_SIGNATURE.
+** ls->rfunc, ls->rdata and ls->chunkarg are read here and must be set by
+** the caller beforehand.
+*/
+void lj_lex_start(lua_State *L, LexState *ls)
+{
+  ls->L = L;
+  ls->fs = NULL;
+  ls->n = 0;
+  ls->p = NULL;
+  ls->lookahead = TK_eof;  /* No look-ahead token. */
+  ls->linenumber = 1;
+  ls->lastline = 1;
+  lj_str_resizebuf(ls->L, &ls->sb, LJ_MIN_SBUF);
+  next(ls);  /* Read-ahead first char. */
+  if (ls->current == 0xef && ls->n >= 2 && char2int(ls->p[0]) == 0xbb &&
+      char2int(ls->p[1]) == 0xbf) {  /* Skip UTF-8 BOM (if buffered). */
+    ls->n -= 2;
+    ls->p += 2;
+    next(ls);
+  }
+  if (ls->current == '#') {  /* Skip POSIX #! header line. */
+    do {
+      next(ls);
+    } while (ls->current != END_OF_STREAM && !currIsNewline(ls));
+    /* A header line without a line break ends the chunk. Do not return
+    ** early in that case: ls->chunkname must still be set below.
+    */
+    if (ls->current != END_OF_STREAM)
+      inclinenumber(ls);
+  }
+  if (ls->current == LUA_SIGNATURE[0]) {
+    setstrV(L, L->top++, lj_err_str(L, LJ_ERR_XBCLOAD));
+    lj_err_throw(L, LUA_ERRSYNTAX);
+  }
+  /* This is an unanchored GCstr before it's stored in the prototype.
+  ** Do this last since next() calls the reader which may call the GC.
+  */
+  ls->chunkname = lj_str_newz(L, ls->chunkarg);
+}
+/*
+** Advance to the next token: result in ls->token, payload in ls->tokenval.
+** A token buffered by lj_lex_lookahead() is used up first; otherwise a new
+** token is scanned. ls->lastline records the line number before the advance.
+*/
+void lj_lex_next(LexState *ls)
+{
+  ls->lastline = ls->linenumber;
+  if (!LJ_LIKELY(ls->lookahead == TK_eof)) {  /* Pending lookahead token? */
+    ls->tokenval = ls->lookaheadval;
+    ls->token = ls->lookahead;
+    ls->lookahead = TK_eof;  /* Mark the lookahead slot as empty again. */
+    return;
+  }
+  ls->token = llex(ls, &ls->tokenval);  /* Scan a fresh token. */
+}
+/*
+** Scan one token ahead without making it the current token.
+** The token is kept in ls->lookahead (payload in ls->lookaheadval) and also
+** returned. Must not be called while a lookahead token is already pending.
+*/
+LexToken lj_lex_lookahead(LexState *ls)
+{
+  lua_assert(ls->lookahead == TK_eof);
+  return (ls->lookahead = llex(ls, &ls->lookaheadval));
+}
+/*
+** Return a printable representation of a token.
+** Tokens above TK_OFS are looked up in tokennames[]. Smaller values are
+** formatted via lj_str_pushf(): as the character itself, or as "char(N)" if
+** lj_ctype_iscntrl() reports a control character.
+*/
+const char *lj_lex_token2str(LexState *ls, LexToken token)
+{
+  const char *fmt;
+  if (token > TK_OFS)
+    return tokennames[token-TK_OFS-1];
+  fmt = lj_ctype_iscntrl(token) ? "char(%d)" : "%c";
+  return lj_str_pushf(ls->L, fmt, token);
+}
+/*
+** Report a lexer error via lj_err_lex() for the current chunk and line.
+** token == 0 reports no token text. For names, strings and numbers the
+** current contents of the token buffer are reported; for every other token
+** its printable name from lj_lex_token2str() is used.
+** Extra arguments are passed on as a va_list together with `em'.
+*/
+void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...)
+{
+  va_list argp;
+  const char *tok = NULL;
+  if (token != 0) {
+    if (token == TK_name || token == TK_string || token == TK_number) {
+      save(ls, '\0');  /* Terminate the buffer first; it may be moved by this. */
+      tok = ls->sb.buf;
+    } else {
+      tok = lj_lex_token2str(ls, token);
+    }
+  }
+  va_start(argp, em);
+  lj_err_lex(ls->L, strdata(ls->chunkname), tok, ls->linenumber, em, argp);
+  va_end(argp);
+}
+/*
+** Create the strings for the first TK_RESERVED entries of tokennames[].
+** Each string is fixed and tagged with its 1-based index in the `reserved'
+** field, which llex() later uses to recognize reserved words.
+*/
+void lj_lex_init(lua_State *L)
+{
+  uint32_t idx = 0;
+  while (idx < TK_RESERVED) {
+    GCstr *word = lj_str_newz(L, tokennames[idx]);
+    fixstring(word);  /* Reserved words are never collected. */
+    idx++;
+    word->reserved = cast_byte(idx);  /* 1-based; 0 means "not reserved". */
+  }
+}

diff --git a/src/lj_lex.c b/src/lj_lex.c new file mode 100644 index 00000000..38b0a7d4 --- /dev/null +++ b/src/lj_lex.c
@@ -0,0 +1,393 @@
	1	/*
	2	** Lexical analyzer.
	3	** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
	4	**
	5	** Major portions taken verbatim or adapted from the Lua interpreter.
	6	** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
	7	*/
	8
	9	#define lj_lex_c
	10	#define LUA_CORE
	11
	12	#include "lj_obj.h"
	13	#include "lj_gc.h"
	14	#include "lj_err.h"
	15	#include "lj_str.h"
	16	#include "lj_lex.h"
	17	#include "lj_parse.h"
	18	#include "lj_ctype.h"
	19
	20	/* Lua lexer token names. */
	21	static const char *const tokennames[] = {
	22	#define TKSTR1(name) #name,
	23	#define TKSTR2(name, sym) #sym,
	24	TKDEF(TKSTR1, TKSTR2)
	25	#undef TKSTR1
	26	#undef TKSTR2
	27	NULL
	28	};
	29
	30	/* -- Buffer handling ----------------------------------------------------- */
	31
	32	#define char2int(c) cast(int, cast(uint8_t, (c)))
	33	#define next(ls) \
	34	(ls->current = (ls->n--) > 0 ? char2int(*ls->p++) : fillbuf(ls))
	35	#define save_and_next(ls) (save(ls, ls->current), next(ls))
	36	#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r')
	37	#define END_OF_STREAM (-1)
	38
	39	static int fillbuf(LexState *ls)
	40	{
	41	size_t sz;
	42	const char *buf = ls->rfunc(ls->L, ls->rdata, &sz);
	43	if (buf == NULL || sz == 0) return END_OF_STREAM;
	44	ls->n = (MSize)sz - 1;
	45	ls->p = buf;
	46	return char2int(*(ls->p++));
	47	}
	48
	49	static void save(LexState *ls, int c)
	50	{
	51	if (ls->sb.n + 1 > ls->sb.sz) {
	52	MSize newsize;
	53	if (ls->sb.sz >= LJ_MAX_STR/2)
	54	lj_lex_error(ls, 0, LJ_ERR_XELEM);
	55	newsize = ls->sb.sz * 2;
	56	lj_str_resizebuf(ls->L, &ls->sb, newsize);
	57	}
	58	ls->sb.buf[ls->sb.n++] = cast(char, c);
	59	}
	60
	61	static int check_next(LexState *ls, const char *set)
	62	{
	63	if (!strchr(set, ls->current))
	64	return 0;
	65	save_and_next(ls);
	66	return 1;
	67	}
	68
	69	static void inclinenumber(LexState *ls)
	70	{
	71	int old = ls->current;
	72	lua_assert(currIsNewline(ls));
	73	next(ls); /* skip `\n' or `\r' */
	74	if (currIsNewline(ls) && ls->current != old)
	75	next(ls); /* skip `\n\r' or `\r\n' */
	76	if (++ls->linenumber >= LJ_MAX_LINE)
	77	lj_lex_error(ls, ls->token, LJ_ERR_XLINES);
	78	}
	79
	80	/* -- Scanner for terminals ----------------------------------------------- */
	81
	82	static void read_numeral(LexState *ls, TValue *tv)
	83	{
	84	lua_assert(lj_ctype_isdigit(ls->current));
	85	do {
	86	save_and_next(ls);
	87	} while (lj_ctype_isdigit(ls->current) || ls->current == '.');
	88	if (check_next(ls, "Ee")) /* `E'? */
	89	check_next(ls, "+-"); /* optional exponent sign */
	90	while (lj_ctype_isident(ls->current))
	91	save_and_next(ls);
	92	save(ls, '\0');
	93	if (!lj_str_numconv(ls->sb.buf, tv))
	94	lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER);
	95	}
	96
	97	static int skip_sep(LexState *ls)
	98	{
	99	int count = 0;
	100	int s = ls->current;
	101	lua_assert(s == '[' || s == ']');
	102	save_and_next(ls);
	103	while (ls->current == '=') {
	104	save_and_next(ls);
	105	count++;
	106	}
	107	return (ls->current == s) ? count : (-count) - 1;
	108	}
	109
	110	static void read_long_string(LexState *ls, TValue *tv, int sep)
	111	{
	112	save_and_next(ls); /* skip 2nd `[' */
	113	if (currIsNewline(ls)) /* string starts with a newline? */
	114	inclinenumber(ls); /* skip it */
	115	for (;;) {
	116	switch (ls->current) {
	117	case END_OF_STREAM:
	118	lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM);
	119	break;
	120	case ']':
	121	if (skip_sep(ls) == sep) {
	122	save_and_next(ls); /* skip 2nd `]' */
	123	goto endloop;
	124	}
	125	break;
	126	case '\n':
	127	case '\r':
	128	save(ls, '\n');
	129	inclinenumber(ls);
	130	if (!tv) lj_str_resetbuf(&ls->sb); /* avoid wasting space */
	131	break;
	132	default:
	133	if (tv) save_and_next(ls);
	134	else next(ls);
	135	break;
	136	}
	137	} endloop:
	138	if (tv) {
	139	GCstr *str = lj_parse_keepstr(ls, ls->sb.buf + (2 + (MSize)sep),
	140	ls->sb.n - 2*(2 + (MSize)sep));
	141	setstrV(ls->L, tv, str);
	142	}
	143	}
	144
	145	static void read_string(LexState *ls, int delim, TValue *tv)
	146	{
	147	save_and_next(ls);
	148	while (ls->current != delim) {
	149	switch (ls->current) {
	150	case END_OF_STREAM:
	151	lj_lex_error(ls, TK_eof, LJ_ERR_XSTR);
	152	continue;
	153	case '\n':
	154	case '\r':
	155	lj_lex_error(ls, TK_string, LJ_ERR_XSTR);
	156	continue;
	157	case '\\': {
	158	int c;
	159	next(ls); /* do not save the `\' */
	160	switch (ls->current) {
	161	case 'a': c = '\a'; break;
	162	case 'b': c = '\b'; break;
	163	case 'f': c = '\f'; break;
	164	case 'n': c = '\n'; break;
	165	case 'r': c = '\r'; break;
	166	case 't': c = '\t'; break;
	167	case 'v': c = '\v'; break;
	168	case '\n': case '\r': save(ls, '\n'); inclinenumber(ls); continue;
	169	case END_OF_STREAM: continue; /* will raise an error next loop */
	170	default:
	171	if (!lj_ctype_isdigit(ls->current)) {
	172	save_and_next(ls); /* handles \\, \", \', and \? */
	173	} else { /* \xxx */
	174	int i = 0;
	175	c = 0;
	176	do {
	177	c = 10*c + (ls->current-'0');
	178	next(ls);
	179	} while (++i<3 && lj_ctype_isdigit(ls->current));
	180	if (c > UCHAR_MAX)
	181	lj_lex_error(ls, TK_string, LJ_ERR_XESC);
	182	save(ls, c);
	183	}
	184	continue;
	185	}
	186	save(ls, c);
	187	next(ls);
	188	continue;
	189	}
	190	default:
	191	save_and_next(ls);
	192	break;
	193	}
	194	}
	195	save_and_next(ls); /* skip delimiter */
	196	setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2));
	197	}
	198
	199	/* -- Main lexical scanner ------------------------------------------------ */
	200
	201	static int llex(LexState *ls, TValue *tv)
	202	{
	203	lj_str_resetbuf(&ls->sb);
	204	for (;;) {
	205	if (lj_ctype_isident(ls->current)) {
	206	GCstr *s;
	207	if (lj_ctype_isdigit(ls->current)) { /* Numeric literal. */
	208	read_numeral(ls, tv);
	209	return TK_number;
	210	}
	211	/* Identifier or reserved word. */
	212	do {
	213	save_and_next(ls);
	214	} while (lj_ctype_isident(ls->current));
	215	s = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n);
	216	if (s->reserved > 0) /* Reserved word? */
	217	return TK_OFS + s->reserved;
	218	setstrV(ls->L, tv, s);
	219	return TK_name;
	220	}
	221	switch (ls->current) {
	222	case '\n':
	223	case '\r':
	224	inclinenumber(ls);
	225	continue;
	226	case ' ':
	227	case '\t':
	228	case '\v':
	229	case '\f':
	230	next(ls);
	231	continue;
	232	case '-':
	233	next(ls);
	234	if (ls->current != '-') return '-';
	235	/* else is a comment */
	236	next(ls);
	237	if (ls->current == '[') {
	238	int sep = skip_sep(ls);
	239	lj_str_resetbuf(&ls->sb); /* `skip_sep' may dirty the buffer */
	240	if (sep >= 0) {
	241	read_long_string(ls, NULL, sep); /* long comment */
	242	lj_str_resetbuf(&ls->sb);
	243	continue;
	244	}
	245	}
	246	/* else short comment */
	247	while (!currIsNewline(ls) && ls->current != END_OF_STREAM)
	248	next(ls);
	249	continue;
	250	case '[': {
	251	int sep = skip_sep(ls);
	252	if (sep >= 0) {
	253	read_long_string(ls, tv, sep);
	254	return TK_string;
	255	} else if (sep == -1) {
	256	return '[';
	257	} else {
	258	lj_lex_error(ls, TK_string, LJ_ERR_XLDELIM);
	259	continue;
	260	}
	261	}
	262	case '=':
	263	next(ls);
	264	if (ls->current != '=') return '='; else { next(ls); return TK_eq; }
	265	case '<':
	266	next(ls);
	267	if (ls->current != '=') return '<'; else { next(ls); return TK_le; }
	268	case '>':
	269	next(ls);
	270	if (ls->current != '=') return '>'; else { next(ls); return TK_ge; }
	271	case '~':
	272	next(ls);
	273	if (ls->current != '=') return '~'; else { next(ls); return TK_ne; }
	274	case '"':
	275	case '\'':
	276	read_string(ls, ls->current, tv);
	277	return TK_string;
	278	case '.':
	279	save_and_next(ls);
	280	if (check_next(ls, ".")) {
	281	if (check_next(ls, "."))
	282	return TK_dots; /* ... */
	283	else
	284	return TK_concat; /* .. */
	285	} else if (!lj_ctype_isdigit(ls->current)) {
	286	return '.';
	287	} else {
	288	read_numeral(ls, tv);
	289	return TK_number;
	290	}
	291	case END_OF_STREAM:
	292	return TK_eof;
	293	default: {
	294	int c = ls->current;
	295	next(ls);
	296	return c; /* Single-char tokens (+ - / ...). */
	297	}
	298	}
	299	}
	300	}
	301
	302	/* -- Lexer API ----------------------------------------------------------- */
	303
	304	void lj_lex_start(lua_State *L, LexState *ls)
	305	{
	306	ls->L = L;
	307	ls->fs = NULL;
	308	ls->n = 0;
	309	ls->p = NULL;
	310	ls->lookahead = TK_eof; /* No look-ahead token. */
	311	ls->linenumber = 1;
	312	ls->lastline = 1;
	313	lj_str_resizebuf(ls->L, &ls->sb, LJ_MIN_SBUF);
	314	next(ls); /* Read-ahead first char. */
	315	if (ls->current == 0xef && ls->n >= 2 && char2int(ls->p[0]) == 0xbb &&
	316	char2int(ls->p[1]) == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
	317	ls->n -= 2;
	318	ls->p += 2;
	319	next(ls);
	320	}
	321	if (ls->current == '#') { /* Skip POSIX #! header line. */
	322	do {
	323	next(ls);
	324	if (ls->current == END_OF_STREAM) return;
	325	} while (!currIsNewline(ls));
	326	inclinenumber(ls);
	327	}
	328	if (ls->current == LUA_SIGNATURE[0]) {
	329	setstrV(L, L->top++, lj_err_str(L, LJ_ERR_XBCLOAD));
	330	lj_err_throw(L, LUA_ERRSYNTAX);
	331	}
	332	/* This is an unanchored GCstr before it's stored in the prototype.
	333	** Do this last since next() calls the reader which may call the GC.
	334	*/
	335	ls->chunkname = lj_str_newz(L, ls->chunkarg);
	336	}
	337
	338	void lj_lex_next(LexState *ls)
	339	{
	340	ls->lastline = ls->linenumber;
	341	if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */
	342	ls->token = llex(ls, &ls->tokenval); /* Get next token. */
	343	} else { /* Otherwise return lookahead token. */
	344	ls->token = ls->lookahead;
	345	ls->lookahead = TK_eof;
	346	ls->tokenval = ls->lookaheadval;
	347	}
	348	}
	349
	350	LexToken lj_lex_lookahead(LexState *ls)
	351	{
	352	lua_assert(ls->lookahead == TK_eof);
	353	ls->lookahead = llex(ls, &ls->lookaheadval);
	354	return ls->lookahead;
	355	}
	356
	357	const char *lj_lex_token2str(LexState *ls, LexToken token)
	358	{
	359	if (token > TK_OFS)
	360	return tokennames[token-TK_OFS-1];
	361	else if (!lj_ctype_iscntrl(token))
	362	return lj_str_pushf(ls->L, "%c", token);
	363	else
	364	return lj_str_pushf(ls->L, "char(%d)", token);
	365	}
	366
	367	void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...)
	368	{
	369	const char *tok;
	370	va_list argp;
	371	if (token == 0) {
	372	tok = NULL;
	373	} else if (token == TK_name || token == TK_string || token == TK_number) {
	374	save(ls, '\0');
	375	tok = ls->sb.buf;
	376	} else {
	377	tok = lj_lex_token2str(ls, token);
	378	}
	379	va_start(argp, em);
	380	lj_err_lex(ls->L, strdata(ls->chunkname), tok, ls->linenumber, em, argp);
	381	va_end(argp);
	382	}
	383
	384	void lj_lex_init(lua_State *L)
	385	{
	386	uint32_t i;
	387	for (i = 0; i < TK_RESERVED; i++) {
	388	GCstr *s = lj_str_newz(L, tokennames[i]);
	389	fixstring(s); /* Reserved words are never collected. */
	390	s->reserved = cast_byte(i+1);
	391	}
	392	}
	393