From e61cce21ef7def2c2e52dd6cb6a5b540ebcac29d Mon Sep 17 00:00:00 2001 From: Li Jin Date: Thu, 17 Aug 2023 10:36:15 +0800 Subject: add unicode identifier support, fix more issue from #144 --- src/yuescript/parser.cpp | 39 +++++ src/yuescript/parser.hpp | 2 + src/yuescript/yue_ast.cpp | 4 + src/yuescript/yue_ast.h | 21 ++- src/yuescript/yue_compiler.cpp | 370 +++++++++++++++++++++++++---------------- src/yuescript/yue_parser.cpp | 54 ++++-- src/yuescript/yue_parser.h | 5 +- 7 files changed, 320 insertions(+), 175 deletions(-) (limited to 'src') diff --git a/src/yuescript/parser.cpp b/src/yuescript/parser.cpp index 04b7ffd..a7d695e 100644 --- a/src/yuescript/parser.cpp +++ b/src/yuescript/parser.cpp @@ -341,6 +341,41 @@ private: } }; +// right interval expression. +class _larger : public _expr { +public: + // constructor from ansi string. + _larger(size_t value) + : m_value(value) { + } + + // parse with whitespace + virtual bool parse_non_term(_context& con) const override { + return _parse(con); + } + + // parse terminal + virtual bool parse_term(_context& con) const override { + return _parse(con); + } + +private: + size_t m_value; + + // internal parse + bool _parse(_context& con) const { + if (!con.end()) { + size_t ch = con.symbol(); + if (ch > m_value) { + con.next_col(); + return true; + } + } + con.set_error_pos(); + return false; + } +}; + // base class for unary expressions class _unary : public _expr { public: @@ -1503,6 +1538,10 @@ expr range(int min, int max) { return _private::construct_expr(new _set(min, max)); } +expr larger(size_t value) { + return _private::construct_expr(new _larger(value)); +} + /** creates an expression which increments the line counter and resets the column counter when the given expression is parsed successfully; used for newline characters. 
diff --git a/src/yuescript/parser.hpp b/src/yuescript/parser.hpp index d2612cd..a0ec396 100644 --- a/src/yuescript/parser.hpp +++ b/src/yuescript/parser.hpp @@ -346,6 +346,8 @@ expr set(const char* s); */ expr range(int min, int max); +expr larger(size_t value); + /** creates an expression which increments the line counter and resets the column counter when the given expression is parsed successfully; used for newline characters. diff --git a/src/yuescript/yue_ast.cpp b/src/yuescript/yue_ast.cpp index fee15af..105bd74 100644 --- a/src/yuescript/yue_ast.cpp +++ b/src/yuescript/yue_ast.cpp @@ -69,6 +69,10 @@ std::string Name_t::to_string(void* ud) const { auto info = reinterpret_cast(ud); return info->convert(this); } +std::string UnicodeName_t::to_string(void* ud) const { + auto info = reinterpret_cast(ud); + return info->convert(this); +} std::string Self_t::to_string(void*) const { return "@"s; } diff --git a/src/yuescript/yue_ast.h b/src/yuescript/yue_ast.h index 807d01b..a7f897c 100644 --- a/src/yuescript/yue_ast.h +++ b/src/yuescript/yue_ast.h @@ -86,13 +86,16 @@ AST_END(Num, "num"sv) AST_LEAF(Name) AST_END(Name, "name"sv) +AST_LEAF(UnicodeName) +AST_END(UnicodeName, "unicode_name"sv) + AST_NODE(Variable) - ast_ptr name; + ast_sel name; AST_MEMBER(Variable, &name) AST_END(Variable, "variable"sv) AST_NODE(LabelName) - ast_ptr name; + ast_ptr name; AST_MEMBER(LabelName, &name) AST_END(LabelName, "label_name"sv) @@ -105,7 +108,7 @@ AST_LEAF(Self) AST_END(Self, "self"sv) AST_NODE(SelfName) - ast_ptr name; + ast_sel name; AST_MEMBER(SelfName, &name) AST_END(SelfName, "self_name"sv) @@ -113,7 +116,7 @@ AST_LEAF(SelfClass) AST_END(SelfClass, "self_class"sv) AST_NODE(SelfClassName) - ast_ptr name; + ast_sel name; AST_MEMBER(SelfClassName, &name) AST_END(SelfClassName, "self_class_name"sv) @@ -123,7 +126,7 @@ AST_NODE(SelfItem) AST_END(SelfItem, "self_item"sv) AST_NODE(KeyName) - ast_sel name; + ast_sel name; AST_MEMBER(KeyName, &name) AST_END(KeyName, "key_name"sv) @@ 
-597,12 +600,12 @@ AST_NODE(Metamethod) AST_END(Metamethod, "metamethod"sv) AST_NODE(DotChainItem) - ast_sel name; + ast_sel name; AST_MEMBER(DotChainItem, &name) AST_END(DotChainItem, "dot_chain_item"sv) AST_NODE(ColonChainItem) - ast_sel name; + ast_sel name; bool switchToDot = false; AST_MEMBER(ColonChainItem, &name) AST_END(ColonChainItem, "colon_chain_item"sv) @@ -761,7 +764,7 @@ AST_NODE(FunLit) AST_END(FunLit, "fun_lit"sv) AST_NODE(MacroName) - ast_ptr name; + ast_ptr name; AST_MEMBER(MacroName, &name) AST_END(MacroName, "macro_name"sv) @@ -777,7 +780,7 @@ AST_NODE(MacroInPlace) AST_END(MacroInPlace, "macro_in_place"sv) AST_NODE(Macro) - ast_ptr name; + ast_ptr name; ast_ptr macroLit; AST_MEMBER(Macro, &name, ¯oLit) AST_END(Macro, "macro"sv) diff --git a/src/yuescript/yue_compiler.cpp b/src/yuescript/yue_compiler.cpp index ab35364..40f12af 100644 --- a/src/yuescript/yue_compiler.cpp +++ b/src/yuescript/yue_compiler.cpp @@ -74,7 +74,7 @@ static std::unordered_set Metamethods = { "close"s // Lua 5.4 }; -const std::string_view version = "0.18.1"sv; +const std::string_view version = "0.19.1"sv; const std::string_view extension = "yue"sv; class CompileError : public std::logic_error { @@ -621,7 +621,7 @@ private: do { newName = nameStr + std::to_string(index); index++; - } while (isSolidDefined(newName)); + } while (isSolidDefined(newName) || _info.usedNames.find(newName) != _info.usedNames.end()); return newName; } @@ -966,6 +966,7 @@ private: EndWithEOP, HasEOP, HasKeyword, + HasUnicode, Macro, Metatable, MetaFieldInvocation @@ -989,10 +990,20 @@ private: ChainType type = ChainType::Common; for (auto item : chainValue->items.objects()) { if (auto colonChain = ast_cast(item)) { - if (ast_is(colonChain->name)) { - type = ChainType::HasKeyword; - } else if (auto meta = colonChain->name.as(); meta && !meta->item.is()) { - return ChainType::MetaFieldInvocation; + switch (colonChain->name->get_id()) { + case id(): + type = ChainType::HasKeyword; + break; + case id(): 
+ type = ChainType::HasUnicode; + break; + case id(): { + auto meta = static_cast(colonChain->name.get()); + if (!meta->item.is()) { + return ChainType::MetaFieldInvocation; + } + break; + } } } else if (ast_is(item)) { return ChainType::HasEOP; @@ -1021,6 +1032,30 @@ private: return Empty; } + std::string unicodeVariableFrom(UnicodeName_t* uname) { + std::ostringstream buf; + for (auto it = uname->m_begin.m_it; it != uname->m_end.m_it; ++it) { + auto ch = *it; + if (ch > 255) { + buf << "_u"sv << std::hex << ch; + } else { + buf << static_cast(ch); + } + } + return buf.str(); + } + + std::string variableToString(Variable_t* var) { + switch (var->name->get_id()) { + case id(): + return _parser.toString(var->name); + case id(): { + return unicodeVariableFrom(static_cast(var->name.get())); + } + default: YUEE("AST node mismatch", var->name); return std::string(); + } + } + std::string singleVariableFrom(ast_node* expList, bool accessing) { if (!ast_is(expList)) return Empty; BLOCK_START @@ -1067,9 +1102,11 @@ private: auto firstItem = chainItems.back(); if (auto callable = ast_cast(firstItem)) { switch (callable->item->get_id()) { - case id(): - checkConst(_parser.toString(callable->item.get()), callable->item.get()); + case id(): { + auto var = static_cast(callable->item.get()); + checkConst(variableToString(var), callable->item.get()); return true; + } case id(): return true; } @@ -1123,7 +1160,7 @@ private: if (auto assignableChain = ast_cast(assignable->item)) { return isAssignable(assignableChain->items.objects()); } else if (auto variable = assignable->item.as()) { - checkConst(_parser.toString(variable), variable); + checkConst(variableToString(variable), variable); } return true; } @@ -1524,8 +1561,9 @@ private: Mark }; - str_list transformAssignDefs(ExpList_t* expList, DefOp op) { - str_list defs; + using DefList = std::list>; + DefList transformAssignDefs(ExpList_t* expList, DefOp op) { + DefList defs; for (auto exp_ : expList->exprs.objects()) { auto exp = 
static_cast(exp_); if (auto value = singleValueFrom(exp)) { @@ -1535,21 +1573,27 @@ private: auto callable = ast_cast(chain->items.front()); BREAK_IF(!callable); std::string name; + std::string unicodeName; if (auto var = callable->item.as()) { - name = _parser.toString(var); + name = variableToString(var); + if (var->name.is()) { + unicodeName = _parser.toString(var->name); + } } else if (auto self = callable->item.as()) { - if (self->name.is()) name = "self"sv; + if (self->name.is()) { + name = "self"sv; + } } BREAK_IF(name.empty()); switch (op) { case DefOp::Mark: - if (addToScope(name)) defs.push_back(name); + if (addToScope(name)) defs.emplace_back(name, unicodeName); break; case DefOp::Check: - if (!isDefined(name)) defs.push_back(name); + if (!isDefined(name)) defs.emplace_back(name, unicodeName); break; case DefOp::Get: - defs.push_back(name); + defs.emplace_back(name, unicodeName); break; } BLOCK_END @@ -1561,6 +1605,15 @@ private: return defs; } + std::string toLocalDecl(const DefList& defs) { + if (defs.empty()) return Empty; + str_list items; + for (const auto& def : defs) { + items.emplace_back(def.first); + } + return indent() + "local "s + join(items, ", "sv); + } + std::string toLocalDecl(const str_list& defs) { if (defs.empty()) return Empty; return indent() + "local "s + join(defs, ", "sv); @@ -1991,6 +2044,7 @@ private: return; } case ChainType::HasKeyword: + case ChainType::HasUnicode: case ChainType::Macro: transformChainValue(chainValue, out, ExpUsage::Assignment, expList); return; @@ -2348,9 +2402,10 @@ private: } auto vp = static_cast(pair); auto name = _parser.toString(vp->name); + auto uname = vp->name->name.as(); auto chain = toAst('.' + name, vp->name); pairs.push_back({toAst(name, vp).get(), - name, + uname ? 
variableToString(vp->name) : name, chain, defVal}); break; @@ -2365,12 +2420,12 @@ private: auto np = static_cast(pair); ast_ptr keyIndex; if (np->key) { - if (auto key = np->key->get_by_path()) { - auto keyNameStr = _parser.toString(key); - if (LuaKeywords.find(keyNameStr) != LuaKeywords.end()) { - keyIndex = toAst('"' + keyNameStr + '"', key).get(); + if (auto keyName = np->key.as(); keyName && ast_is(keyName->name)) { + auto keyNameStr = _parser.toString(keyName->name); + if (keyName->name.is() || LuaKeywords.find(keyNameStr) != LuaKeywords.end()) { + keyIndex = toAst('"' + keyNameStr + '"', keyName->name).get(); } else { - keyIndex = toAst('.' + keyNameStr, key).get(); + keyIndex = toAst('.' + keyNameStr, keyName->name).get(); } } else if (auto key = np->key->get_by_path()) { auto callable = np->new_ptr(); @@ -2719,7 +2774,17 @@ private: } else if (destruct.items.size() == 1 && !singleValueFrom(*j)) { auto p = destruct.value.get(); auto parens = p->new_ptr(); - parens->expr.set(p); + if (auto tableBlock = ast_cast(p)) { + auto tableLit = p->new_ptr(); + tableLit->values.dup(tableBlock->values); + auto simpleValue = p->new_ptr(); + simpleValue->value.set(tableLit); + parens->expr.set(newExp(simpleValue, p)); + } else if (ast_is(p)) { + parens->expr.set(p); + } else { + YUEE("AST node mismatch", p); + } auto callable = p->new_ptr(); callable->item.set(parens); auto chainValue = p->new_ptr(); @@ -2818,11 +2883,13 @@ private: BREAK_IF(chain->items.size() < 2); if (chain->items.size() == 2) { if (auto callable = ast_cast(chain->items.front())) { - ast_node* var = callable->item.as(); - if (auto self = callable->item.as()) { - var = self->name.as(); + if (auto var = callable->item.as()) { + BREAK_IF(isLocal(variableToString(var))); + } else if (auto self = callable->item.as()) { + if (auto var = self->name.as()) { + BREAK_IF(isLocal(_parser.toString(var))); + } } - BREAK_IF(var && isLocal(_parser.toString(var))); } } auto tmpChain = x->new_ptr(); @@ -2919,7 +2986,7 
@@ private: } std::string preDefine = toLocalDecl(defs); for (const auto& def : defs) { - addToScope(def); + addToScope(def.first); } if (preDefine.empty()) { transformExpList(expList, temp); @@ -2932,7 +2999,7 @@ private: } else { std::string preDefine = toLocalDecl(defs); for (const auto& def : defs) { - addToScope(def); + addToScope(def.first); } transformExpList(expList, temp); std::string left = std::move(temp.back()); @@ -3623,47 +3690,7 @@ private: if (auto appendix = stmt->appendix.get()) { switch (appendix->item->get_id()) { case id(): { - auto if_line = static_cast(appendix->item.get()); - auto ifNode = appendix->new_ptr(); - ifNode->type.set(if_line->type); - ifNode->nodes.push_back(if_line->condition); - auto expList = appendix->new_ptr(); - for (auto val : assignAction->values.objects()) { - switch (val->get_id()) { - case id(): - case id(): - case id(): { - auto simpleValue = val->new_ptr(); - simpleValue->value.set(val); - auto exp = newExp(simpleValue, val); - expList->exprs.push_back(exp); - break; - } - case id(): { - auto tableBlock = static_cast(val); - auto tabLit = val->new_ptr(); - tabLit->values.dup(tableBlock->values); - auto simpleValue = val->new_ptr(); - simpleValue->value.set(tabLit); - auto exp = newExp(simpleValue, val); - expList->exprs.push_back(exp); - break; - } - case id(): { - expList->exprs.push_back(val); - break; - } - default: YUEE("AST node mismatch", val); break; - } - } - auto newExpListAssign = assignAction->new_ptr(); - newExpListAssign->expList.set(expList); - auto newStmt = assignAction->new_ptr(); - newStmt->content.set(newExpListAssign); - ifNode->nodes.push_back(newStmt); - auto newSVal = ifNode->new_ptr(); - newSVal->value.set(ifNode); - newInvoke->args.push_back(newExp(newSVal, newSVal)); + throw CompileError("if line decorator is not supported here"sv, appendix->item.get()); break; } case id(): { @@ -3790,7 +3817,7 @@ private: str_list doCloses; pushScope(); for (auto var : localAttrib->leftList.objects()) { - 
auto varName = _parser.toString(ast_to(var)); + auto varName = variableToString(ast_to(var)); auto closeVar = getUnusedName("_close_"sv); addToScope(closeVar); getCloses.push_back(closeVar + "=assert "s + varName + "."s); @@ -3845,7 +3872,7 @@ private: case id(): { auto values = local->item.to(); for (auto name : values->nameList->names.objects()) { - local->forceDecls.push_back(_parser.toString(name)); + local->forceDecls.push_back(variableToString(static_cast(name))); } break; } @@ -3888,10 +3915,10 @@ private: if (info.assignment) { auto defs = transformAssignDefs(info.assignment->expList, DefOp::Get); for (const auto& def : defs) { - if (std::isupper(def[0]) && capital) { - capital->decls.push_back(def); + if (std::isupper(def.first[0]) && capital) { + capital->decls.push_back(def.first); } else if (any) { - any->decls.push_back(def); + any->decls.push_back(def.first); } } } @@ -3911,7 +3938,7 @@ private: } if (classDecl) { if (auto variable = classDecl->name->item.as()) { - auto className = _parser.toString(variable); + auto className = variableToString(variable); if (!className.empty()) { if (std::isupper(className[0]) && capital) { capital->decls.push_back(className); @@ -4378,7 +4405,7 @@ private: auto def = static_cast(_def); auto& arg = argItems.emplace_back(); switch (def->name->get_id()) { - case id(): arg.name = _parser.toString(def->name); break; + case id(): arg.name = variableToString(static_cast(def->name.get())); break; case id(): { assignSelf = true; if (def->op) { @@ -4391,13 +4418,21 @@ private: switch (selfName->name->get_id()) { case id(): { auto clsName = static_cast(selfName->name.get()); - arg.name = _parser.toString(clsName->name); + if (clsName->name.is()) { + arg.name = unicodeVariableFrom(static_cast(clsName->name.get())); + } else { + arg.name = _parser.toString(clsName->name); + } arg.assignSelf = _parser.toString(clsName); break; } case id(): { auto sfName = static_cast(selfName->name.get()); - arg.name = 
_parser.toString(sfName->name); + if (sfName->name.is()) { + arg.name = unicodeVariableFrom(static_cast(sfName->name.get())); + } else { + arg.name = _parser.toString(sfName->name); + } arg.assignSelf = _parser.toString(sfName); break; } @@ -4466,7 +4501,7 @@ private: case id(): { auto clsName = static_cast(name); auto nameStr = _parser.toString(clsName->name); - if (LuaKeywords.find(nameStr) != LuaKeywords.end()) { + if (clsName->name.is() || LuaKeywords.find(nameStr) != LuaKeywords.end()) { out.push_back("self.__class[\""s + nameStr + "\"]"s); if (invoke) { if (auto invokePtr = invoke.as()) { @@ -4487,7 +4522,7 @@ private: case id(): { auto sfName = static_cast(name); auto nameStr = _parser.toString(sfName->name); - if (LuaKeywords.find(nameStr) != LuaKeywords.end()) { + if (sfName->name.is() || LuaKeywords.find(nameStr) != LuaKeywords.end()) { out.push_back("self[\""s + nameStr + "\"]"s); if (invoke) { if (auto invokePtr = invoke.as()) { @@ -5028,7 +5063,7 @@ private: if (!ast_is(followItem)) { throw CompileError("colon chain item must be followed by invoke arguments"sv, colonItem); } - if (colonItem->name.is()) { + if (ast_is(colonItem->name)) { std::string callVar; auto block = x->new_ptr(); { @@ -5044,7 +5079,7 @@ private: } auto exp = newExp(chainValue, x); callVar = singleVariableFrom(exp, true); - if (callVar.empty()) { + if (callVar.empty() || !isLocal(callVar)) { callVar = getUnusedName("_call_"s); auto assignment = x->new_ptr(); assignment->expList.set(toAst(callVar, x)); @@ -5337,8 +5372,8 @@ private: throw CompileError("macro table field \"locals\" must be a table of strings"sv, x); } auto name = lua_tolstring(L, -1, &len); - if (_parser.match({name, len})) { - localVars.push_back(std::string(name, len)); + if (auto varNode = toAst({name, len}, x)) { + localVars.push_back(variableToString(varNode)); } else { throw CompileError("macro table field \"locals\" must contain names for local variables, got \""s + std::string(name, len) + '"', x); } @@ 
-5562,6 +5597,10 @@ private: } void transformDotChainItem(DotChainItem_t* dotChainItem, str_list& out) { + if (auto uname = dotChainItem->name.as()) { + out.push_back("[\""s + _parser.toString(uname) + "\"]"s); + return; + } auto name = _parser.toString(dotChainItem->name); if (LuaKeywords.find(name) != LuaKeywords.end()) { out.push_back("[\""s + name + "\"]"s); @@ -5882,7 +5921,7 @@ private: } void transformVariable(Variable_t* name, str_list& out) { - out.push_back(_parser.toString(name)); + out.push_back(variableToString(name)); } void transformNum(Num_t* num, str_list& out) { @@ -5993,9 +6032,9 @@ private: case id(): { auto keyName = static_cast(key); ast_ptr chainItem; - if (auto name = keyName->name.as()) { + if (ast_is(keyName->name)) { auto dotItem = x->new_ptr(); - dotItem->name.set(name); + dotItem->name.set(keyName->name); chainItem = dotItem.get(); } else { auto selfName = keyName->name.to(); @@ -6503,7 +6542,7 @@ private: endWithSlice = true; if (listVar.empty() && chainList.size() == 2) { if (auto var = chainList.front()->get_by_path()) { - listVar = _parser.toString(var); + listVar = variableToString(var); if (!isLocal(listVar)) listVar.clear(); } } @@ -6668,7 +6707,7 @@ private: void transformForHead(Variable_t* var, Exp_t* startVal, Exp_t* stopVal, ForStepValue_t* stepVal, str_list& out) { str_list temp; - std::string varName = _parser.toString(var); + std::string varName = variableToString(var); transformExp(startVal, temp, ExpUsage::Closure); transformExp(stopVal, temp, ExpUsage::Closure); if (stepVal) { @@ -7092,12 +7131,18 @@ private: void transform_variable_pair(VariablePair_t* pair, str_list& out) { auto name = _parser.toString(pair->name); + if (pair->name->name.is()) { + std::string varName = variableToString(pair->name); + out.push_back("[\""s + name + "\"] = "s + varName); + name = varName; + } else { + out.push_back(name + " = "s + name); + } if (_config.lintGlobalVariable && !isLocal(name)) { if (_globals.find(name) == _globals.end()) { 
_globals[name] = {pair->name->m_begin.m_line, pair->name->m_begin.m_col}; } } - out.push_back(name + " = "s + name); } void transform_normal_pair(NormalPair_t* pair, str_list& out, bool assignClass) { @@ -7162,6 +7207,11 @@ private: } break; } + case id(): { + auto nameStr = _parser.toString(name); + out.push_back("[\""s + nameStr + "\"]"s); + break; + } default: YUEE("AST node mismatch", name); break; } } @@ -7214,16 +7264,17 @@ private: } } - std::pair defineClassVariable(Assignable_t* assignable) { + std::tuple defineClassVariable(Assignable_t* assignable) { if (auto variable = assignable->item.as()) { - auto name = _parser.toString(variable); + auto name = variableToString(variable); + auto realName = variable->name.is() ? _parser.toString(variable->name) : name; if (addToScope(name)) { - return {name, true}; + return {name, true, realName}; } else { - return {name, false}; + return {name, false, realName}; } } - return {Empty, false}; + return {Empty, false, Empty}; } void transformClassDeclClosure(ClassDecl_t* classDecl, str_list& out) { @@ -7249,29 +7300,30 @@ private: auto extend = classDecl->extend.get(); std::string className; std::string assignItem; + std::string classTextName; if (assignable) { if (!isAssignable(assignable)) { throw CompileError("left hand expression is not assignable"sv, assignable); } bool newDefined = false; - std::tie(className, newDefined) = defineClassVariable(assignable); + std::tie(className, newDefined, classTextName) = defineClassVariable(assignable); if (newDefined) { temp.push_back(indent() + "local "s + className + nll(classDecl)); } - if (className.empty()) { + if (classTextName.empty()) { if (auto chain = ast_cast(assignable->item)) { if (auto dotChain = ast_cast(chain->items.back())) { - className = '\"' + _parser.toString(dotChain->name) + '\"'; + classTextName = '\"' + _parser.toString(dotChain->name) + '\"'; } else if (auto index = ast_cast(chain->items.back())) { if (auto name = index->get_by_path()) { 
transformString(name, temp); - className = std::move(temp.back()); + classTextName = std::move(temp.back()); temp.pop_back(); } } } } else { - className = '\"' + className + '\"'; + classTextName = '\"' + classTextName + '\"'; } pushScope(); transformAssignable(assignable, temp); @@ -7279,9 +7331,17 @@ private: assignItem = std::move(temp.back()); temp.pop_back(); } else if (expList) { - auto name = singleVariableFrom(expList, true); - if (!name.empty()) { - className = '\"' + name + '\"'; + if (auto value = singleValueFrom(expList); value->item.is()) { + auto chainValue = static_cast(value->item.get()); + if (auto callable = ast_cast(chainValue->items.front()); callable && chainValue->items.size() == 1) { + if (auto self = callable->item.as()) { + if (auto selfVar = self->name.as()) { + classTextName = "\"self\""; + } + } else if (auto var = callable->item.as()) { + classTextName = '\"' + _parser.toString(var) + '\"'; + } + } } } temp.push_back(indent() + "do"s + nll(classDecl)); @@ -7296,7 +7356,9 @@ private: ClassDecl_t* clsDecl = nullptr; if (auto assignment = assignmentFrom(statement)) { auto names = transformAssignDefs(assignment->expList.get(), DefOp::Mark); - varDefs.insert(varDefs.end(), names.begin(), names.end()); + for (const auto& name : names) { + varDefs.push_back(name.first); + } auto info = extractDestructureInfo(assignment, true, false); if (!info.destructures.empty()) { for (const auto& destruct : info.destructures) @@ -7320,8 +7382,9 @@ private: } if (clsDecl) { std::string clsName; + std::string clsTextName; bool newDefined = false; - std::tie(clsName, newDefined) = defineClassVariable(clsDecl->name); + std::tie(clsName, newDefined, clsTextName) = defineClassVariable(clsDecl->name); if (newDefined) varDefs.push_back(clsName); } } @@ -7437,9 +7500,9 @@ private: } } _buf << indent(1) << "__base = "sv << baseVar; - if (!className.empty()) { + if (!classTextName.empty()) { _buf << ","sv << nll(classDecl); - _buf << indent(1) << "__name = "sv << 
className; + _buf << indent(1) << "__name = "sv << classTextName; } if (extend) { _buf << ","sv << nll(classDecl); @@ -7546,12 +7609,15 @@ private: type = MemType::Property; auto name = ast_cast(selfItem->name); if (!name) throw CompileError("invalid class poperty name"sv, selfItem->name); - newSuperCall = classVar + ".__parent."s + _parser.toString(name->name); + if (name->name.is()) { + newSuperCall = classVar + ".__parent[\""s + _parser.toString(name->name) + "\"]"s; + } else { + newSuperCall = classVar + ".__parent."s + _parser.toString(name->name); + } } else { auto x = keyName; - auto nameNode = keyName->name.as(); - if (!nameNode) break; - auto name = _parser.toString(nameNode); + if (!ast_is(keyName->name)) break; + auto name = _parser.toString(keyName->name); if (name == "new"sv) { type = MemType::Builtin; keyName->name.set(toAst("__init"sv, x)); @@ -7563,7 +7629,7 @@ private: normal_pair->value->traverse([&](ast_node* node) { if (node->get_id() == id()) return traversal::Return; if (auto chainValue = ast_cast(node)) { - if (auto callable = ast_cast(chainValue->items.front())) { + if (auto callable = ast_cast(chainValue->items.front()); callable && callable->item.is()) { auto var = callable->item.get(); if (_parser.toString(var) == "super"sv) { auto insertSelfToArguments = [&](ast_node* item) { @@ -7763,7 +7829,7 @@ private: } if (clsDecl) { auto variable = clsDecl->name.as(); - if (!isDefined(_parser.toString(variable))) return traversal::Stop; + if (!isDefined(variableToString(variable))) return traversal::Stop; } return traversal::Return; } @@ -7817,9 +7883,11 @@ private: switch (item->get_id()) { case id(): { auto classDecl = static_cast(item); - if (classDecl->name && classDecl->name->item->get_id() == id()) { - markVarsGlobal(GlobalMode::Any); - addGlobalVar(_parser.toString(classDecl->name->item), classDecl->name->item); + if (classDecl->name) { + if (auto var = classDecl->name->item.as()) { + markVarsGlobal(GlobalMode::Any); + 
addGlobalVar(variableToString(var), classDecl->name->item); + } } transformClassDecl(classDecl, out, ExpUsage::Common); break; @@ -7837,7 +7905,8 @@ private: if (values->valueList) { auto expList = x->new_ptr(); for (auto name : values->nameList->names.objects()) { - addGlobalVar(_parser.toString(name), name); + auto var = static_cast(name); + addGlobalVar(variableToString(var), var); auto callable = x->new_ptr(); callable->item.set(name); auto chainValue = x->new_ptr(); @@ -7858,7 +7927,8 @@ private: transformAssignment(assignment, out); } else { for (auto name : values->nameList->names.objects()) { - addGlobalVar(_parser.toString(name), name); + auto var = static_cast(name); + addGlobalVar(variableToString(var), var); } } break; @@ -7938,14 +8008,14 @@ private: dotChain && dotChain->name.is()) { auto nameStr = name.value(); if (_exportedMetaKeys.find(nameStr) != _exportedMetaKeys.end()) { - throw CompileError("export module metamethod key \"" + nameStr + "\" duplicated"s, dotChain->name); + throw CompileError("export module metamethod key \""s + nameStr + "\" duplicated"s, dotChain->name); } else { _exportedMetaKeys.insert(nameStr); } } else { auto nameStr = name.value(); if (_exportedKeys.find(nameStr) != _exportedKeys.end()) { - throw CompileError("export module key \"" + nameStr + "\" duplicated"s, exportNode->target); + throw CompileError("export module key \""s + nameStr + "\" duplicated"s, exportNode->target); } else { _exportedKeys.insert(nameStr); } @@ -7977,26 +8047,30 @@ private: assignment->expList.set(expList); assignment->action.set(exportNode->assign); transformAssignment(assignment, out); - str_list names = transformAssignDefs(expList, DefOp::Get); + auto names = transformAssignDefs(expList, DefOp::Get); auto info = extractDestructureInfo(assignment, true, false); if (!info.destructures.empty()) { for (const auto& destruct : info.destructures) for (const auto& item : destruct.items) - if (!item.targetVar.empty()) - names.push_back(item.targetVar); 
+ if (!item.targetVar.empty()) { + auto dot = ast_cast(item.structure->items.back()); + auto uname = dot->name.as(); + names.emplace_back(item.targetVar, uname ? _parser.toString(uname) : Empty); + } } if (_info.exportDefault) { - out.back().append(indent() + _info.moduleName + " = "s + names.back() + nlr(exportNode)); + out.back().append(indent() + _info.moduleName + " = "s + names.back().first + nlr(exportNode)); } else { str_list lefts, rights; for (const auto& name : names) { - if (_exportedKeys.find(name) != _exportedKeys.end()) { - throw CompileError("export module key \"" + name + "\" duplicated"s, x); + auto realName = name.second.empty() ? name.first : name.second; + if (_exportedKeys.find(realName) != _exportedKeys.end()) { + throw CompileError("export module key \""s + realName + "\" duplicated"s, x); } else { - _exportedKeys.insert(name); + _exportedKeys.insert(realName); } - lefts.push_back(_info.moduleName + "[\""s + name + "\"]"s); - rights.push_back(name); + lefts.push_back(_info.moduleName + "[\""s + realName + "\"]"s); + rights.push_back(name.first); } out.back().append(indent() + join(lefts, ", "sv) + " = "s + join(rights, ", "sv) + nlr(exportNode)); } @@ -8020,16 +8094,18 @@ private: assignment->expList.set(assignList); for (auto exp : expList->exprs.objects()) { if (auto classDecl = exp->get_by_path()) { - if (classDecl->name && classDecl->name->item->get_id() == id()) { - transformClassDecl(classDecl, temp, ExpUsage::Common); - auto name = _parser.toString(classDecl->name->item); - assignment->expList.set(toAst(_info.moduleName + "[\""s + name + "\"]"s, x)); - auto assign = x->new_ptr(); - assign->values.push_back(toAst(name, x)); - assignment->action.set(assign); - transformAssignment(assignment, temp); - assignment->expList.set(assignList); - continue; + if (classDecl->name) { + if (auto var = classDecl->name->item.as()) { + transformClassDecl(classDecl, temp, ExpUsage::Common); + auto name = variableToString(var); + 
assignment->expList.set(toAst(_info.moduleName + "[\""s + name + "\"]"s, x)); + auto assign = x->new_ptr(); + assign->values.push_back(toAst(name, x)); + assignment->action.set(assign); + transformAssignment(assignment, temp); + assignment->expList.set(assignList); + continue; + } } } auto assign = x->new_ptr(); @@ -8180,7 +8256,7 @@ private: auto x = tryNode; ast_ptr errHandler; if (tryNode->catchBlock) { - auto errHandleStr = "("s + _parser.toString(tryNode->catchBlock->err) + ")->"s; + auto errHandleStr = "("s + variableToString(tryNode->catchBlock->err) + ")->"s; errHandler.set(toAst(errHandleStr, x->func)); auto funLit = simpleSingleValueFrom(errHandler)->value.to(); auto body = x->new_ptr(); @@ -8582,7 +8658,7 @@ private: assignment->action.set(assign); transformAssignment(assignment, out); if (auto var = ast_cast(target)) { - auto moduleName = _parser.toString(var); + auto moduleName = variableToString(var); markVarConst(moduleName); } else { markDestructureConst(assignment); @@ -9001,7 +9077,7 @@ private: auto assign = x->new_ptr(); str_list vars; for (auto varNode : listA->names.objects()) { - auto var = _parser.toString(varNode); + auto var = variableToString(static_cast(varNode)); forceAddToScope(var); vars.push_back(var); auto callable = x->new_ptr(); @@ -9038,7 +9114,7 @@ private: for (auto name : listA->names.objects()) { auto closeName = getUnusedName("_close_"sv); leftVars.push_back(closeName); - rightVars.push_back(_parser.toString(name)); + rightVars.push_back(variableToString(static_cast(name))); addToScope(closeName); } popScope(); @@ -9052,7 +9128,7 @@ private: if (!listA->names.empty()) { str_list vars; for (auto name : listA->names.objects()) { - auto var = _parser.toString(name); + auto var = variableToString(static_cast(name)); forceAddToScope(var); vars.push_back(var); } @@ -9161,7 +9237,7 @@ private: if (getLuaTarget(label) < 502) { throw CompileError("label statement is not available when not targeting Lua version 5.2 or higher"sv, 
label); } - auto labelStr = _parser.toString(label->label); + auto labelStr = unicodeVariableFrom(label->label->name); int currentScope = _gotoScopes.top(); if (static_cast(_labels.size()) <= currentScope) { _labels.resize(currentScope + 1, std::nullopt); @@ -9182,7 +9258,7 @@ private: if (getLuaTarget(gotoNode) < 502) { throw CompileError("goto statement is not available when not targeting Lua version 5.2 or higher"sv, gotoNode); } - auto labelStr = _parser.toString(gotoNode->label); + auto labelStr = unicodeVariableFrom(gotoNode->label->name); gotos.push_back({gotoNode, labelStr, _gotoScopes.top(), static_cast(_scopes.size())}); out.push_back(indent() + "goto "s + labelStr + nll(gotoNode)); } diff --git a/src/yuescript/yue_parser.cpp b/src/yuescript/yue_parser.cpp index 693a3bc..ea45e85 100644 --- a/src/yuescript/yue_parser.cpp +++ b/src/yuescript/yue_parser.cpp @@ -59,7 +59,8 @@ YueParser::YueParser() { white = space >> *(line_break >> space); alpha_num = range('a', 'z') | range('A', 'Z') | range('0', '9') | '_'; not_alpha_num = not_(alpha_num); - Name = (range('a', 'z') | range('A', 'Z') | '_') >> *alpha_num; + Name = (range('a', 'z') | range('A', 'Z') | '_') >> *alpha_num >> not_(larger(255)); + UnicodeName = (range('a', 'z') | range('A', 'Z') | '_' | larger(255)) >> *(larger(255) | alpha_num); num_expo = set("eE") >> -set("+-") >> num_char; num_expo_hex = set("pP") >> -set("+-") >> num_char; lj_num = -set("uU") >> set("lL") >> set("lL"); @@ -160,23 +161,34 @@ YueParser::YueParser() { #define body (in_block | Statement | empty_block_error) - Variable = pl::user(Name, [](const item_t& item) { + Variable = pl::user(Name | UnicodeName, [](const item_t& item) { State* st = reinterpret_cast(item.user_data); - for (auto it = item.begin->m_it; it != item.end->m_it; ++it) st->buffer += static_cast(*it); + for (auto it = item.begin->m_it; it != item.end->m_it; ++it) { + if (*it > 255) { + st->buffer.clear(); + return true; + } + st->buffer += static_cast(*it); + } auto 
isValid = Keywords.find(st->buffer) == Keywords.end(); if (isValid) { - if (st->buffer == st->moduleName) { - st->moduleFix++; - st->moduleName = "_module_"s + std::to_string(st->moduleFix); + if (st->buffer[0] == '_') { + st->usedNames.insert(st->buffer); } } st->buffer.clear(); return isValid; }); - LabelName = pl::user(Name, [](const item_t& item) { + LabelName = pl::user(UnicodeName, [](const item_t& item) { State* st = reinterpret_cast<State*>(item.user_data); - for (auto it = item.begin->m_it; it != item.end->m_it; ++it) st->buffer += static_cast<char>(*it); + for (auto it = item.begin->m_it; it != item.end->m_it; ++it) { + if (*it > 255) { + st->buffer.clear(); + return true; + } + st->buffer += static_cast<char>(*it); + } auto isValid = LuaKeywords.find(st->buffer) == LuaKeywords.end(); st->buffer.clear(); return isValid; @@ -191,12 +203,12 @@ YueParser::YueParser() { }); Self = '@'; - SelfName = '@' >> Name; + SelfName = '@' >> (Name | UnicodeName); SelfClass = "@@"; - SelfClassName = "@@" >> Name; + SelfClassName = "@@" >> (Name | UnicodeName); SelfItem = SelfClassName | SelfClass | SelfName | Self; - KeyName = SelfItem | Name; + KeyName = SelfItem | Name | UnicodeName; VarArg = "..."; check_indent = pl::user(plain_space, [](const item_t& item) { @@ -275,7 +287,7 @@ YueParser::YueParser() { import_name_list = Seperator >> *space_break >> space >> import_name >> *((+space_break | space >> ',' >> *space_break) >> space >> import_name); ImportFrom = import_name_list >> *space_break >> space >> key("from") >> space >> Exp; - ImportLiteralInner = (range('a', 'z') | range('A', 'Z') | set("_-")) >> *(alpha_num | '-'); + ImportLiteralInner = (range('a', 'z') | range('A', 'Z') | set("_-") | larger(255)) >> *(alpha_num | '-' | larger(255)); import_literal_chain = Seperator >> ImportLiteralInner >> *('.'
>> ImportLiteralInner); ImportLiteral = ( '\'' >> import_literal_chain >> '\'' @@ -608,8 +620,8 @@ YueParser::YueParser() { DotChainItem >> -ExistentialOp | Slice | index >> -ExistentialOp; - DotChainItem = '.' >> (Name | Metatable | Metamethod); - ColonChainItem = (expr('\\') | "::") >> (LuaKeyword | Name | Metamethod); + DotChainItem = '.' >> (Name | Metatable | Metamethod | UnicodeName); + ColonChainItem = (expr('\\') | "::") >> (LuaKeyword | Name | Metamethod | UnicodeName); invoke_chain = Invoke >> -ExistentialOp >> -chain_items; colon_chain = ColonChainItem >> -ExistentialOp >> -invoke_chain; @@ -779,10 +791,10 @@ YueParser::YueParser() { FnArrow = expr("->") | "=>"; FunLit = -FnArgsDef >> space >> FnArrow >> -(space >> Body); - MacroName = '$' >> Name; + MacroName = '$' >> UnicodeName; macro_args_def = '(' >> white >> -FnArgDefList >> white >> ')'; MacroLit = -(macro_args_def >> space) >> "->" >> space >> Body; - Macro = key("macro") >> space >> Name >> space >> '=' >> space >> MacroLit; + Macro = key("macro") >> space >> UnicodeName >> space >> '=' >> space >> MacroLit; MacroInPlace = '$' >> space >> "->" >> space >> Body; NameList = Seperator >> Variable >> *(space >> ',' >> space >> Variable); @@ -955,7 +967,15 @@ ParseInfo YueParser::parse(std::string_view codes, rule& r) { State state; res.node.set(::yue::parse(*(res.codes), r, errors, &state)); if (state.exportCount > 0) { - res.moduleName = std::move(state.moduleName); + int index = 0; + std::string moduleName; + auto moduleStr = "_module_"s; + do { + moduleName = moduleStr + std::to_string(index); + index++; + } while (state.usedNames.find(moduleName) != state.usedNames.end()); + res.moduleName = moduleName; + res.usedNames = std::move(state.usedNames); res.exportDefault = state.exportDefault; res.exportMacro = state.exportMacro; res.exportMetatable = !state.exportMetatable && state.exportMetamethod; diff --git a/src/yuescript/yue_parser.h b/src/yuescript/yue_parser.h index dd5e253..72d966d 100644 
--- a/src/yuescript/yue_parser.h +++ b/src/yuescript/yue_parser.h @@ -40,6 +40,7 @@ struct ParseInfo { bool exportMacro = false; bool exportMetatable = false; std::string moduleName; + std::unordered_set<std::string> usedNames; std::string errorMessage(std::string_view msg, int errLine, int errCol, int lineOffset = 0) const; }; @@ -105,16 +106,15 @@ protected: bool exportMetatable = false; bool exportMetamethod = false; int exportCount = 0; - int moduleFix = 0; int expLevel = 0; size_t stringOpen = 0; - std::string moduleName = "_module_0"s; std::string buffer; std::stack<int> indents; std::stack<bool> noDoStack; std::stack<bool> noChainBlockStack; std::stack<bool> noTableBlockStack; std::stack<bool> noForStack; + std::unordered_set<std::string> usedNames; }; template <class T> @@ -254,6 +254,7 @@ private: AST_RULE(Num); AST_RULE(Name); + AST_RULE(UnicodeName); AST_RULE(Variable); AST_RULE(LabelName); AST_RULE(LuaKeyword); -- cgit v1.2.3-55-g6feb