思路
我用 trie 树来识别保留字和符号,其余部分按常规方式处理即可,需要注意用科学计数法表示的实数。另外,即使是全空的行也要输出 [EOL],所以不能通过判断行是否为空来决定是否输出。可以用下面的方式读入:
while(getline(cin,now)){
cout<<"[EOL]"<<endl;
}
剩下的看代码吧。
代码
#include <cctype>
#include<iostream>
#include <set>
#include <stdexcept>
#include <string>
using namespace std;
typedef long long ll;
// Storage for the reserved-word trie.
// reserved_trie[node][ch] holds the index of the child reached from `node`
// via character code `ch`, or 0 when there is no such edge. Row 0 is the root.
// sz_reserved is the index of the most recently allocated node.
constexpr int kReservedTrieNodes = 200;
constexpr int kReservedTrieAlphabet = 130;
long long reserved_trie[kReservedTrieNodes][kReservedTrieAlphabet], sz_reserved;

// Inserts the word `s` into the reserved-word trie, creating nodes as needed.
//
// Throws std::out_of_range if `s` contains a character whose code does not fit
// the trie alphabet (the trie is left untouched in that case), and
// std::length_error if the trie has no free node left.
void insert_reserved(const std::string& s) {
    // Validate first so that a rejected word never leaves a partial path.
    for (const char ch : s) {
        // Go through unsigned char: a plain char may be negative, which would
        // index in front of the row.
        if (static_cast<unsigned char>(ch) >= kReservedTrieAlphabet) {
            throw std::out_of_range("insert_reserved: character outside trie alphabet");
        }
    }

    long long node = 0;
    for (const char ch : s) {
        long long& child = reserved_trie[node][static_cast<unsigned char>(ch)];
        if (child == 0) {
            if (sz_reserved + 1 >= kReservedTrieNodes) {
                throw std::length_error("insert_reserved: trie capacity exhausted");
            }
            child = ++sz_reserved;
        }
        node = child;
    }
}
// Storage for the symbol trie.
// symbol_trie[node][ch] holds the index of the child reached from `node` via
// character code `ch`, or 0 when there is no such edge. Row 0 is the root.
// sz_symbol is the index of the most recently allocated node.
constexpr int kSymbolTrieNodes = 200;
constexpr int kSymbolTrieAlphabet = 130;
long long symbol_trie[kSymbolTrieNodes][kSymbolTrieAlphabet], sz_symbol;

// Inserts the symbol `s` into the symbol trie, creating nodes as needed.
//
// Throws std::out_of_range if `s` contains a character whose code does not fit
// the trie alphabet (the trie is left untouched in that case), and
// std::length_error if the trie has no free node left.
void insert_symbol(const std::string& s) {
    // Validate first so that a rejected symbol never leaves a partial path.
    for (const char ch : s) {
        // Go through unsigned char: a plain char may be negative, which would
        // index in front of the row.
        if (static_cast<unsigned char>(ch) >= kSymbolTrieAlphabet) {
            throw std::out_of_range("insert_symbol: character outside trie alphabet");
        }
    }

    long long node = 0;
    for (const char ch : s) {
        long long& child = symbol_trie[node][static_cast<unsigned char>(ch)];
        if (child == 0) {
            if (sz_symbol + 1 >= kSymbolTrieNodes) {
                throw std::length_error("insert_symbol: trie capacity exhausted");
            }
            child = ++sz_symbol;
        }
        node = child;
    }
}
// Reserved words. main() inserts each of them into the reserved-word trie and
// check_reserved() uses this set for the final membership test.
set<string> reserved_list = {"and", "break", "do", "else", "elseif", "end", "false", "for",
"function", "if", "in", "local", "nil", "not", "or", "repeat",
"return", "then", "true", "until", "while"};
// Symbols (operators and punctuation). main() inserts each of them into the
// symbol trie and check_symbol() uses this set for the final membership test.
set<string> symbol_list = {"+", "-", "*", "/", "%", "^", "#", "==", ">=", "<=", ">", "<", "~=", "(", ")",
"{", "}", "[", "]", ";", ":", ",", ".", "..", "...", "="};
// Scanner state shared by all the functions below.
string now;// current input line
// Index into `now` of the next character to examine.
ll pointer;
// Set to true by a *_part function when it has printed a token; main() resets
// it to false before each scanning attempt.
bool have_word;
// Reads a word starting at now[pointer] and reports whether it is reserved.
//
// Characters are consumed while their code is at least 33. Scanning stops at
// the first character that has no edge from the current trie node and is not a
// letter, digit or underscore. Returns the word if it is in reserved_list,
// otherwise the sentinel "unknown". `pointer` is left after the consumed
// characters in both cases; rewinding is the caller's job.
string check_reserved() {
    string word;
    ll node = 0;
    for (;;) {
        const char ch = now[pointer];
        const ll code = ll(ch);
        if (code < 33) {
            break;
        }
        node = reserved_trie[node][code];
        const bool identifier_char = isalpha(code) || isdigit(code) || ch == '_';
        if (node == 0 && !identifier_char) {
            break;
        }
        word += ch;
        ++pointer;
    }
    if (reserved_list.count(word)) {
        return word;
    }
    return "unknown";
}
// Follows the symbol trie from now[pointer] for as long as an edge exists.
//
// Characters are consumed while their code is at least 33 and the trie has a
// matching edge. Returns the consumed text if it is in symbol_list, otherwise
// the sentinel "unknown". `pointer` is left after the consumed characters in
// both cases; rewinding is the caller's job.
string check_symbol() {
    string lexeme;
    ll node = 0;
    for (ll code = ll(now[pointer]); code >= 33; code = ll(now[pointer])) {
        node = symbol_trie[node][code];
        if (node == 0) {
            break;
        }
        lexeme += now[pointer];
        ++pointer;
    }
    if (symbol_list.count(lexeme)) {
        return lexeme;
    }
    return "unknown";
}
// Reads an identifier starting at now[pointer].
//
// An identifier starts with an ASCII letter or an underscore and continues
// with ASCII letters, digits and underscores. Returns the identifier and
// leaves `pointer` just past it. If the current character cannot start an
// identifier, returns the sentinel "unknown" and leaves `pointer` unchanged.
string check_name() {
    // The <cctype> classifiers require a value representable as unsigned char,
    // so convert before calling them and keep non-ASCII bytes out explicitly.
    const auto is_name_start = [](char c) {
        const unsigned char uc = static_cast<unsigned char>(c);
        return c == '_' || (uc < 128 && isalpha(uc) != 0);
    };
    const auto is_name_char = [&is_name_start](char c) {
        const unsigned char uc = static_cast<unsigned char>(c);
        return is_name_start(c) || (uc < 128 && isdigit(uc) != 0);
    };

    if (!is_name_start(now[pointer])) {
        return "unknown";
    }
    string name;
    // now[now.size()] is '\0', which is not a name character, so the loop
    // always stops at the end of the line.
    while (is_name_char(now[pointer])) {
        name += now[pointer];
        pointer++;
    }
    return name;
}
void reserved_part() {
ll start_pointer = pointer;
string reserved = check_reserved();
if (reserved == "unknown") {
pointer = start_pointer;
return;
}
cout << "[RESERVED] " << reserved << endl;
have_word = true;
}
void symbol_part() {
ll start_pointer = pointer;
string symbol = check_symbol();
if (symbol == "unknown") {
pointer = start_pointer;
return;
}
cout << "[SYMBOL] " << symbol << endl;
have_word = true;
}
void string_part(char type) {
cout << "[STRING] " << type;
pointer++;
while (now[pointer] != type) {
cout << now[pointer];
if (now[pointer] == '\\') {
cout << now[++pointer];
}
pointer++;
}
cout << type << endl;
have_word = true;
}
void name_part() {
ll start_pointer = pointer;
string name = check_name();
if (name == "unknown") {
pointer = start_pointer;
return;
}
cout << "[NAME] " << name << endl;
have_word = true;
}
// Tries to emit a numeric literal at the current position.
//
// A literal starts with a digit, or with '.' immediately followed by a digit.
// Two forms are recognised:
//   * hexadecimal: "0x"/"0X" followed by at least one hex digit;
//   * decimal: digits with at most one '.', at most one exponent marker
//     ('e' or 'E'), and an optional '+'/'-' directly after that marker.
// On success prints "[NUMBER] <literal>", sets have_word and leaves `pointer`
// just past the literal. Otherwise does nothing.
void number_part() {
    // The <cctype> classifiers require a value representable as unsigned char.
    const auto is_digit = [](char c) { return isdigit(static_cast<unsigned char>(c)) != 0; };
    const auto is_hex_digit = [](char c) { return isxdigit(static_cast<unsigned char>(c)) != 0; };

    const bool starts_number =
        is_digit(now[pointer]) || (now[pointer] == '.' && is_digit(now[pointer + 1]));
    if (!starts_number) {
        return;
    }

    string value;
    // All three conditions must hold together. Short-circuiting also means
    // now[pointer + 2] is only read when now[pointer + 1] is a real character,
    // so the access never goes past the string's terminator.
    const bool is_hex = now[pointer] == '0' &&
                        (now[pointer + 1] == 'x' || now[pointer + 1] == 'X') &&
                        is_hex_digit(now[pointer + 2]);
    if (is_hex) {
        value += now[pointer];
        value += now[pointer + 1];
        pointer += 2;
        while (is_hex_digit(now[pointer])) {
            value += now[pointer];
            pointer++;
        }
    } else {
        bool vis_point = false;
        bool vis_e = false;
        bool after_exponent_marker = false;  // previous character was 'e' or 'E'
        while (true) {
            const char c = now[pointer];
            bool is_exponent_marker = false;
            if (c == '.') {
                // A second '.', or a '.' inside the exponent, ends the literal.
                if (vis_point || vis_e) {
                    break;
                }
                vis_point = true;
            } else if (c == 'e' || c == 'E') {
                if (vis_e) {
                    break;
                }
                vis_e = true;
                is_exponent_marker = true;
            } else if (is_digit(c)) {
                // plain digit: always part of the literal
            } else if ((c == '+' || c == '-') && after_exponent_marker) {
                // exponent sign: only valid right after 'e' or 'E'
            } else {
                break;
            }
            value += c;
            after_exponent_marker = is_exponent_marker;
            pointer++;
        }
    }
    cout << "[NUMBER] " << value << endl;
    have_word = true;
}
int main() {
for (auto i: reserved_list) {
insert_reserved(i);
}
for (auto i: symbol_list) {
insert_symbol(i);
}
while (getline(cin, now)) {
pointer = 0;//回到行的开始
while (pointer < now.size()) {
have_word = false;
if (now[pointer] == '-' && now[pointer + 1] == '-') {
break;
}
if (now[pointer] == '"' || now[pointer] == '\'') {
string_part(now[pointer]);
pointer++;
continue;
}
number_part();
if (have_word) {
continue;
}
symbol_part();
if (have_word) {
continue;
}
reserved_part();
if (have_word) {
continue;
}
name_part();
if (have_word) {
continue;
}
pointer++;
}
printf("[EOL]\n");
}
return 0;
}