UVA12421 (Jiandan) Mua (I) - Lexical Analyzer

思路

我用了 trie 树去解决保留字和符号的部分,其余的基本一样,注意一下实数的科学计数法部分就行了。但是注意,即使是全空行也要输出 [EOL],所以不能直接跳过空行。可以用这种方法读入:

while(getline(cin,now)){
    cout<<"[EOL]"<<endl;
}

剩下的看代码吧。

代码

#include <cctype>
#include <cstdio>
#include <iostream>
#include <set>
#include <string>

using namespace std;
typedef long long ll;
// Trie over the reserved words. reserved_trie[node][ch] holds the id of the
// child reached from `node` on character code `ch`, or 0 when no such edge
// exists. Row 0 is the root; sz_reserved is the id of the newest node.
ll reserved_trie[200][130], sz_reserved;

// Adds `s` to the reserved-word trie, allocating a fresh node for every
// character along its path that does not have one yet.
void insert_reserved(string s) {
    ll node = 0;
    for (char ch: s) {
        ll &child = reserved_trie[node][ll(ch)];
        if (child == 0) {
            child = ++sz_reserved;
        }
        node = child;
    }
}

// Trie over the operator/punctuation symbols, laid out like the reserved-word
// trie: symbol_trie[node][ch] is the child id for character code `ch`, 0 means
// "no edge", row 0 is the root and sz_symbol is the id of the newest node.
ll symbol_trie[200][130], sz_symbol;

// Adds `s` to the symbol trie, creating the nodes that are still missing on
// the path spelled by its characters.
void insert_symbol(string s) {
    ll current = 0;
    const ll length = s.size();
    for (ll pos = 0; pos < length; ++pos) {
        const ll code = s[pos];
        ll next = symbol_trie[current][code];
        if (next == 0) {
            next = ++sz_symbol;
            symbol_trie[current][code] = next;
        }
        current = next;
    }
}

// Every keyword that is reported as [RESERVED].
set<string> reserved_list = {
        "and", "break", "do",
        "else", "elseif", "end",
        "false", "for", "function",
        "if", "in", "local",
        "nil", "not", "or",
        "repeat", "return", "then",
        "true", "until", "while",
};

// Every operator / punctuation token that is reported as [SYMBOL].
set<string> symbol_list = {
        // arithmetic and length
        "+", "-", "*", "/", "%", "^", "#",
        // comparison and assignment
        "==", "~=", "<=", ">=", "<", ">", "=",
        // brackets
        "(", ")", "{", "}", "[", "]",
        // separators, field access, concatenation, varargs
        ";", ":", ",", ".", "..", "...",
};

// The input line currently being tokenized.
string now;
// Index into `now` of the next character to examine.
ll pointer = 0;
// Set to true by a *_part() function once it has printed a token.
bool have_word = false;

// Reads a word starting at now[pointer] and reports whether it is reserved.
//
// Characters are consumed (advancing `pointer`) until one has a code below 33,
// or until one has no edge in the reserved-word trie from the current node
// and is not a letter, digit or underscore. The collected word is returned if
// it is a member of reserved_list; otherwise the sentinel "unknown" is
// returned. `pointer` is left after the consumed characters in both cases, so
// the caller is responsible for rewinding on failure.
string check_reserved() {
    string word;
    ll node = 0;
    for (;;) {
        const char ch = now[pointer];
        if (ll(ch) < 33) {
            break;
        }
        node = reserved_trie[node][ll(ch)];
        const bool word_char = isalpha(ll(ch)) || isdigit(ll(ch)) || ch == '_';
        if (node == 0 && !word_char) {
            break;
        }
        word += ch;
        ++pointer;
    }
    return reserved_list.count(word) ? word : "unknown";
}

// Reads the longest run of characters starting at now[pointer] that follows
// edges of the symbol trie, advancing `pointer` past them.
//
// Returns that run if it is a member of symbol_list, otherwise the sentinel
// "unknown". `pointer` is not restored on failure; the caller rewinds.
string check_symbol() {
    string token;
    for (ll node = 0; ll(now[pointer]) >= 33; ++pointer) {
        node = symbol_trie[node][ll(now[pointer])];
        if (node == 0) {
            break;  // no symbol continues with this character
        }
        token += now[pointer];
    }
    return symbol_list.count(token) ? token : "unknown";
}

// Reads an identifier starting at now[pointer].
//
// An identifier starts with a letter or an underscore and continues with
// letters, digits and underscores. On success the identifier is returned and
// `pointer` is advanced past it. If the first character cannot start an
// identifier, the sentinel "unknown" is returned and `pointer` is untouched.
string check_name() {
    // Go through unsigned char: handing a negative char value to the <cctype>
    // classifiers is undefined behavior.
    const auto starts_name = [](char c) {
        return c == '_' || isalpha(static_cast<unsigned char>(c)) != 0;
    };
    const auto continues_name = [&starts_name](char c) {
        return starts_name(c) || isdigit(static_cast<unsigned char>(c)) != 0;
    };

    // A leading underscore is a valid start; rejecting it would make "_abc"
    // lex as "abc" and "_end" as the reserved word "end".
    if (!starts_name(now[pointer])) {
        return "unknown";
    }
    string name;
    // now[now.size()] reads as '\0', which ends the loop at end of line.
    while (continues_name(now[pointer])) {
        name += now[pointer];
        pointer++;
    }
    return name;
}

// Tries to emit a [RESERVED] token at the current position.
// On a match the token line is printed, `pointer` stays after the word and
// have_word is set; otherwise `pointer` is rewound to where it started.
void reserved_part() {
    const ll saved = pointer;
    const string word = check_reserved();
    if (word != "unknown") {
        cout << "[RESERVED] " << word << endl;
        have_word = true;
        return;
    }
    pointer = saved;
}

// Tries to emit a [SYMBOL] token at the current position.
// When check_symbol() reports "unknown", `pointer` is restored and nothing is
// printed; otherwise the symbol is printed and have_word is set.
void symbol_part() {
    const ll rewind_to = pointer;
    const string token = check_symbol();
    const bool matched = (token != "unknown");
    if (matched) {
        cout << "[SYMBOL] " << token << endl;
    } else {
        pointer = rewind_to;
    }
    if (matched) {
        have_word = true;
    }
}

// Emits a [STRING] token for the literal that opens at now[pointer].
//
// `type` is the quote character (' or ") that opened the literal. The literal
// is printed verbatim, including both quotes and any backslash escapes. On
// return `pointer` is on the closing quote, so the caller steps past it.
//
// If the line ends before a closing quote is found, scanning stops at the end
// of the line instead of reading past the buffer, and no closing quote is
// printed because none was present in the input.
void string_part(char type) {
    const ll len = now.size();
    cout << "[STRING] " << type;
    ++pointer;  // step over the opening quote
    while (pointer < len && now[pointer] != type) {
        cout << now[pointer];
        // A backslash escapes the next character, so an escaped quote must not
        // end the literal. Only consume it if it is actually on this line.
        if (now[pointer] == '\\' && pointer + 1 < len) {
            cout << now[++pointer];
        }
        ++pointer;
    }
    if (pointer < len) {
        cout << type;
    }
    cout << '\n';
    have_word = true;
}

// Tries to emit a [NAME] token at the current position.
//
// Failure is detected by check_name() not having advanced `pointer`, rather
// than by comparing its result with the "unknown" sentinel: an identifier
// that is literally spelled `unknown` would otherwise be mistaken for a
// failure and dropped. check_name() consumes at least one character whenever
// it accepts and none when it rejects, so the two cases cannot be confused.
void name_part() {
    const ll start_pointer = pointer;
    const string name = check_name();
    if (pointer == start_pointer) {
        return;  // nothing consumed: no identifier starts here
    }
    cout << "[NAME] " << name << '\n';
    have_word = true;
}

// Tries to emit a [NUMBER] token at the current position.
//
// A number starts with a digit, or with '.' immediately followed by a digit.
// Two forms are recognized:
//   * hexadecimal: "0x" / "0X" followed by at least one hex digit;
//   * decimal: digits with at most one '.', at most one exponent marker
//     ('e' or 'E', no '.' allowed after it), and an optional '+' / '-'
//     directly after the exponent marker.
// If no number starts here nothing is printed and `pointer` and have_word are
// left unchanged; otherwise the token is printed, `pointer` is advanced past
// it and have_word is set.
void number_part() {
    const ll len = now.size();
    // Bounds-checked peek that yields 0 outside the line. The result is an
    // unsigned char so it can be handed to the <cctype> classifiers safely.
    const auto at = [&](ll pos) -> unsigned char {
        return (pos >= 0 && pos < len) ? static_cast<unsigned char>(now[pos]) : 0;
    };

    const bool starts_number = isdigit(at(pointer)) || (at(pointer) == '.' && isdigit(at(pointer + 1)));
    if (!starts_number) {
        return;
    }

    string value;
    // All three conditions must hold together. Without the grouping, any token
    // whose third character happens to be a-f/A-F (e.g. "1 and", "12e-5")
    // would be treated as hexadecimal.
    const bool is_hex = at(pointer) == '0' && (at(pointer + 1) == 'x' || at(pointer + 1) == 'X') &&
                        isxdigit(at(pointer + 2));
    if (is_hex) {
        value += now[pointer];
        value += now[pointer + 1];
        pointer += 2;
        while (isxdigit(at(pointer))) {
            value += now[pointer];
            ++pointer;
        }
    } else {
        bool vis_point = false, vis_e = false;
        while (true) {
            const unsigned char c = at(pointer);
            if (c == '.') {
                if (vis_point || vis_e) {
                    break;  // second '.' or a '.' inside the exponent
                }
                vis_point = true;
            } else if (c == 'e' || c == 'E') {
                if (vis_e) {
                    break;
                }
                vis_e = true;
            } else if (c == '+' || c == '-') {
                // A sign belongs to the number only directly after the
                // exponent marker, in either letter case.
                const unsigned char prev = at(pointer - 1);
                if (prev != 'e' && prev != 'E') {
                    break;
                }
            } else if (!isdigit(c)) {
                break;
            }
            value += now[pointer];
            ++pointer;
        }
    }
    cout << "[NUMBER] " << value << '\n';
    have_word = true;
}

// Entry point: builds both tries, then tokenizes standard input line by line.
//
// For every input line (including empty ones) the tokens found are printed one
// per line, followed by an "[EOL]" line. Anything from "--" to the end of the
// line is a comment and is skipped. At each position the recognizers are
// tried in a fixed order: string, number, symbol, reserved word, name. A
// character that none of them accepts (e.g. whitespace) is skipped.
int main() {
    for (const auto &word: reserved_list) {
        insert_reserved(word);
    }
    for (const auto &symbol: symbol_list) {
        insert_symbol(symbol);
    }
    while (getline(cin, now)) {
        pointer = 0;  // restart at the beginning of the new line
        const ll len = now.size();
        while (pointer < len) {
            have_word = false;
            // pointer + 1 is at most now.size(), where operator[] yields '\0'.
            if (now[pointer] == '-' && now[pointer + 1] == '-') {
                break;  // comment: ignore the rest of the line
            }
            if (now[pointer] == '"' || now[pointer] == '\'') {
                string_part(now[pointer]);
                pointer++;  // step over the closing quote
                continue;
            }
            // Each recognizer sets have_word when it has emitted a token; the
            // first one that succeeds wins.
            number_part();
            if (!have_word) {
                symbol_part();
            }
            if (!have_word) {
                reserved_part();
            }
            if (!have_word) {
                name_part();
            }
            if (!have_word) {
                pointer++;  // not the start of any token
            }
        }
        cout << "[EOL]\n";
    }
    return 0;
}