diff options
author | 2008-03-05 15:23:42 +0000 | |
---|---|---|
committer | 2008-03-05 15:23:42 +0000 | |
commit | bb640388580c843cfc479d2fd3b91fbb1b79d663 (patch) | |
tree | f5d19fc219af38f6aa3d0d7c56cb27392f9f6e9b | |
parent | 9639b61039f536ad0d4e71a7134a943ffc705db5 (diff) | |
download | newsboat-bb640388580c843cfc479d2fd3b91fbb1b79d663.tar.gz newsboat-bb640388580c843cfc479d2fd3b91fbb1b79d663.tar.zst newsboat-bb640388580c843cfc479d2fd3b91fbb1b79d663.zip |
Andreas Krennmair:
autogenerated filterlib files.
-rw-r--r-- | filter/Parser.cpp | 296 | ||||
-rw-r--r-- | filter/Parser.h | 78 | ||||
-rw-r--r-- | filter/Scanner.cpp | 584 | ||||
-rw-r--r-- | filter/Scanner.h | 239 |
4 files changed, 1197 insertions, 0 deletions
diff --git a/filter/Parser.cpp b/filter/Parser.cpp new file mode 100644 index 00000000..78378399 --- /dev/null +++ b/filter/Parser.cpp @@ -0,0 +1,296 @@ + + +#include <wchar.h> +#include "Parser.h" +#include "Scanner.h" + + + + +void Parser::SynErr(int n) { + if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); + errDist = 0; +} + +void Parser::SemErr(wchar_t* msg) { + if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); + errDist = 0; +} + +void Parser::Get() { + for (;;) { + t = la; + la = scanner->Scan(); + if (la->kind <= maxT) { ++errDist; break; } + + if (dummyToken != t) { + dummyToken->kind = t->kind; + dummyToken->pos = t->pos; + dummyToken->col = t->col; + dummyToken->line = t->line; + dummyToken->next = NULL; + coco_string_delete(dummyToken->val); + dummyToken->val = coco_string_create(t->val); + t = dummyToken; + } + la = t; + } +} + +void Parser::Expect(int n) { + if (la->kind==n) Get(); else { SynErr(n); } +} + +void Parser::ExpectWeak(int n, int follow) { + if (la->kind == n) Get(); + else { + SynErr(n); + while (!StartOf(follow)) Get(); + } +} + +bool Parser::WeakSeparator(int n, int syFol, int repFol) { + if (la->kind == n) {Get(); return true;} + else if (StartOf(repFol)) {return false;} + else { + SynErr(n); + while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { + Get(); + } + return StartOf(syFol); + } +} + +void Parser::stringlit(char* &lit) { + if (la->kind == 4) { + Get(); + } else if (la->kind == 5) { + Get(); + } else SynErr(20); + lit = coco_string_create_char(t->val); +} + +void Parser::matchattrib(char* &name) { + Expect(3); + name = coco_string_create_char(t->val); +} + +void Parser::matchop(int &op) { + switch (la->kind) { + case 6: { + Get(); + op = MATCHOP_EQ; + break; + } + case 7: { + Get(); + op = MATCHOP_EQ; + break; + } + case 8: { + Get(); + op = MATCHOP_NE; + break; + } + case 9: { + Get(); + op = MATCHOP_RXEQ; + break; + } + case 10: { + Get(); + op = MATCHOP_RXNE; + break; + } + case 11: { + Get(); + op = MATCHOP_LT; + break; + } + case 12: { + Get(); + op = MATCHOP_GT; + break; + } + case 13: { + Get(); + op = MATCHOP_LE; + break; + } + case 14: { + Get(); + op = MATCHOP_GE; + break; + } + case 15: { + Get(); + op = MATCHOP_CONTAINS; + break; + } + case 16: { + Get(); + op = MATCHOP_CONTAINSNOT; + break; + } + default: SynErr(21); break; + } +} + +void Parser::logop(int &lop) { + if (la->kind == 17) { + Get(); + lop = LOGOP_AND; + } else if (la->kind == 18) { + Get(); + lop = LOGOP_OR; + } else SynErr(22); +} + +void Parser::matchexpr() { + char * name, * lit; int op; + matchattrib(name); + matchop(op); + stringlit(lit); + gen->add_matchexpr(name, op, lit); +} + +void Parser::blockexpr() { + Expect(1); + gen->open_block(); + expr(); + Expect(2); + gen->close_block(); +} + +void Parser::expr() { + int lop; + if (la->kind == 3) { + matchexpr(); + } else if (la->kind == 1) { + blockexpr(); + } else SynErr(23); + while (la->kind == 17 || la->kind == 18) { + logop(lop); + gen->add_logop(lop); + if (la->kind == 3) { + matchexpr(); + } else if (la->kind == 1) { + blockexpr(); + } else SynErr(24); + } +} + +void Parser::Filter() { + expr(); +} + + + +void Parser::Parse() { + t = NULL; + la = dummyToken = new Token(); + la->val = coco_string_create(L"Dummy Token"); + Get(); + Filter(); + + Expect(0); +} + +Parser::Parser(Scanner *scanner) { + _EOF = 0; + _openblock = 1; + _closeblock = 2; + _ident = 3; + _stringliteral = 4; + _numliteral = 5; + maxT = 19; + + minErrDist = 2; + errDist = minErrDist; + this->scanner = scanner; + errors = new Errors(); +} + +bool Parser::StartOf(int s) { + const bool T = true; + const bool x = false; + + static bool set[1][21] = { + {T,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x} + }; + + + + return set[s][la->kind]; +} + +Parser::~Parser() { + delete errors; + delete dummyToken; +} + +Errors::Errors() { + count = 0; +} + +void Errors::SynErr(int line, int col, int n) { + wchar_t* s; + switch (n) { + case 0: s = coco_string_create(L"EOF expected"); break; + case 1: s = coco_string_create(L"openblock expected"); break; + case 2: s = coco_string_create(L"closeblock expected"); break; + case 3: s = coco_string_create(L"ident expected"); break; + case 4: s = coco_string_create(L"stringliteral expected"); break; + case 5: s = coco_string_create(L"numliteral expected"); break; + case 6: s = coco_string_create(L"\"==\" expected"); break; + case 7: s = coco_string_create(L"\"=\" expected"); break; + case 8: s = coco_string_create(L"\"!=\" expected"); break; + case 9: s = coco_string_create(L"\"=~\" expected"); break; + case 10: s = coco_string_create(L"\"!~\" expected"); break; + case 11: s = coco_string_create(L"\"<\" expected"); break; + case 12: s = coco_string_create(L"\">\" expected"); break; + case 13: s = coco_string_create(L"\"<=\" expected"); break; + case 14: s = coco_string_create(L"\">=\" expected"); break; + case 15: s = coco_string_create(L"\"#\" expected"); break; + case 16: s = coco_string_create(L"\"!#\" expected"); break; + case 17: s = coco_string_create(L"\"and\" expected"); break; + case 18: s = coco_string_create(L"\"or\" expected"); break; + case 19: s = coco_string_create(L"??? expected"); break; + case 20: s = coco_string_create(L"invalid stringlit"); break; + case 21: s = coco_string_create(L"invalid matchop"); break; + case 22: s = coco_string_create(L"invalid logop"); break; + case 23: s = coco_string_create(L"invalid expr"); break; + case 24: s = coco_string_create(L"invalid expr"); break; + + default: + { + wchar_t format[20]; + coco_swprintf(format, 20, L"error %d", n); + s = coco_string_create(format); + } + break; + } + /* wprintf(L"-- line %d col %d: %ls\n", line, col, s); */ + coco_string_delete(s); + count++; +} + +void Errors::Error(int line, int col, wchar_t *s) { + /* wprintf(L"-- line %d col %d: %ls\n", line, col, s); */ + count++; +} + +void Errors::Warning(int line, int col, wchar_t *s) { + /* wprintf(L"-- line %d col %d: %ls\n", line, col, s); */ +} + +void Errors::Warning(wchar_t *s) { + /* wprintf(L"%ls\n", s); */ +} + +void Errors::Exception(wchar_t* s) { + /* wprintf(L"%ls", s); */ +} + + + diff --git a/filter/Parser.h b/filter/Parser.h new file mode 100644 index 00000000..05d27468 --- /dev/null +++ b/filter/Parser.h @@ -0,0 +1,78 @@ + + +#if !defined(COCO_PARSER_H__) +#define COCO_PARSER_H__ + +#include "FilterParser.h" + + +#include "Scanner.h" + + + +class Errors { +public: + int count; // number of errors detected + + Errors(); + void SynErr(int line, int col, int n); + void Error(int line, int col, wchar_t *s); + void Warning(int line, int col, wchar_t *s); + void Warning(wchar_t *s); + void Exception(wchar_t *s); + +}; // Errors + +class Parser { +private: + int _EOF; + int _openblock; + int _closeblock; + int _ident; + int _stringliteral; + int _numliteral; + int maxT; + + Token *dummyToken; + int errDist; + int minErrDist; + + void SynErr(int n); + void Get(); + void Expect(int n); + bool StartOf(int s); + void ExpectWeak(int n, int follow); + bool WeakSeparator(int n, int syFol, int repFol); + +public: + Scanner *scanner; + Errors *errors; + + Token *t; // last recognized token + Token *la; // lookahead token + +FilterParser * gen; + + + + Parser(Scanner *scanner); + ~Parser(); + void SemErr(wchar_t* msg); + + void stringlit(char* &lit); + void matchattrib(char* &name); + void matchop(int &op); + void logop(int &lop); + void matchexpr(); + void blockexpr(); + void expr(); + void Filter(); + + void Parse(); + +}; // end Parser + + + +#endif // !defined(COCO_PARSER_H__) + diff --git a/filter/Scanner.cpp b/filter/Scanner.cpp new file mode 100644 index 00000000..f1bf5618 --- /dev/null +++ b/filter/Scanner.cpp @@ -0,0 +1,584 @@ + + +#include <memory.h> +#include <string.h> +#include "Scanner.h" + +// string handling, wide character + +wchar_t* coco_string_create(const wchar_t* value) { + wchar_t* data; + int len = 0; + if (value) { len = wcslen(value); } + data = new wchar_t[len + 1]; + wcsncpy(data, value, len); + data[len] = 0; + return data; +} + +wchar_t* coco_string_create(const wchar_t *value , int startIndex, int length) { + int len = 0; + wchar_t* data; + + if (value) { len = length; } + data = new wchar_t[len + 1]; + wcsncpy(data, &(value[startIndex]), len); + data[len] = 0; + + return data; +} + +wchar_t* coco_string_create_upper(wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t *newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((L'a' <= data[i]) && (data[i] <= L'z')) { + newData[i] = data[i] + (L'A' - L'a'); + } + else { newData[i] = data[i]; } + } + + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_lower(wchar_t* data) { + if (!data) { return NULL; } + + int dataLen = 0; + if (data) { dataLen = wcslen(data); } + + wchar_t* newData = new wchar_t[dataLen + 1]; + + for (int i = 0; i <= dataLen; i++) { + if ((L'A' <= data[i]) && (data[i] <= L'Z')) { + newData[i] = data[i] - (L'A'- L'a'); + } + else { newData[i] = data[i]; } + } + newData[dataLen] = L'\0'; + return newData; +} + +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { + wchar_t* data; + int data1Len = 0; + int data2Len = 0; + + if (data1) { data1Len = wcslen(data1); } + if (data2) {data2Len = wcslen(data2); } + + data = new wchar_t[data1Len + data2Len + 1]; + + if (data1) { wcscpy(data, data1); } + if (data2) { wcscpy(data + data1Len, data2); } + + data[data1Len + data2Len] = 0; + + return data; +} + +wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) { + int targetLen = coco_string_length(target); + wchar_t* data = new wchar_t[targetLen + 2]; + wcsncpy(data, target, targetLen); + data[targetLen] = appendix; + data[targetLen + 1] = 0; + return data; +} + +void coco_string_delete(wchar_t* &data) { + delete [] data; + data = NULL; +} + +int coco_string_length(const wchar_t* data) { + if (data) { return wcslen(data); } + return 0; +} + +bool coco_string_endswith(wchar_t* data, wchar_t *end) { + int dataLen = wcslen(data); + int endLen = wcslen(end); + return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); +} + +int coco_string_indexof(wchar_t* data, wchar_t value) { + wchar_t* chr = wcschr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +int coco_string_lastindexof(wchar_t* data, wchar_t value) { + wchar_t* chr = wcsrchr(data, value); + + if (chr) { return (chr-data); } + return -1; +} + +void coco_string_merge(wchar_t* &target, wchar_t* appendix) { + if (!appendix) { return; } + wchar_t* data = coco_string_create_append(target, appendix); + delete [] target; + target = data; +} + +bool coco_string_equal(wchar_t* data1, wchar_t* data2) { + return wcscmp( data1, data2 ) == 0; +} + +int coco_string_compareto(wchar_t* data1, wchar_t* data2) { + return wcscmp(data1, data2); +} + +int coco_string_hash(wchar_t *data) { + int h = 0; + if (!data) { return 0; } + while (*data != 0) { + h = (h * 7) ^ *data; + ++data; + } + if (h < 0) { h = -h; } + return h; +} + +// string handling, ascii character + +wchar_t* coco_string_create(const char* value) { + int len = 0; + if (value) { len = strlen(value); } + wchar_t* data = new wchar_t[len + 1]; + for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } + data[len] = 0; + return data; +} + +char* coco_string_create_char(const wchar_t *value) { + int len = coco_string_length(value); + char *res = new char[len + 1]; + for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } + res[len] = 0; + return res; +} + +void coco_string_delete(char* &data) { + delete [] data; + data = NULL; +} + + + + +Token::Token() { + kind = 0; + pos = 0; + col = 0; + line = 0; + val = NULL; + next = NULL; +} + +Token::~Token() { + coco_string_delete(val); +} + + +Buffer::Buffer(std::istream* s, bool isUserStream) { + stream = s; this->isUserStream = isUserStream; + s->seekg(0, std::ios_base::end); + fileLen = bufLen = s->tellg(); + s->seekg(0, std::ios_base::beg); + buf = new char[MAX_BUFFER_LENGTH]; + bufStart = INT_MAX; // nothing in the buffer so far + SetPos(0); // setup buffer to position 0 (start) + if (bufLen == fileLen) Close(); +} + +Buffer::Buffer(Buffer *b) { + buf = b->buf; + b->buf = NULL; + bufStart = b->bufStart; + bufLen = b->bufLen; + fileLen = b->fileLen; + pos = b->pos; + stream = b->stream; + b->stream = NULL; + isUserStream = b->isUserStream; +} + +Buffer::~Buffer() { + Close(); + if (buf != NULL) { + delete [] buf; + buf = NULL; + } +} + +void Buffer::Close() { + if (!isUserStream && stream != NULL) { + std::ifstream * ifs = dynamic_cast<std::ifstream*>(stream); + if (ifs) { + ifs->close(); + delete ifs; + } + stream = NULL; + } +} + +int Buffer::Read() { + if (pos < bufLen) { + return buf[pos++]; + } else if (GetPos() < fileLen) { + SetPos(GetPos()); // shift buffer start to Pos + return buf[pos++]; + } else { + return EoF; + } +} + +int Buffer::Peek() { + int curPos = GetPos(); + int ch = Read(); + SetPos(curPos); + return ch; +} + +char* Buffer::GetString(int beg, int end) { + int len = end - beg; + char *buf = new char[len]; + int oldPos = GetPos(); + SetPos(beg); + for (int i = 0; i < len; ++i) buf[i] = (char) Read(); + SetPos(oldPos); + return buf; +} + +int Buffer::GetPos() { + return pos + bufStart; +} + +void Buffer::SetPos(int value) { + if (value < 0) value = 0; + else if (value > fileLen) value = fileLen; + if (value >= bufStart && value < bufStart + bufLen) { // already in buffer + pos = value - bufStart; + } else if (stream != NULL) { // must be swapped in + stream->seekg(0, std::ios_base::beg); + stream->get(buf, MAX_BUFFER_LENGTH); + bufLen = stream->gcount(); + bufStart = value; pos = 0; + } else { + pos = fileLen - bufStart; // make Pos return fileLen + } +} + +int UTF8Buffer::Read() { + int ch; + do { + ch = Buffer::Read(); + // until we find a uft8 start (0xxxxxxx or 11xxxxxx) + } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EOF)); + if (ch < 128 || ch == EOF) { + // nothing to do, first 127 chars are the same in ascii and utf8 + // 0xxxxxxx or end of file character + } else if ((ch & 0xF0) == 0xF0) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x07; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; ch = Buffer::Read(); + int c4 = ch & 0x3F; + ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; + } else if ((ch & 0xE0) == 0xE0) { + // 1110xxxx 10xxxxxx 10xxxxxx + int c1 = ch & 0x0F; ch = Buffer::Read(); + int c2 = ch & 0x3F; ch = Buffer::Read(); + int c3 = ch & 0x3F; + ch = (((c1 << 6) | c2) << 6) | c3; + } else if ((ch & 0xC0) == 0xC0) { + // 110xxxxx 10xxxxxx + int c1 = ch & 0x1F; ch = Buffer::Read(); + int c2 = ch & 0x3F; + ch = (c1 << 6) | c2; + } + return ch; +} + +Scanner::Scanner(const wchar_t* fileName) { + char *chFileName = coco_string_create_char(fileName); + std::ifstream* ifs = new std::ifstream(chFileName); + if (!ifs || !ifs->is_open()) { + wprintf(L"--- Cannot open file %ls\n", fileName); + exit(1); + } + coco_string_delete(chFileName); + buffer = new Buffer(ifs, false); + Init(); +} + +Scanner::Scanner(std::istream& s) { + buffer = new Buffer(&s, true); + Init(); +} + +Scanner::~Scanner() { + char* cur = (char*) firstHeap; + + while(cur != NULL) { + cur = *(char**) (cur + HEAP_BLOCK_SIZE); + free(firstHeap); + firstHeap = cur; + } + delete [] tval; + delete buffer; +} + +void Scanner::Init() { + EOL = '\n'; + eofSym = 0; + maxT = 19; + noSym = 19; + for (int i = 46; i <= 46; ++i) start.set(i, 3); + for (int i = 65; i <= 90; ++i) start.set(i, 3); + for (int i = 95; i <= 95; ++i) start.set(i, 3); + for (int i = 97; i <= 122; ++i) start.set(i, 3); + for (int i = 48; i <= 57; ++i) start.set(i, 6); + start.set(40, 1); + start.set(41, 2); + start.set(34, 4); + start.set(45, 7); + start.set(61, 16); + start.set(33, 17); + start.set(60, 18); + start.set(62, 19); + start.set(35, 14); + start.set(Buffer::EoF, -1); + keywords.set(L"and", 17); + keywords.set(L"or", 18); + + + tvalLength = 128; + tval = new wchar_t[tvalLength]; // text of current token + + // HEAP_BLOCK_SIZE byte heap + pointer to next heap block + heap = malloc(HEAP_BLOCK_SIZE + sizeof(void*)); + firstHeap = heap; + heapEnd = (void**) (((char*) heap) + HEAP_BLOCK_SIZE); + *heapEnd = 0; + heapTop = heap; + if (sizeof(Token) > HEAP_BLOCK_SIZE) { + wprintf(L"--- Too small HEAP_BLOCK_SIZE\n"); + exit(1); + } + + pos = -1; line = 1; col = 0; + oldEols = 0; + NextCh(); + if (ch == 0xEF) { // check optional byte order mark for UTF-8 + NextCh(); int ch1 = ch; + NextCh(); int ch2 = ch; + if (ch1 != 0xBB || ch2 != 0xBF) { + wprintf(L"Illegal byte order mark at start of file"); + exit(1); + } + Buffer *oldBuf = buffer; + buffer = new UTF8Buffer(buffer); col = 0; + delete oldBuf; oldBuf = NULL; + NextCh(); + } + + + pt = tokens = CreateToken(); // first token is a dummy +} + +void Scanner::NextCh() { + if (oldEols > 0) { ch = EOL; oldEols--; } + else { + pos = buffer->GetPos(); + ch = buffer->Read(); col++; + // replace isolated '\r' by '\n' in order to make + // eol handling uniform across Windows, Unix and Mac + if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL; + if (ch == EOL) { line++; col = 0; } + } + +} + +void Scanner::AddCh() { + if (tlen >= tvalLength) { + tvalLength *= 2; + wchar_t *newBuf = new wchar_t[tvalLength]; + memcpy(newBuf, tval, tlen*sizeof(wchar_t)); + delete tval; + tval = newBuf; + } + tval[tlen++] = ch; + NextCh(); +} + + + +void Scanner::CreateHeapBlock() { + void* newHeap; + char* cur = (char*) firstHeap; + + while(((char*) tokens < cur) || ((char*) tokens > (cur + HEAP_BLOCK_SIZE))) { + cur = *((char**) (cur + HEAP_BLOCK_SIZE)); + free(firstHeap); + firstHeap = cur; + } + + // HEAP_BLOCK_SIZE byte heap + pointer to next heap block + newHeap = malloc(HEAP_BLOCK_SIZE + sizeof(void*)); + *heapEnd = newHeap; + heapEnd = (void**) (((char*) newHeap) + HEAP_BLOCK_SIZE); + *heapEnd = 0; + heap = newHeap; + heapTop = heap; +} + +Token* Scanner::CreateToken() { + Token *t; + if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { + CreateHeapBlock(); + } + t = (Token*) heapTop; + heapTop = (void*) ((char*) heapTop + sizeof(Token)); + t->val = NULL; + t->next = NULL; + return t; +} + +void Scanner::AppendVal(Token *t) { + int reqMem = (tlen + 1) * sizeof(wchar_t); + if (((char*) heapTop + reqMem) >= (char*) heapEnd) { + if (reqMem > HEAP_BLOCK_SIZE) { + wprintf(L"--- Too long token value\n"); + exit(1); + } + CreateHeapBlock(); + } + t->val = (wchar_t*) heapTop; + heapTop = (void*) ((char*) heapTop + reqMem); + + wcsncpy(t->val, tval, tlen); + t->val[tlen] = L'\0'; +} + +Token* Scanner::NextToken() { + while (ch == ' ' || + false + ) NextCh(); + + t = CreateToken(); + t->pos = pos; t->col = col; t->line = line; + int state = start.state(ch); + tlen = 0; AddCh(); + + switch (state) { + case -1: { t->kind = eofSym; break; } // NextCh already done + case 0: { t->kind = noSym; break; } // NextCh already done + case 1: + {t->kind = 1; break;} + case 2: + {t->kind = 2; break;} + case 3: + case_3: + if (ch >= L'-' && ch <= L'.' || ch >= L'A' && ch <= L'Z' || ch == L'_' || ch >= L'a' && ch <= L'z') {AddCh(); goto case_3;} + else {t->kind = 3; t->val = coco_string_create(tval, 0, tlen); t->kind = keywords.get(t->val, t->kind); return t;} + case 4: + case_4: + if (ch <= L'!' || ch >= L'#' && ch <= 65535) {AddCh(); goto case_4;} + else if (ch == L'"') {AddCh(); goto case_5;} + else {t->kind = noSym; break;} + case 5: + case_5: + {t->kind = 4; break;} + case 6: + case_6: + if (ch >= L'0' && ch <= L'9') {AddCh(); goto case_6;} + else {t->kind = 5; break;} + case 7: + if (ch >= L'-' && ch <= L'.' || ch >= L'A' && ch <= L'Z' || ch == L'_' || ch >= L'a' && ch <= L'z') {AddCh(); goto case_3;} + else if (ch >= L'0' && ch <= L'9') {AddCh(); goto case_6;} + else {t->kind = 3; t->val = coco_string_create(tval, 0, tlen); t->kind = keywords.get(t->val, t->kind); return t;} + case 8: + case_8: + {t->kind = 6; break;} + case 9: + case_9: + {t->kind = 8; break;} + case 10: + case_10: + {t->kind = 9; break;} + case 11: + case_11: + {t->kind = 10; break;} + case 12: + case_12: + {t->kind = 13; break;} + case 13: + case_13: + {t->kind = 14; break;} + case 14: + {t->kind = 15; break;} + case 15: + case_15: + {t->kind = 16; break;} + case 16: + if (ch == L'=') {AddCh(); goto case_8;} + else if (ch == L'~') {AddCh(); goto case_10;} + else {t->kind = 7; break;} + case 17: + if (ch == L'=') {AddCh(); goto case_9;} + else if (ch == L'~') {AddCh(); goto case_11;} + else if (ch == L'#') {AddCh(); goto case_15;} + else {t->kind = noSym; break;} + case 18: + if (ch == L'=') {AddCh(); goto case_12;} + else {t->kind = 11; break;} + case 19: + if (ch == L'=') {AddCh(); goto case_13;} + else {t->kind = 12; break;} + + } + AppendVal(t); + return t; +} + +// get the next token (possibly a token already seen during peeking) +Token* Scanner::Scan() { + if (tokens->next == NULL) { + return pt = tokens = NextToken(); + } else { + pt = tokens = tokens->next; + return tokens; + } +} + +// peek for the next token, ignore pragmas +Token* Scanner::Peek() { + if (pt->next == NULL) { + do { + pt = pt->next = NextToken(); + } while (pt->kind > maxT); // skip pragmas + } else { + do { + pt = pt->next; + } while (pt->kind > maxT); + } + return pt; +} + +// make sure that peeking starts at the current scan position +void Scanner::ResetPeek() { + pt = tokens; +} + + + diff --git a/filter/Scanner.h b/filter/Scanner.h new file mode 100644 index 00000000..abe6a998 --- /dev/null +++ b/filter/Scanner.h @@ -0,0 +1,239 @@ + + +#if !defined(COCO_SCANNER_H__) +#define COCO_SCANNER_H__ + +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> + +#if _MSC_VER >= 1400 +#define coco_swprintf swprintf_s +#elif _MSC_VER >= 1300 +#define coco_swprintf _snwprintf +#elif defined __GNUC__ +#define coco_swprintf swprintf +#else +#error unknown compiler! +#endif + +#include <iostream> +#include <fstream> + +#define COCO_WCHAR_MAX 65535 +#define MAX_BUFFER_LENGTH (64*1024) +#define HEAP_BLOCK_SIZE (64*1024) + +// string handling, wide character +wchar_t* coco_string_create(const wchar_t *value); +wchar_t* coco_string_create(const wchar_t *value , int startIndex, int length); +wchar_t* coco_string_create_upper(wchar_t* data); +wchar_t* coco_string_create_lower(wchar_t* data); +wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2); +wchar_t* coco_string_create_append(const wchar_t* data, const wchar_t value); +void coco_string_delete(wchar_t* &data); +int coco_string_length(const wchar_t* data); +bool coco_string_endswith(wchar_t* data, wchar_t *value); +int coco_string_indexof(wchar_t* data, wchar_t value); +int coco_string_lastindexof(wchar_t* data, wchar_t value); +void coco_string_merge(wchar_t* &data, wchar_t* value); +bool coco_string_equal(wchar_t* data1, wchar_t* data2); +int coco_string_compareto(wchar_t* data1, wchar_t* data2); +int coco_string_hash(wchar_t* data); + +// string handling, ascii character +wchar_t* coco_string_create(const char *value); +char* coco_string_create_char(const wchar_t *value); +void coco_string_delete(char* &data); + + + + +class Token +{ +public: + int kind; // token kind + int pos; // token position in the source text (starting at 0) + int col; // token column (starting at 1) + int line; // token line (starting at 1) + wchar_t* val; // token value + Token *next; // ML 2005-03-11 Peek tokens are kept in linked list + + Token(); + ~Token(); + +}; + +class Buffer { +private: + char *buf; // input buffer + int bufStart; // position of first byte in buffer relative to input stream + int bufLen; // length of buffer + int fileLen; // length of input stream + int pos; // current position in buffer + std::istream* stream; // input stream (seekable) + bool isUserStream; // was the stream opened by the user? + +public: + static const int EoF = COCO_WCHAR_MAX + 1; + + Buffer(std::istream* s, bool isUserStream); + Buffer(Buffer *b); + virtual ~Buffer(); + + virtual void Close(); + virtual int Read(); + virtual int Peek(); + virtual char* GetString(int beg, int end); + virtual int GetPos(); + virtual void SetPos(int value); +}; + +class UTF8Buffer : public Buffer { +public: + UTF8Buffer(Buffer *b) : Buffer(b) {}; + virtual int Read(); +}; + +//----------------------------------------------------------------------------------- +// StartStates -- maps charactes to start states of tokens +//----------------------------------------------------------------------------------- +class StartStates { +private: + class Elem { + public: + int key, val; + Elem *next; + Elem(int key, int val) { this->key = key; this->val = val; next = NULL; } + }; + + Elem **tab; + +public: + StartStates() { tab = new Elem*[128]; memset(tab, 0, 128 * sizeof(Elem*)); } + virtual ~StartStates() { + for (int i = 0; i < 128; ++i) { + Elem *e = tab[i]; + while (e != NULL) { + Elem *next = e->next; + delete e; + e = next; + } + } + delete [] tab; + } + + void set(int key, int val) { + Elem *e = new Elem(key, val); + int k = key % 128; + e->next = tab[k]; tab[k] = e; + } + + int state(int key) { + Elem *e = tab[key % 128]; + while (e != NULL && e->key != key) e = e->next; + return e == NULL ? 0 : e->val; + } +}; + +//------------------------------------------------------------------------------------------- +// KeywordMap -- maps strings to integers (identifiers to keyword kinds) +//------------------------------------------------------------------------------------------- +class KeywordMap { +private: + class Elem { + public: + wchar_t *key; + int val; + Elem *next; + Elem(wchar_t *key, int val) { this->key = coco_string_create(key); this->val = val; next = NULL; } + virtual ~Elem() { coco_string_delete(key); } + }; + + Elem **tab; + +public: + KeywordMap() { tab = new Elem*[128]; memset(tab, 0, 128 * sizeof(Elem*)); } + virtual ~KeywordMap() { + for (int i = 0; i < 128; ++i) { + Elem *e = tab[i]; + while (e != NULL) { + Elem *next = e->next; + delete e; + e = next; + } + } + delete [] tab; + } + + void set(wchar_t *key, int val) { + Elem *e = new Elem(key, val); + int k = coco_string_hash(key) % 128; + e->next = tab[k]; tab[k] = e; + } + + int get(wchar_t *key, int defaultVal) { + Elem *e = tab[coco_string_hash(key) % 128]; + while (e != NULL && !coco_string_equal(e->key, key)) e = e->next; + return e == NULL ? defaultVal : e->val; + } +}; + +class Scanner { +private: + void *firstHeap; + void *heap; + void *heapTop; + void **heapEnd; + + char EOL; + int eofSym; + int noSym; + int maxT; + int charSetSize; + StartStates start; + KeywordMap keywords; + + Token *t; // current token + wchar_t *tval; // text of current token + int tvalLength; // length of text of current token + int tlen; // length of current token + + Token *tokens; // list of tokens already peeked (first token is a dummy) + Token *pt; // current peek token + + int ch; // current input character + + int pos; // byte position of current character + int line; // line number of current character + int col; // column number of current character + int oldEols; // EOLs that appeared in a comment; + + void CreateHeapBlock(); + Token* CreateToken(); + void AppendVal(Token *t); + + void Init(); + void NextCh(); + void AddCh(); + + Token* NextToken(); + +public: + Buffer *buffer; // scanner buffer + + Scanner(const wchar_t* fileName); + Scanner(std::istream& s); + ~Scanner(); + Token* Scan(); + Token* Peek(); + void ResetPeek(); + +}; // end Scanner + + + +#endif // !defined(COCO_SCANNER_H__) + |