#if !defined(COCO_SCANNER_H__)
#define COCO_SCANNER_H__

// NOTE(review): the targets of the seven #include directives were missing in
// the reviewed copy of this header. The names below are the standard headers
// that the declarations in this file require (NULL, memset, swprintf,
// std::istream) -- confirm against the generator's frame file.
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>

// Select the bounded wide-character printf variant offered by the compiler.
// _MSC_VER is tested with defined() first so that non-Microsoft compilers do
// not evaluate an undefined macro.
#if defined(_MSC_VER) && _MSC_VER >= 1400
#define coco_swprintf swprintf_s
#elif defined(_MSC_VER) && _MSC_VER >= 1300
#define coco_swprintf _snwprintf
#elif defined __GNUC__
#define coco_swprintf swprintf
#else
#error unknown compiler!
#endif

#include <iostream>
#include <fstream>

#define COCO_WCHAR_MAX 65535
#define MAX_BUFFER_LENGTH (64*1024)
#define HEAP_BLOCK_SIZE (64*1024)

// string handling, wide character
// (declarations only; the definitions are not part of this header)
wchar_t* coco_string_create(const wchar_t *value);
wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length);
wchar_t* coco_string_create_upper(wchar_t* data);
wchar_t* coco_string_create_lower(wchar_t* data);
wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2);
wchar_t* coco_string_create_append(const wchar_t* data, const wchar_t value);
void coco_string_delete(wchar_t* &data);
int coco_string_length(const wchar_t* data);
bool coco_string_endswith(wchar_t* data, wchar_t *value);
int coco_string_indexof(wchar_t* data, wchar_t value);
int coco_string_lastindexof(wchar_t* data, wchar_t value);
void coco_string_merge(wchar_t* &data, wchar_t* value);
bool coco_string_equal(wchar_t* data1, wchar_t* data2);
int coco_string_compareto(wchar_t* data1, wchar_t* data2);
int coco_string_hash(const wchar_t* data);

// string handling, ascii character
wchar_t* coco_string_create(const char *value);
char* coco_string_create_char(const wchar_t *value);
void coco_string_delete(char* &data);

//-----------------------------------------------------------------------------------
// Token -- one scanned symbol; tokens are chained through 'next'
//-----------------------------------------------------------------------------------
class Token
{
public:
	int kind;     // token kind
	int pos;      // token position in the source text (starting at 0)
	int col;      // token column (starting at 1)
	int line;     // token line (starting at 1)
	wchar_t* val; // token value
	Token *next;  // ML 2005-03-11 Peek tokens are kept in linked list

	Token();
	~Token();
};

//-----------------------------------------------------------------------------------
// Buffer -- positioned access to the characters of an input stream
//-----------------------------------------------------------------------------------
class Buffer
{
private:
	char *buf;            // input buffer
	int bufStart;         // position of first byte in buffer relative to input stream
	int bufLen;           // length of buffer
	int fileLen;          // length of input stream
	int pos;              // current position in buffer
	std::istream* stream; // input stream (seekable)
	bool isUserStream;    // was the stream opened by the user?

public:
	// End-of-input marker: one past the largest character value (COCO_WCHAR_MAX).
	static const int EoF = COCO_WCHAR_MAX + 1;

	Buffer(std::istream* s, bool isUserStream);
	Buffer(Buffer *b);
	virtual ~Buffer();

	virtual void Close();
	virtual int Read();
	virtual int Peek();
	virtual char* GetString(int beg, int end);
	virtual int GetPos();
	virtual void SetPos(int value);
};

// Buffer variant that replaces Read(); constructed from an existing Buffer.
// NOTE(review): presumably Read() decodes UTF-8 sequences -- its definition is
// not part of this header, confirm there.
class UTF8Buffer : public Buffer
{
public:
	UTF8Buffer(Buffer *b) : Buffer(b) {}
	virtual int Read();
};

//-----------------------------------------------------------------------------------
// StartStates  -- maps characters to start states of tokens
//-----------------------------------------------------------------------------------
// Chained hash table with a fixed number of buckets. set() prepends to the
// bucket's chain, so the most recent entry for a key is the one state() finds.
class StartStates
{
private:
	enum { TableSize = 128 }; // number of hash buckets

	class Elem
	{
	public:
		int key, val;
		Elem *next;
		Elem(int key, int val) { this->key = key; this->val = val; next = NULL; }
	};

	Elem **tab;

	// Bucket index for a key. The reduction is done on the unsigned value so
	// that a negative key cannot produce a negative (out-of-bounds) index;
	// for non-negative keys the result equals key % TableSize.
	static int bucket(int key)
	{
		return static_cast<int>(static_cast<unsigned int>(key)
		                        % static_cast<unsigned int>(TableSize));
	}

	// The table and its chains are owned through raw pointers; an implicit
	// copy would lead to a double delete. Copying is therefore disabled
	// (declared, intentionally not defined).
	StartStates(const StartStates&);
	StartStates& operator=(const StartStates&);

public:
	StartStates()
	{
		tab = new Elem*[TableSize];
		memset(tab, 0, TableSize * sizeof(Elem*));
	}

	virtual ~StartStates()
	{
		for (int i = 0; i < TableSize; ++i) {
			Elem *e = tab[i];
			while (e != NULL) {
				Elem *next = e->next;
				delete e;
				e = next;
			}
		}
		delete [] tab;
	}

	// Adds the mapping key -> val at the head of the key's bucket.
	void set(int key, int val)
	{
		Elem *e = new Elem(key, val);
		int k = bucket(key);
		e->next = tab[k];
		tab[k] = e;
	}

	// Returns the value stored for key, or 0 when the key has no entry.
	int state(int key)
	{
		Elem *e = tab[bucket(key)];
		while (e != NULL && e->key != key) e = e->next;
		return e == NULL ? 0 : e->val;
	}
};

//-------------------------------------------------------------------------------------------
// KeywordMap  -- maps strings to integers (identifiers to keyword kinds)
//-------------------------------------------------------------------------------------------
// Chained hash table with a fixed number of buckets, hashed with
// coco_string_hash(). Each entry holds its own copy of the key string.
class KeywordMap
{
private:
	enum { TableSize = 128 }; // number of hash buckets

	class Elem
	{
	public:
		wchar_t *key;
		int val;
		Elem *next;
		// The key is duplicated with coco_string_create and released again in
		// the destructor with coco_string_delete.
		Elem(const wchar_t *key, int val) { this->key = coco_string_create(key); this->val = val; next = NULL; }
		virtual ~Elem() { coco_string_delete(key); }
	};

	Elem **tab;

	// Bucket index for a key. coco_string_hash() returns a signed int; the
	// reduction is done on the unsigned value so that a negative hash cannot
	// produce a negative (out-of-bounds) index.
	static int bucket(const wchar_t *key)
	{
		return static_cast<int>(static_cast<unsigned int>(coco_string_hash(key))
		                        % static_cast<unsigned int>(TableSize));
	}

	// The table and its chains are owned through raw pointers; an implicit
	// copy would lead to a double delete. Copying is therefore disabled
	// (declared, intentionally not defined).
	KeywordMap(const KeywordMap&);
	KeywordMap& operator=(const KeywordMap&);

public:
	KeywordMap()
	{
		tab = new Elem*[TableSize];
		memset(tab, 0, TableSize * sizeof(Elem*));
	}

	virtual ~KeywordMap()
	{
		for (int i = 0; i < TableSize; ++i) {
			Elem *e = tab[i];
			while (e != NULL) {
				Elem *next = e->next;
				delete e;
				e = next;
			}
		}
		delete [] tab;
	}

	// Adds the mapping key -> val at the head of the key's bucket.
	void set(const wchar_t *key, int val)
	{
		Elem *e = new Elem(key, val);
		int k = bucket(key);
		e->next = tab[k];
		tab[k] = e;
	}

	// Returns the value stored for key, or defaultVal when the key has no entry.
	int get(wchar_t *key, int defaultVal)
	{
		Elem *e = tab[bucket(key)];
		while (e != NULL && !coco_string_equal(e->key, key)) e = e->next;
		return e == NULL ? defaultVal : e->val;
	}
};

//-----------------------------------------------------------------------------------
// Scanner -- produces tokens from a Buffer (Scan / Peek / ResetPeek)
//-----------------------------------------------------------------------------------
class Scanner
{
private:
	void *firstHeap;
	void *heap;
	void *heapTop;
	void **heapEnd;

	char EOL;
	int eofSym;
	int noSym;
	int maxT;
	int charSetSize;
	StartStates start;
	KeywordMap keywords;

	Token *t;         // current token
	wchar_t *tval;    // text of current token
	int tvalLength;   // length of text of current token
	int tlen;         // length of current token

	Token *tokens;    // list of tokens already peeked (first token is a dummy)
	Token *pt;        // current peek token

	int ch;           // current input character

	int pos;          // byte position of current character
	int line;         // line number of current character
	int col;          // column number of current character
	int oldEols;      // EOLs that appeared in a comment;

	void CreateHeapBlock();
	Token* CreateToken();
	void AppendVal(Token *t);

	void Init();
	void NextCh();
	void AddCh();
	Token* NextToken();

public:
	Buffer *buffer;   // scanner buffer

	Scanner(const wchar_t* fileName);
	Scanner(std::istream& s);
	~Scanner();
	Token* Scan();
	Token* Peek();
	void ResetPeek();

}; // end Scanner

#endif // !defined(COCO_SCANNER_H__)