diff options
Diffstat (limited to 'core/parse/lexer.go')
-rw-r--r-- | core/parse/lexer.go | 122 |
1 files changed, 122 insertions, 0 deletions
diff --git a/core/parse/lexer.go b/core/parse/lexer.go new file mode 100644 index 000000000..d2939eba2 --- /dev/null +++ b/core/parse/lexer.go @@ -0,0 +1,122 @@ +package parse + +import ( + "bufio" + "io" + "unicode" +) + +type ( + // lexer is a utility which can get values, token by + // token, from a Reader. A token is a word, and tokens + // are separated by whitespace. A word can be enclosed + // in quotes if it contains whitespace. + lexer struct { + reader *bufio.Reader + token token + line int + } + + // token represents a single parsable unit. + token struct { + file string + line int + text string + } +) + +// load prepares the lexer to scan an input for tokens. +func (l *lexer) load(input io.Reader) error { + l.reader = bufio.NewReader(input) + l.line = 1 + return nil +} + +// next loads the next token into the lexer. +// A token is delimited by whitespace, unless +// the token starts with a quotes character (") +// in which case the token goes until the closing +// quotes (the enclosing quotes are not included). +// Inside quoted strings, quotes may be escaped +// with a preceding \ character. No other chars +// may be escaped. The rest of the line is skipped +// if a "#" character is read in. Returns true if +// a token was loaded; false otherwise. +func (l *lexer) next() bool { + var val []rune + var comment, quoted, escaped bool + + makeToken := func() bool { + l.token.text = string(val) + return true + } + + for { + ch, _, err := l.reader.ReadRune() + if err != nil { + if len(val) > 0 { + return makeToken() + } + if err == io.EOF { + return false + } + panic(err) + } + + if quoted { + if !escaped { + if ch == '\\' { + escaped = true + continue + } else if ch == '"' { + quoted = false + return makeToken() + } + } + if ch == '\n' { + l.line++ + } + if escaped { + // only escape quotes + if ch != '"' { + val = append(val, '\\') + } + } + val = append(val, ch) + escaped = false + continue + } + + if unicode.IsSpace(ch) { + if ch == '\r' { + continue + } + if ch == '\n' { + l.line++ + comment = false + } + if len(val) > 0 { + return makeToken() + } + continue + } + + if ch == '#' { + comment = true + } + + if comment { + continue + } + + if len(val) == 0 { + l.token = token{line: l.line} + if ch == '"' { + quoted = true + continue + } + } + + val = append(val, ch) + } +} |