about | summary | refs | log | tree | commit | diff
path: root/core/parse/lexer.go
diff options
context:
space:
mode:
Diffstat (limited to 'core/parse/lexer.go')
-rw-r--r-- core/parse/lexer.go 122
1 files changed, 122 insertions, 0 deletions
diff --git a/core/parse/lexer.go b/core/parse/lexer.go
new file mode 100644
index 000000000..d2939eba2
--- /dev/null
+++ b/core/parse/lexer.go
@@ -0,0 +1,122 @@
+package parse
+
+import (
+ "bufio"
+ "io"
+ "unicode"
+)
+
+type (
+ // lexer reads an input one token at a time. A token is
+ // a word; words are separated by whitespace, and a word
+ // that itself contains whitespace can be enclosed in
+ // double quotes (").
+ lexer struct {
+ reader *bufio.Reader // buffered input; assigned by load
+ token token // token most recently produced by next
+ line int // current input line; load starts it at 1
+ }
+
+ // token is a single parsable unit: one word of input.
+ token struct {
+ file string // presumably the source file name; not set in the code shown here
+ line int // line on which the token began
+ text string // token contents, enclosing quotes removed
+ }
+)
+
+// load points the lexer at input, wrapping it in a buffered
+// reader, and restarts line counting at 1. It always returns nil.
+func (l *lexer) load(input io.Reader) error {
+	l.reader, l.line = bufio.NewReader(input), 1
+	return nil
+}
+
+// next scans the next token from the input and stores it in
+// l.token, reporting whether a token was loaded. Tokens are
+// delimited by whitespace. A token that begins with a double
+// quote (") extends to the closing quote and excludes both
+// quotes; inside it, \" yields a literal quote and any other
+// backslash is kept as written. A "#" outside quotes discards
+// the rest of that line, even when it appears mid-token. It
+// returns false at EOF with no pending text, and panics on
+// any other read error unless text is already pending.
+func (l *lexer) next() bool {
+	var val []rune
+	var comment, quoted, escaped bool
+
+	// makeToken stores the collected runes as the token text.
+	makeToken := func() bool {
+		l.token.text = string(val)
+		return true
+	}
+
+	for {
+		ch, _, err := l.reader.ReadRune()
+		if err != nil {
+			// Flush pending text (e.g. an unterminated quoted token).
+			if len(val) > 0 {
+				return makeToken()
+			}
+			if err == io.EOF {
+				return false
+			}
+			panic(err)
+		}
+
+		if quoted {
+			if !escaped {
+				if ch == '\\' {
+					escaped = true
+					continue
+				} else if ch == '"' {
+					return makeToken()
+				}
+			}
+			if ch == '\n' {
+				l.line++
+			}
+			// Only \" is an escape; keep any other backslash.
+			if escaped && ch != '"' {
+				val = append(val, '\\')
+			}
+			val = append(val, ch)
+			escaped = false
+			continue
+		}
+
+		if unicode.IsSpace(ch) {
+			if ch == '\r' {
+				continue
+			}
+			if ch == '\n' {
+				l.line++
+				comment = false
+			}
+			// comment is local to this call, so ending the token on
+			// whitespace inside a comment would let the next call read
+			// the rest of the comment as tokens; wait for the newline.
+			if len(val) > 0 && !comment {
+				return makeToken()
+			}
+			continue
+		}
+
+		if ch == '#' {
+			comment = true
+		}
+		if comment {
+			continue
+		}
+
+		if len(val) == 0 {
+			// First rune of a token: record its starting line.
+			l.token = token{line: l.line}
+			if ch == '"' {
+				quoted = true
+				continue
+			}
+		}
+		val = append(val, ch)
+	}
+}