diff options
Diffstat (limited to 'users/fcuny/exp/monkey/pkg/lexer/lexer.go')
-rw-r--r-- | users/fcuny/exp/monkey/pkg/lexer/lexer.go | 152 |
1 files changed, 152 insertions, 0 deletions
diff --git a/users/fcuny/exp/monkey/pkg/lexer/lexer.go b/users/fcuny/exp/monkey/pkg/lexer/lexer.go new file mode 100644 index 0000000..3e98cf0 --- /dev/null +++ b/users/fcuny/exp/monkey/pkg/lexer/lexer.go @@ -0,0 +1,152 @@ +// Package lexer provides a lexer to the monkey language. +package lexer + +import "monkey/pkg/token" + +// Lexer represents the lexer +type Lexer struct { + input string + // current position in input + position int + // current reading position in input (after a char) + readPosition int + // current character under examination + ch byte +} + +// New returns a new lexer +func New(input string) *Lexer { + l := &Lexer{input: input} + l.readChar() + return l +} + +// Read the current character and advances our position in the input string. +func (l *Lexer) readChar() { + // if we've reached the end of the input, we set the current character to 0, + // which is the ASCII code for NUL. + if l.readPosition >= len(l.input) { + l.ch = 0 + } else { + l.ch = l.input[l.readPosition] + } + l.position = l.readPosition + l.readPosition++ +} + +func (l *Lexer) readIdentifier() string { + position := l.position + for isLetter(l.ch) { + l.readChar() + } + return l.input[position:l.position] +} + +func (l *Lexer) readNumber() string { + position := l.position + for isDigit(l.ch) { + l.readChar() + } + return l.input[position:l.position] +} + +// we don't care about white space characters, we skip them when we find them. +func (l *Lexer) skipWhitespace() { + for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' { + l.readChar() + } +} + +// peekChar returns the character at position (which is the next charatecter), +// but does not increment `readPosition` and `position`. +// This is needed to read tokens that are composed of two characters (e.g. `==`). +func (l *Lexer) peekChar() byte { + if l.readPosition >= len(l.input) { + return 0 + } + return l.input[l.readPosition] +} + +// NextToken reads the next token from the lexer and returns the current token. +func (l *Lexer) NextToken() token.Token { + var tok token.Token + + l.skipWhitespace() + + switch l.ch { + case '=': + if l.peekChar() == '=' { + ch := l.ch + l.readChar() + literal := string(ch) + string(l.ch) + tok = token.Token{Type: token.EQ, Literal: literal} + } else { + tok = newToken(token.ASSIGN, l.ch) + } + case '+': + tok = newToken(token.PLUS, l.ch) + case '-': + tok = newToken(token.MINUS, l.ch) + case '!': + if l.peekChar() == '=' { + ch := l.ch + l.readChar() + literal := string(ch) + string(l.ch) + tok = token.Token{Type: token.NOT_EQ, Literal: literal} + } else { + tok = newToken(token.BANG, l.ch) + } + case '*': + tok = newToken(token.ASTERISK, l.ch) + case '/': + tok = newToken(token.SLASH, l.ch) + case '<': + tok = newToken(token.LT, l.ch) + case '>': + tok = newToken(token.GT, l.ch) + + case ';': + tok = newToken(token.SEMICOLON, l.ch) + case ',': + tok = newToken(token.COMMA, l.ch) + case '(': + tok = newToken(token.LPAREN, l.ch) + case ')': + tok = newToken(token.RPAREN, l.ch) + case '{': + tok = newToken(token.LBRACE, l.ch) + case '}': + tok = newToken(token.RBRACE, l.ch) + case 0: + tok.Literal = "" + tok.Type = token.EOF + default: + if isLetter(l.ch) { + tok.Literal = l.readIdentifier() + tok.Type = token.LookupIdent(tok.Literal) + return tok + } else if isDigit(l.ch) { + tok.Type = token.INT + tok.Literal = l.readNumber() + return tok + } else { + tok = newToken(token.ILLEGAL, l.ch) + } + + } + + l.readChar() + return tok +} + +func newToken(tokenType token.TokenType, ch byte) token.Token { + return token.Token{Type: tokenType, Literal: string(ch)} +} + +func isLetter(ch byte) bool { + return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' +} + +func isDigit(ch byte) bool { + return '0' <= ch && ch <= '9' +} |