commit 8067a7ad116baace1dbd9537bbb8cc66023b870a
parent 8fd9e72b17b0aee84e055abc8e2474ef32de2efa
Author: Fredrik Knutsen <moi@knutsen.co>
Date: Tue, 9 Jul 2019 22:20:40 +0100
Basic Arithmetic Lexer.
Diffstat:
9 files changed, 182 insertions(+), 1 deletion(-)
diff --git a/Cargo.toml b/Cargo.toml
@@ -5,3 +5,5 @@ authors = ["Demonstrandum <moi@knutsen.co>"]
edition = "2018"
[dependencies]
+lazy_static = "1.3.0"
+regex = "1"
\ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
@@ -1 +1,7 @@
-fn main() {}
+mod syntax;
+
+fn main() {
+    println!("\nTill Valhalla!\n");
+
+    syntax::parse_file("./test.vh");
+}
diff --git a/src/syntax/internal_macros.rs b/src/syntax/internal_macros.rs
diff --git a/src/syntax/lexer.rs b/src/syntax/lexer.rs
@@ -0,0 +1,87 @@
+use super::token;
+use token::{Token, TokenType};
+
+use super::location;
+
+use lazy_static::lazy_static;
+use regex::Regex;
+
+macro_rules! re {
+    ($string:expr) => {
+        Regex::new($string).unwrap()
+    };
+}
+
+trait RegexExt { fn first_match(&self, string : &str) -> Option<String>; }
+impl RegexExt for Regex {
+    fn first_match(&self, string : &str) -> Option<String> {
+        let cap = self.captures(string);
+        match cap {
+            Some(c) => {
+                match c.get(1) {
+                    Some(m) => Some(String::from(m.as_str())),
+                    None => None
+                }
+            },
+            None => None
+        }
+    }
+}
+
+const IDENT_CHARS : &str = r"\p{L}\?\!\'\-\_";
+
+lazy_static! {
+    static ref OP : Regex = re!(r"\A([\+\.\*\|\\/\&%\$\^\~><=¬@\-]+)");
+    static ref IDENT : Regex = re!(&format!(r"\A([{id}][{id}\p{{N}}]+)", id=IDENT_CHARS));
+    static ref NUM : Regex = re!(r"\A(\-?(?:(?:[0-9]+(?:\.[0-9]+)?(?:e[+-]?[0-9]+)?)|(?:0x[0-9a-f]+)|(?:0b[01]+)|(?:0o[0-7]+)))");
+}
+
+macro_rules! try_match {
+    ($stream:expr, $partial:expr,
+     $reg:expr, $token_type:expr,
+     $current_char:expr, $line:expr, $col:expr) => {
+        if let Some(matched) = $reg.first_match($partial) {
+            let span = matched.chars().count() as u32;
+            $stream.push(Token::new(
+                $token_type, &matched,
+                location::new($line, $col, span)
+            ));
+            $current_char += matched.len();
+            $col += span;
+            continue;
+        }
+    };
+}
+
+pub fn lex(string : String) -> Vec<Token> {
+    let mut token_stream : Vec<Token> = Vec::new();
+
+    let mut current_char = 0;
+    let string_size = string.len();
+
+    let mut partial : &str;
+    let mut line = 1;
+    let mut col = 1;
+
+    while current_char < string_size {
+        partial = &string[current_char..];
+
+        try_match!(token_stream, partial,
+            NUM, TokenType::Num,
+            current_char, line, col);
+
+        try_match!(token_stream, partial,
+            OP, TokenType::Op,
+            current_char, line, col);
+
+        if partial.chars().nth(0).unwrap() == '\n' {
+            line += 1;
+            col = 1;
+            current_char += 1;
+            continue;
+        }
+        current_char += 1;
+        if partial.is_char_boundary(0) { col += 1 }
+    }
+    token_stream
+}
\ No newline at end of file
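
A minimal usage sketch of the lexer above (hypothetical, not part of this commit), assuming it is called from inside the crate; the expected tokens follow from the regexes and the order the try_match! invocations run in (NUM is tried before OP):

    // Hypothetical: lex a small expression and inspect the stream.
    let stream = lex(String::from("1 + 2"));
    // Reading off the patterns above, this should yield three tokens:
    //   [ Numeric: "1" (1, 1) ]
    //   [ Operator: "+" (1, 3) ]
    //   [ Numeric: "2" (1, 5) ]
    assert_eq!(stream.len(), 3);

Whitespace other than '\n' never produces a token: when neither pattern matches, the loop simply advances current_char and col by one, which is why the column numbers above skip over the spaces.
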
diff --git a/src/syntax/location.rs b/src/syntax/location.rs
@@ -0,0 +1,14 @@
+pub struct Loc {
+    pub line : u32,
+    pub col : u32,
+    pub span : u32,
+}
+
+pub fn new(line : u32, col : u32, span : u32) -> Loc {
+    Loc {
+        line: line,
+        col: col,
+        span: span
+    }
+}
+
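
For instance (just restating the constructor above):

    // A one-character token sitting at line 1, column 3:
    let loc = location::new(1, 3, 1);

The lexer above builds one of these for every token it pushes, using the length of the regex match as the span.
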
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs
@@ -0,0 +1,17 @@
+mod location;
+mod token;
+
+pub mod lexer;
+pub mod parser;
+
+use std::fs;
+use token::ShowStream;
+
+pub fn parse_file(filename : &str) {
+    let code = fs::read_to_string(filename)
+        .expect("Could not open file for reading.");
+    println!("Code:\n{}\n", code);
+
+    let stream = lexer::lex(code);
+    println!("Stream:\n{}\n", stream.to_string());
+}
\ No newline at end of file
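
Put together with main.rs above, running the binary should print roughly the following (a sketch; the stream contents are worked through under test.vh below):

    Till Valhalla!

    Code:
    1 + 2 * 3 + -4 - 5

    Stream:
    [ ... ]
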
diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs
diff --git a/src/syntax/token.rs b/src/syntax/token.rs
@@ -0,0 +1,49 @@
+use std::fmt;
+use super::location;
+
+pub enum TokenType {
+    Ident,
+    Num,
+    Op,
+    Sym,
+    Str,
+}
+
+impl fmt::Display for TokenType {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let printable = match *self {
+            TokenType::Ident => "Identifier",
+            TokenType::Num => "Numeric",
+            TokenType::Op => "Operator",
+            TokenType::Sym => "Symbol",
+            TokenType::Str => "String"
+        };
+        write!(f, "{}", printable)
+    }
+}
+
+pub struct Token {
+    pub class : TokenType,
+    pub string : String,
+    pub location : location::Loc,
+}
+
+impl Token {
+    pub fn new(class : TokenType, string : &str, loc : location::Loc) -> Token {
+        Token { class: class, string: String::from(string), location: loc }
+    }
+
+    pub fn to_string(&self) -> String {
+        String::from(format!("[ {class}: \"{rep}\" ({l}, {c}) ]",
+            class=self.class, rep=self.string,
+            l=self.location.line, c=self.location.col))
+    }
+}
+
+pub trait ShowStream { fn to_string(&self) -> String; }
+impl ShowStream for Vec<Token> {
+    fn to_string(&self) -> String {
+        let lines : Vec<String> = self.into_iter().map(|t| t.to_string()).collect();
+        format!("[ {} ]", lines.join("\n "))
+    }
+}
\ No newline at end of file
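
A small, hypothetical example of the formatting above (not part of the commit), just reading off the format string in to_string:

    // Build one token by hand and print it.
    let tok = Token::new(TokenType::Op, "+", location::new(1, 3, 1));
    println!("{}", tok.to_string()); // [ Operator: "+" (1, 3) ]

ShowStream renders a whole Vec<Token> the same way, joining the entries with newlines inside one outer pair of brackets.
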
diff --git a/test.vh b/test.vh
@@ -0,0 +1 @@
+1 + 2 * 3 + -4 - 5
\ No newline at end of file
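
Going by the lexer above and the order its patterns are tried in (NUM before OP), this test file should lex to something like:

    [ Numeric: "1" (1, 1) ]
    [ Operator: "+" (1, 3) ]
    [ Numeric: "2" (1, 5) ]
    [ Operator: "*" (1, 7) ]
    [ Numeric: "3" (1, 9) ]
    [ Operator: "+" (1, 11) ]
    [ Numeric: "-4" (1, 13) ]
    [ Operator: "-" (1, 16) ]
    [ Numeric: "5" (1, 18) ]

Note that -4 lexes as a single negative numeric literal, since the NUM pattern allows an optional leading minus and is tried before OP; the later "- 5" instead gives an operator followed by a literal, because that minus is separated from the digit by a space and so fails the NUM pattern.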