valhallac

Compiler for a set-theoretic programming language.
git clone git://git.knutsen.co/valhallac
Log | Files | Refs | README | LICENSE

commit 8067a7ad116baace1dbd9537bbb8cc66023b870a
parent 8fd9e72b17b0aee84e055abc8e2474ef32de2efa
Author: Fredrik Knutsen <moi@knutsen.co>
Date:   Tue,  9 Jul 2019 22:20:40 +0100

Basic Arithmetic Lexer.

Diffstat:
MCargo.toml | 3+++
Msrc/main.rs | 8+++++++-
Asrc/syntax/internal_macros.rs | 0
Asrc/syntax/lexer.rs | 88+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/syntax/location.rs | 14++++++++++++++
Asrc/syntax/mod.rs | 18++++++++++++++++++
Asrc/syntax/parser.rs | 0
Asrc/syntax/token.rs | 50++++++++++++++++++++++++++++++++++++++++++++++++++
Atest.vh | 2++
9 files changed, 182 insertions(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
@@ -5,3 +5,5 @@ authors = ["Demonstrandum <moi@knutsen.co>"]
 edition = "2018"
 
 [dependencies]
+lazy_static = "1.3.0"
+regex = "1"
\ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
@@ -1 +1,7 @@
-fn main() {}
+mod syntax;
+
+fn main() {
+    println!("\nTill Valhalla!\n");
+
+    syntax::parse_file("./test.vh");
+}
diff --git a/src/syntax/internal_macros.rs b/src/syntax/internal_macros.rs
diff --git a/src/syntax/lexer.rs b/src/syntax/lexer.rs
@@ -0,0 +1,87 @@
+use super::token;
+use token::{Token, TokenType};
+
+use super::location;
+
+use lazy_static::lazy_static;
+use regex::Regex;
+
+macro_rules! re {
+    ($string:expr) => {
+        Regex::new($string).unwrap()
+    };
+}
+
+trait RegexExt { fn first_match(&self, string : &str) -> Option<String>; }
+impl RegexExt for Regex {
+    fn first_match(&self, string : &str) -> Option<String> {
+        let cap = self.captures(string);
+        match cap {
+            Some(c) => {
+                match c.get(1) {
+                    Some(m) => Some(String::from(m.as_str())),
+                    None => None
+                }
+            },
+            None => None
+        }
+    }
+}
+
+const IDENT_CHARS : &str = r"\p{L}\?\!\'\-\_";
+
+lazy_static! {
+    static ref OP : Regex = re!(r"\A([\+\.\*\|\\/\&%\$\^\~><=¬@\-]+)");
+    static ref IDENT : Regex = re!(&format!(r"\A([{id}][{id}\p{{N}}]+)", id=IDENT_CHARS));
+    static ref NUM : Regex = re!(r"\A(\-?(?:(?:[0-9]+(?:\.[0-9]+)?(?:e[+-]?[0-9]+)?)|(?:0x[0-9a-f]+)|(?:0b[01]+)|(?:0o[0-7]+)))");
+}
+
+macro_rules! try_match {
+    ($stream:expr, $partial:expr,
+     $reg:expr, $token_type:expr,
+     $current_char:expr, $line:expr, $col:expr) => {
+        if let Some(matched) = $reg.first_match($partial) {
+            let span = matched.chars().count() as u32;
+            $stream.push(Token::new(
+                $token_type, &matched,
+                location::new($line, $col, span)
+            ));
+            $current_char += matched.len();
+            $col += span;
+            continue;
+        }
+    };
+}
+
+pub fn lex(string : String) -> Vec<Token> {
+    let mut token_stream : Vec<Token> = Vec::new();
+
+    let mut current_char = 0;
+    let string_size = string.len();
+
+    let mut partial : &str;
+    let mut line = 1;
+    let mut col = 1;
+
+    while current_char < string_size {
+        partial = &string[current_char..];
+
+        try_match!(token_stream, partial,
+            NUM, TokenType::Num,
+            current_char, line, col);
+
+        try_match!(token_stream, partial,
+            OP, TokenType::Op,
+            current_char, line, col);
+
+        if partial.chars().nth(0).unwrap() == '\n' {
+            line += 1;
+            col = 1;
+            current_char += 1;
+            continue;
+        }
+        current_char += 1;
+        if partial.is_char_boundary(0) { col += 1 }
+    }
+    token_stream
+}
\ No newline at end of file
diff --git a/src/syntax/location.rs b/src/syntax/location.rs
@@ -0,0 +1,14 @@
+pub struct Loc {
+    pub line : u32,
+    pub col : u32,
+    pub span : u32,
+}
+
+pub fn new(line : u32, col : u32, span : u32) -> Loc {
+    Loc {
+        line: line,
+        col: col,
+        span: span
+    }
+}
+
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs
@@ -0,0 +1,17 @@
+mod location;
+mod token;
+
+pub mod lexer;
+pub mod parser;
+
+use std::fs;
+use token::ShowStream;
+
+pub fn parse_file(filename : &str) {
+    let code = fs::read_to_string(filename)
+        .expect("Could not open file for reading.");
+    println!("Code:\n{}\n", code);
+
+    let stream = lexer::lex(code);
+    println!("Stream:\n{}\n", stream.to_string());
+}
\ No newline at end of file
diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs
diff --git a/src/syntax/token.rs b/src/syntax/token.rs
@@ -0,0 +1,49 @@
+use std::fmt;
+use super::location;
+
+pub enum TokenType {
+    Ident,
+    Num,
+    Op,
+    Sym,
+    Str,
+}
+
+impl fmt::Display for TokenType {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let printable = match *self {
+            TokenType::Ident => "Identifier",
+            TokenType::Num => "Numeric",
+            TokenType::Op => "Operator",
+            TokenType::Sym => "Symbol",
+            TokenType::Str => "String"
+        };
+        write!(f, "{}", printable)
+    }
+}
+
+pub struct Token {
+    pub class : TokenType,
+    pub string : String,
+    pub location : location::Loc,
+}
+
+impl Token {
+    pub fn new(class : TokenType, string : &str, loc : location::Loc) -> Token {
+        Token { class: class, string: String::from(string), location: loc }
+    }
+
+    pub fn to_string(&self) -> String {
+        String::from(format!("[ {class}: \"{rep}\" ({l}, {c}) ]",
+            class=self.class, rep=self.string,
+            l=self.location.line, c=self.location.col))
+    }
+}
+
+pub trait ShowStream { fn to_string(&self) -> String; }
+impl ShowStream for Vec<Token> {
+    fn to_string(&self) -> String {
+        let lines : Vec<String> = self.into_iter().map(|t| t.to_string()).collect();
+        format!("[ {} ]", lines.join("\n "))
+    }
+}
\ No newline at end of file
diff --git a/test.vh b/test.vh
@@ -0,0 +1 @@
+1 + 2 * 3 + -4 - 5
\ No newline at end of file