valhallac

Compiler for a set-theoretic programming language.
git clone git://git.knutsen.co/valhallac
Log | Files | Refs | README | LICENSE

commit af7eff7a7dfa76a8d905c9f2587cdf1b2a68ca74
parent 14d55b03d14a2603228c71ae55059034267707ae
Author: Demonstrandum <moi@knutsen.co>
Date:   Mon, 29 Jul 2019 23:59:56 +0100

Basic assignment is properly compiled.

Diffstat:
M Cargo.toml | 2 +-
M src/compiler/block.rs | 43 +++++++++++++++++++++++++++++++++++--------
M src/compiler/instructions.rs | 7 ++++++-
M src/err.rs | 1 +
M src/syntax/analyser.rs | 43 ++++++++++++++++++++++++++++++-------------
M src/syntax/ast.rs | 14 ++++++++------
M src/syntax/parser.rs | 9 +++++++--
M test.vh | 10 +++-------
8 files changed, 91 insertions(+), 38 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml @@ -6,7 +6,7 @@ repository = "https://github.com/Demonstrandum/valhalla" documentation = "https://github.com/Demonstrandum/valhalla" keywords = ["set-theory", "programming", "language", "parser", "compiler"] categories = ["parser-implementations", "parsing", "encoding", "command-line-interface"] -license = "GPL-3.0" +#license = "GPL-3.0" license-file = "LICENSE.md" readme = "README.md" version = "0.1.0" diff --git a/src/compiler/block.rs b/src/compiler/block.rs @@ -37,7 +37,7 @@ pub struct LocalBlock<'a> { instructions : Vec<Instr>, // Used only for compilation: - locals_map : HashMap<String, usize>, + locals_map : HashMap<String, u16>, current_line : usize, } @@ -72,8 +72,11 @@ impl<'a> LocalBlock<'a> { let s = &ident_node.value; if !self.locals_map.contains_key(s) { issue!(err::Types::CompError, self.filename, err::NO_TOKEN, self.current_line, - "Trying to use unbound local variable `{}`.", s); + "Trying to use unbound local variable `{}'.", s); } + + self.instructions.push(Instr::Operator(Operators::PUSH_LOCAL as u8)); + self.instructions.push(Instr::Operand(self.locals_map[s])); }, ast::Nodes::Num(num_node) => { self.push_const_instr(numerics_to_element(&num_node.value)); @@ -87,19 +90,39 @@ impl<'a> LocalBlock<'a> { ast::Nodes::Call(call_node) => { if call_node.is_binary() { let ident = call_node.callee.call().unwrap().callee.ident().unwrap(); - let args = vec![ - &call_node.operands[0], - &call_node.callee.call().unwrap().operands[0], + &call_node.callee.call().unwrap().operands[0], // left + &call_node.operands[0], // right ]; - let inop = internal_functions::get_internal_op(&ident.value, Some(&args)); - if let Some(op) = inop { + // Check for assignment. 
+ if ident.value == "=" { + // Direct variable assignment: + if let Some(left) = args[0].ident() { + if self.locals_map.contains_key(&left.value) { + issue!(err::Types::CompError, self.filename, err::NO_TOKEN, self.current_line, + "Cannot mutate value of `{}', as is already bound.", left.value); + } + let index = self.locals_map.len() as u16; + self.locals_map.insert(left.value.to_owned(), index); + self.emit(args[1]); + self.instructions.push(Instr::Operator(Operators::STORE_LOCAL as u8)); + self.instructions.push(Instr::Operand(index)); + } + return; + } + + // Check for fast internal binary operations such as +, -, *, /, etc. + let maybe_op = internal_functions::get_internal_op(&ident.value, Some(&args)); + if let Some(op) = maybe_op { self.emit(args[0]); self.emit(args[1]); - self.instructions.push(op) + self.instructions.push(op); + return; } } + self.emit(&call_node.operands[0]); + self.emit(&*call_node.callee); }, _ => () }; @@ -118,6 +141,10 @@ impl<'a> fmt::Display for LocalBlock<'a> { for (i, c) in self.constants.iter().enumerate() { write!(f, "{: >3} | {} |\n", i, c)?; } + write!(f, "===Locals==================\n")?; + for key in self.locals_map.keys() { + write!(f, "{: >3} | {}\n", self.locals_map[key], key)?; + } write!(f, "===Bytecodes===============\n")?; for inst in &self.instructions { write!(f, "{}", inst)?; diff --git a/src/compiler/instructions.rs b/src/compiler/instructions.rs @@ -14,7 +14,12 @@ impl fmt::Display for Instr { let s = match &self { Instr::Operand(n) => format!("{: >5}\n", n), Instr::Operator(n) => { - format!("({:08b}):{}", n, Operators::from_u8(*n).unwrap()) + let op_str = &Operators::from_u8(*n).unwrap().to_string(); + if op_str.ends_with("\n") { + format!("({:08b}):{}", n, op_str) + } else { + format!("({:08b}):{: <11}", n, op_str) + } } }; write!(f, "{}", s) diff --git a/src/err.rs b/src/err.rs @@ -7,6 +7,7 @@ use std::io::{BufRead, BufReader}; use colored; use colored::*; +#[allow(non_camel_case_types)] pub struct NO_TOKEN; 
pub enum Types { diff --git a/src/syntax/analyser.rs b/src/syntax/analyser.rs @@ -5,6 +5,24 @@ fn constant_fold(node : &ast::Nodes) -> Option<ast::Nodes> { if node.call().is_some() && node.call().unwrap().is_binary() { let operation = node.call().unwrap().callee.call().unwrap().callee.ident(); if let Some(op) = operation { + match op.value.as_str() { + "+" | "-" | "*" | "/" => (), + _ => { + let mut new_call = *node.call().unwrap().callee.clone(); + let mut new_op = node.call().unwrap().operands[0].clone(); + + let maybe_call = constant_fold(&new_call); + let maybe_op = constant_fold(&new_op); + + if let Some(call) = maybe_call { + new_call = call; + } + if maybe_op.is_some() { + new_op = maybe_op.unwrap(); + } + return Some(ast::CallNode::new(new_call, vec![new_op])); + } + } let right = node.call().unwrap().operands.get(0); let left = node.call().unwrap().callee.call().unwrap().operands.get(0); @@ -12,8 +30,8 @@ fn constant_fold(node : &ast::Nodes) -> Option<ast::Nodes> { || right.is_none() { return None; } - let mut l_value = ast::Numerics::Natural(0); - let mut r_value = ast::Numerics::Natural(0); + let l_value; + let r_value; if left.unwrap().num().is_some() && right.unwrap().num().is_some() { @@ -31,15 +49,14 @@ fn constant_fold(node : &ast::Nodes) -> Option<ast::Nodes> { l_value = foldl.unwrap().num().unwrap().value; r_value = foldr.unwrap().num().unwrap().value; } - return Some(ast::Nodes::Num(ast::NumNode { - value: match op.value.as_str() { - "+" => l_value + r_value, - "-" => l_value - r_value, - "*" => l_value * r_value, - "/" => l_value / r_value, - _ => ast::Numerics::Natural(0) - } - })); + let value = match op.value.as_str() { + "+" => l_value + r_value, + "-" => l_value - r_value, + "*" => l_value * r_value, + "/" => l_value / r_value, + _ => return None + }; + return Some(ast::Nodes::Num(ast::NumNode { value })); } } None @@ -52,8 +69,8 @@ pub fn replace(root : &mut ast::Root) { let node = &root.branches[i]; { // START TOP-LEVEL CONSTANT FOLD let 
new = constant_fold(node); - if let Some(nbranch) = new { - root.branches[i] = nbranch; + if let Some(branch) = new { + root.branches[i] = branch; } } // END TOP-LEVEL CONSTANT FOLD i += 1; diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs @@ -4,11 +4,11 @@ use std::{fmt, ops}; /// will represent a value stored. #[derive(Clone)] pub struct IdentNode { - /// The name of the identifer. + /// The name of the identifier. pub value : String } -/// Different types of possible number types in the langauge. +/// Different types of possible number types in the language. /// Max size is determined by max pointer size. #[derive(PartialEq, Clone, Copy, Debug)] pub enum Numerics { @@ -20,7 +20,7 @@ pub enum Numerics { Real(f64) } -fn stronges_cast(left : Numerics, right : Numerics) -> BaseTypes { +fn strongest_cast(left : Numerics, right : Numerics) -> BaseTypes { let mut cast = BaseTypes::TNatural; match left { Numerics::Real(_) => cast = BaseTypes::TReal, @@ -49,7 +49,7 @@ macro_rules! new_base { macro_rules! fold_on_numeric { ($op:tt, $left:expr, $right:expr) => { { - let cast = stronges_cast($left, $right); + let cast = strongest_cast($left, $right); match cast { BaseTypes::TNatural => (new_base!($left, usize) $op new_base!($right, usize)).to_numeric(), BaseTypes::TInteger => (new_base!($left, isize) $op new_base!($right, isize)).to_numeric(), @@ -263,7 +263,7 @@ impl fmt::Display for Nodes { Nodes::Call(node) => format!( "%call{{\n :callee ({})\n :operands [|\n {}\n |]\n}}", node.callee, node.operands.iter().map(Nodes::to_string).collect::<Vec<String>>().join("\n ")), - Nodes::Block(node) => format!("%block{{ ... }}"), + Nodes::Block(_) => format!("%block{{ ... 
}}"), Nodes::Line(node) => format!("%newline{{ :line {} }}", node.line), Nodes::Empty(_) => String::from("()"), }; @@ -304,6 +304,8 @@ impl Nodes { pub fn sym(&self) -> Option<&SymNode> { unwrap_enum!(self, Nodes::Sym) } pub fn call(&self) -> Option<&CallNode> { unwrap_enum!(self, Nodes::Call) } pub fn block(&self) -> Option<&BlockNode> { unwrap_enum!(self, Nodes::Block) } + pub fn line(&self) -> Option<&LineNode> { unwrap_enum!(self, Nodes::Line) } + pub fn empty(&self) -> Option<&EmptyNode> { unwrap_enum!(self, Nodes::Empty) } pub fn is_atomic(&self) -> bool { match self { @@ -394,7 +396,7 @@ pub fn pretty_print(node : &Nodes, depth : usize) -> String { ops=pretty_print(&n.operands[0], depth + 2), tab=tab, T=TAB) }) ), - Nodes::Block(n) => format!("%block{{ ... }}"), + Nodes::Block(_) => format!("%block{{ ... }}"), _ => format!("{}{}", tab, node) }; printable diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs @@ -5,7 +5,7 @@ use super::operators; use super::super::err; use token::{Token, TokenType}; -use ast::{Nodes, Numerics}; +use ast::Nodes; pub fn parse(stream : Vec<Token>, file : &str) -> ast::Root { let mut environment = ParseEnvironment::new(stream, file); @@ -56,6 +56,10 @@ impl<'a> ParseEnvironment<'a> { fn shift(&mut self) -> Token { let shifted = self.stream.remove(0); if shifted.location.line as usize != self.line_number { + if self.root.branches.last().is_some() + && self.root.branches.last().unwrap().line().is_some() { + self.root.branches.pop(); + } self.line_number = shifted.location.line as usize; self.root.branches.push(ast::LineNode::new(self.line_number)); } @@ -76,7 +80,7 @@ impl<'a> ParseEnvironment<'a> { let prefix = self.optable.lookup(&token.string, 1); return match self.stream[0].class { TokenType::RParen => { - ast::CallNode::new(ast::IdentNode::new(&token.string), vec![]) + ast::IdentNode::new(&token.string) }, _ => { if prefix.is_none() { @@ -197,6 +201,7 @@ impl<'a> ParseEnvironment<'a> { #[cfg(test)] mod test { use super::*; + 
use ast::Numerics; #[test] fn numeric_parsing() { diff --git a/test.vh b/test.vh @@ -1,6 +1,2 @@ -f = 2 -x = 4 - -x - -f x- \ No newline at end of file +a = 3 + 6 +b = a + 2+ \ No newline at end of file