valhallac

Compiler for a set-theoretic programming language.
git clone git://git.knutsen.co/valhallac
Log | Files | Refs | README | LICENSE

commit ece0a0bff17b70d82041c27d09098b3ae4a72c0d
parent e02b040c8875a39682c1683bbb19d516bea95a98
Author: Fredrik <moi@knutsen.co>
Date:   Wed, 23 Oct 2019 16:59:14 +0100

Added raw-print as opcode and language builtin (#1)

Added raw-print as opcode and language builtin
Diffstat:
A<main> | 0
Msamples/functions.vh | 4++--
Msrc/bin.rs | 7+++++--
Msrc/compiler/block.rs | 111++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
Msrc/compiler/element.rs | 16++++++++--------
Msrc/compiler/instructions.rs | 139+++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
Msrc/compiler/marshal.rs | 37+++++++++++++++++++++++++++++++++++++
Msrc/lib.rs | 16++++++++++++----
Msrc/syntax/analyser.rs | 36+++++++++++++++++++++++-------------
Msrc/syntax/ast.rs | 99++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------
Msrc/syntax/lexer.rs | 19++++++++++---------
Msrc/syntax/location.rs | 6++++--
Msrc/syntax/parser.rs | 153++++++++++++++++++++++++++++++++++++++++---------------------------------------
Msrc/syntax/token.rs | 4++--
Mtest.vh | 17+++--------------
15 files changed, 432 insertions(+), 232 deletions(-)

diff --git a/<main> b/<main> Binary files differ. diff --git a/samples/functions.vh b/samples/functions.vh @@ -6,10 +6,10 @@ a 1 2 -- a = n |-> (m |-> n - 2*m) -- |_____________| -- | --- func: `a__1` +-- func: `__a_final` -- |_____________________| -- | --- func: `a__0` +-- func: `__a__0` -- |__________________________| -- | -- func: a \ No newline at end of file diff --git a/src/bin.rs b/src/bin.rs @@ -5,12 +5,15 @@ fn is_vh_file(filename : &String) -> bool { filename.ends_with(".vh") } -pub fn main() { +pub fn main() -> Result<(), i32> { let args = env::args(); let files = args.filter(is_vh_file); for file in files { - valhallac::parse(&file); + let root = valhallac::parse(&file); + let block = valhallac::compile(&root); + valhallac::binary_gen(&block); } + Ok(()) } \ No newline at end of file diff --git a/src/compiler/block.rs b/src/compiler/block.rs @@ -12,6 +12,7 @@ use super::instructions; use element::{Element, Symbol}; use instructions::{Instr, Operators}; +use num_traits::cast::FromPrimitive; use super::internal_functions; @@ -35,17 +36,20 @@ struct IdentTypePair<'a>(String, &'a Nodes); #[derive(Clone)] pub struct LocalBlock<'a> { pub name : String, - filename : String, - constants : Vec<Element<'a>>, - instructions : Vec<Instr>, - globals : Vec<String>, + pub filename : String, + pub constants : Vec<Element<'a>>, + pub instructions : Vec<Instr>, + pub globals : Vec<String>, pub operand_type : ast::StaticTypes, pub return_type : ast::StaticTypes, // Used only for compilation: locals_map : HashMap<String, u16>, types_to_check : VecDeque<IdentTypePair<'a>>, - current_line : usize, + current_line : usize, + current_depth : usize, + stack_depth : usize, + last_instruction : Instr } impl<'a> PartialEq for LocalBlock<'a> { @@ -68,7 +72,10 @@ impl<'a> LocalBlock<'a> { locals_map: HashMap::new(), types_to_check: VecDeque::new(), - current_line: 0, + current_line: 0, + stack_depth: 0, + current_depth: 0, + last_instruction: Instr::Operator(0) } } @@ -78,12 +85,30 
@@ impl<'a> LocalBlock<'a> { self.push_operand(index); } + fn change_stack_depth(&mut self, i : isize) { + self.current_depth = ( + (self.current_depth as isize) + i + ) as usize; + if self.current_depth > self.stack_depth { + self.stack_depth = self.current_depth; + } + } + fn push_operator(&mut self, o : Operators) { - self.instructions.push(Instr::Operator(o as u8)); + let instr = Instr::Operator(o as u8); + if !o.takes_operand() { + self.change_stack_depth(instr.depth_delta(None)); + } + self.last_instruction = instr; + self.instructions.push(instr); } fn push_operand(&mut self, i : u16) { - self.instructions.push(Instr::Operand(i)); + let operand = Instr::Operand(i); + self.instructions.push(operand); + self.change_stack_depth( + self.last_instruction.depth_delta( + Some(operand))); } fn insert_local(&mut self, s : String) -> u16 { @@ -124,16 +149,17 @@ impl<'a> LocalBlock<'a> { let base_node = arguments.remove(0); if let Nodes::Ident(ident) = base_node { - let name = format!("{}__{}", ident.value.to_owned(), arguments.len() - 1); + let name = format!("__{}_final", ident.value.to_owned()); let mut last_block = LocalBlock::new(&name, &self.filename); // TODO: Be more careful here, not always an ident. // NEED TO DEAL WITH PATTERN MATCHING. 
last_block.insert_local(arguments.last().unwrap().ident().unwrap().value.to_owned()); last_block.emit(right); + last_block.yield_last(); for i in (0..(arguments.len() - 1)).rev() { - let name = format!("{}__{}", ident.value, i); + let name = format!("__{}_{}", ident.value, i); let mut super_block = LocalBlock::new( &name, &self.filename); @@ -144,6 +170,7 @@ impl<'a> LocalBlock<'a> { super_block.push_const_instr(Element::ECode(last_block)); super_block.push_const_instr(Element::ESymbol(Symbol::new(&block_name))); super_block.push_operator(Operators::MAKE_FUNC); + super_block.yield_last(); last_block = super_block; } @@ -168,19 +195,21 @@ impl<'a> LocalBlock<'a> { } fn emit(&mut self, node : &'a Nodes) { - match node { - Nodes::Line(line_node) => { - let len = self.instructions.len(); - if len > 1 { - if self.instructions[len - 2] == Instr::Operator(Operators::SET_LINE as u8) { - self.instructions.pop(); - self.instructions.pop(); - } + let current_line = node.location().line as usize; + if self.current_line != current_line { + let len = self.instructions.len(); + if len > 1 { + if self.instructions[len - 2] == Instr::Operator(Operators::SET_LINE as u8) { + self.instructions.pop(); + self.instructions.pop(); } - self.current_line = line_node.line; - self.push_operator(Operators::SET_LINE); - self.push_operand(self.current_line as u16); } + self.current_line = current_line; + self.push_operator(Operators::SET_LINE); + self.push_operand(self.current_line as u16); + } + + match node { Nodes::Ident(ident_node) => { let s = &ident_node.value; if !self.locals_map.contains_key(s) { @@ -203,6 +232,17 @@ impl<'a> LocalBlock<'a> { self.push_const_instr(Element::ESymbol(Symbol::new(&sym_node.value))); }, Nodes::Call(call_node) => { + if let Nodes::Ident(ident_node) = &*call_node.callee { + let mut do_return = true; + match ident_node.value.as_str() { + "__raw_print" => { + self.emit(&call_node.operands[0]); + self.push_operator(Operators::RAW_PRINT); + } + _ => do_return = 
false + }; + if do_return { return; } + } if call_node.is_binary() { let ident = call_node.callee.call().unwrap().callee.ident().unwrap(); let args = vec![ @@ -267,11 +307,12 @@ impl<'a> LocalBlock<'a> { // Check for fast internal binary operations such as +, -, *, /, etc. let maybe_op = internal_functions::get_internal_op(&ident.value, Some(&args)); if let Some(op) = maybe_op { + if let Instr::Operator(operator) = op { self.emit(args[1]); self.emit(args[0]); - self.instructions.push(op); + self.push_operator(Operators::from_u8(operator).unwrap()); return; - } + }} } // TODO: Optimise to implicitly ignore currying and use CALL_N instead. // Also, check that we are indeed calling a function, and not anything else @@ -284,10 +325,15 @@ impl<'a> LocalBlock<'a> { }; } + fn yield_last(&mut self) { + self.push_operator(Operators::YIELD); + } + pub fn generate(&mut self, nodes : &'a Vec<Nodes>) { for node in nodes { self.emit(node); } + self.yield_last(); } } @@ -298,20 +344,27 @@ impl<'a> fmt::Display for LocalBlock<'a> { write!(f, "{}", local_block)?; } } - write!(f, "\n{}:\n", self.name)?; - write!(f, " | ===Constants===============\n")?; + write!(f, "\n{}:", self.name)?; + write!(f," + |[meta]: + | stack-depth: {} + | file-name: {}\n", + self.stack_depth, + self.filename)?; + + write!(f, " |====Constants===============\n")?; for (i, c) in self.constants.iter().enumerate() { - write!(f, " | {: >3} | {} |\n", i, c)?; + write!(f, " | {: >3} | {}\n", i, c)?; } - write!(f, " | ===Locals==================\n")?; + write!(f, " |====Locals==================\n")?; for key in self.locals_map.keys() { write!(f, " | {: >3} | {}\n", self.locals_map[key], key)?; } - write!(f, " | ===Globals=================\n")?; + write!(f, " |====Globals=================\n")?; for (i, c) in self.globals.iter().enumerate() { write!(f, " | {: >3} | {}\n", i, c)?; } - write!(f, " | ===Bytecodes===============\n")?; + write!(f, " |====Bytecodes===============\n")?; for inst in &self.instructions { if 
let Instr::Operand(_) = inst { write!(f, "{}", inst)?; diff --git a/src/compiler/element.rs b/src/compiler/element.rs @@ -67,14 +67,14 @@ impl<'a> Element<'a> { impl<'a> fmt::Display for Element<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let s = match self { - Element::ENatural(t) => format!("{: <5} => (Nat) ", t), - Element::EInteger(t) => format!("{: <5} => (Int) ", t), - Element::EReal(t) => format!("{: <5} => (Real) ", if t.fract() == 0f64 { format!("{:.1}", t) } else { f64::to_string(t) }), - Element::EString(t) => format!("{: <5} => (String)", format!("\"{}\"", escape(t))), - Element::ESymbol(t) => format!("{: <5} => (Sym) ", t.to_string()), - Element::ECode(t) => format!("{: <5} => (Block) ", t.name), - Element::ESet(t) => format!("{: <5p} => (Set) ", t), - Element::ENil => format!("{: <5} => (Nil) ", "nil"), + Element::ENatural(t) => format!("{: <13} (Nat) ", t), + Element::EInteger(t) => format!("{: <13} (Int) ", t), + Element::EReal(t) => format!("{: <13} (Real)", if t.fract() == 0f64 { format!("{:.1}", t) } else { f64::to_string(t) }), + Element::EString(t) => format!("{: <13} (Str) ", format!("\"{}\"", escape(t))), + Element::ESymbol(t) => format!("{: <13} (Sym) ", t.to_string()), + Element::ECode(t) => format!("{: <13} (Code)", t.name), + Element::ESet(t) => format!("{: <13p} (Set) ", t), + Element::ENil => format!("{: <13} (Nil) ", "nil"), }; write!(f, "{}", s) } diff --git a/src/compiler/instructions.rs b/src/compiler/instructions.rs @@ -3,22 +3,66 @@ use std::fmt; use enum_primitive_derive::Primitive; use num_traits::{FromPrimitive}; -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, Copy, PartialEq)] pub enum Instr { Operator(u8), Operand(u16) } +impl Instr { + pub fn depth_delta(&self, maybe_operand : Option<Instr>) -> isize { + if let Instr::Operand(_) = self + { panic!("An operand does not have an impact on stack depth."); } + + if let Some(instr_operand) = maybe_operand { + if let Instr::Operand(operand) = 
instr_operand { + if let Instr::Operator(code) = self { + return match Operators::from_u8(code.to_owned()).unwrap() { + Operators::HALT => 0, + Operators::PUSH_CONST => 1, + Operators::PUSH_LOCAL => 1, + Operators::PUSH_SUPER => 1, + Operators::STORE_LOCAL => -1, + Operators::DUP_N => operand as isize, + Operators::CAST => 0, + Operators::SET_LINE => 0, + _ => panic!("This type of opcode doesn't take operands.") + }; + }} + } else { + if let Instr::Operator(code) = self { + match code { + 40..=56 => return -1, + _ => () + } + return match Operators::from_u8(code.to_owned()).unwrap() { + Operators::POP => -1, + Operators::DUP => 1, + Operators::SWAP => 0, + Operators::CALL_1 => -1, + Operators::CHECK_TYPE => -2, + Operators::MAKE_FUNC => -1, + Operators::YIELD => -1, + Operators::RAW_PRINT => 0, + Operators::NOP => 0, + _ => panic!("This opcode must take an operand.") + }; + } + } + panic!("Uncovered opcode.") + } +} + impl fmt::Display for Instr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let s = match &self { - Instr::Operand(n) => format!("{: >5}\n", n), + Instr::Operand(n) => format!("{: >4} (0x{:04x})\n", n, n), Instr::Operator(n) => { let op_str = &Operators::from_u8(*n).unwrap().to_string(); if op_str.ends_with("\n") { - format!("({:08b}):{}", n, op_str) + format!("(0x{:02x}):{}", n, op_str) } else { - format!("({:08b}):{: <11}", n, op_str) + format!("(0x{:02x}):{: <16}", n, op_str) } } }; @@ -28,45 +72,62 @@ impl fmt::Display for Instr { #[repr(u8)] #[allow(non_camel_case_types)] -#[derive(Primitive)] +#[derive(Primitive, Clone, Copy)] pub enum Operators { - HALT = 0, - PUSH_CONST = 1, - PUSH_LOCAL = 2, - PUSH_SUPER = 3, - POP = 4, - STORE_LOCAL = 5, - DUP = 6, - DUP_N = 7, - SWAP = 8, - CALL_1 = 9, - CHECK_TYPE = 10, - CAST = 11, - MAKE_FUNC = 12, - - N_ADD = 40, - I_ADD = 41, - R_ADD = 42, - U_ADD = 43, - CONCAT = 44, - N_SUB = 45, - I_SUB = 46, - R_SUB = 47, - U_SUB = 48, - N_MUL = 49, - I_MUL = 50, - R_MUL = 51, - U_MUL = 52, - N_DIV = 53, - 
I_DIV = 54, - R_DIV = 55, - U_DIV = 56, + HALT = 0, // TAKES 1 OPERAND(s) + PUSH_CONST = 1, // TAKES 1 OPERAND(s) + PUSH_LOCAL = 2, // TAKES 1 OPERAND(s) + PUSH_SUPER = 3, // TAKES 1 OPERAND(s) + POP = 4, // TAKES 0 OPERAND(s) + STORE_LOCAL = 5, // TAKES 1 OPERAND(s) + DUP = 6, // TAKES 0 OPERAND(s) + DUP_N = 7, // TAKES 1 OPERAND(s) + SWAP = 8, // TAKES 0 OPERAND(s) + CALL_1 = 9, // TAKES 0 OPERAND(s) + CHECK_TYPE = 10, // TAKES 0 OPERAND(s) + CAST = 11, // TAKES 2 OPERAND(s) (2 operands, 1 out of 2 bytes for each) + MAKE_FUNC = 12, // TAKES 0 OPERAND(s) + YIELD = 13, // TAKES 0 OPERAND(s) + RAW_PRINT = 14, // TAKES 0 OPERAND(s) + + N_ADD = 40, // TAKES 0 OPERAND(s) + I_ADD = 41, // TAKES 0 OPERAND(s) + R_ADD = 42, // TAKES 0 OPERAND(s) + U_ADD = 43, // TAKES 0 OPERAND(s) + CONCAT = 44, // TAKES 0 OPERAND(s) + N_SUB = 45, // TAKES 0 OPERAND(s) + I_SUB = 46, // TAKES 0 OPERAND(s) + R_SUB = 47, // TAKES 0 OPERAND(s) + U_SUB = 48, // TAKES 0 OPERAND(s) + N_MUL = 49, // TAKES 0 OPERAND(s) + I_MUL = 50, // TAKES 0 OPERAND(s) + R_MUL = 51, // TAKES 0 OPERAND(s) + U_MUL = 52, // TAKES 0 OPERAND(s) + N_DIV = 53, // TAKES 0 OPERAND(s) + I_DIV = 54, // TAKES 0 OPERAND(s) + R_DIV = 55, // TAKES 0 OPERAND(s) + U_DIV = 56, // TAKES 0 OPERAND(s) // Misc- / Meta-codes - SET_LINE = 254, - NOP = 255, + SET_LINE = 254, // TAKES 1 OPERAND(s) + NOP = 255, // TAKES 0 OPERAND(s) } +impl Operators { + pub fn takes_operand(&self) -> bool { + match self { + Operators::HALT + | Operators::PUSH_CONST + | Operators::PUSH_LOCAL + | Operators::PUSH_SUPER + | Operators::STORE_LOCAL + | Operators::DUP_N + | Operators::CAST + | Operators::SET_LINE => true, + _ => false + } + } +} impl fmt::Display for Operators { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -84,6 +145,8 @@ impl fmt::Display for Operators { Operators::CHECK_TYPE => "CHECK_TYPE\n", Operators::CAST => "CAST", Operators::MAKE_FUNC => "MAKE_FUNC\n", + Operators::YIELD => "YIELD\n", + Operators::RAW_PRINT => "RAW_PRINT\n", 
Operators::N_ADD => "N_ADD\n", Operators::I_ADD => "I_ADD\n", diff --git a/src/compiler/marshal.rs b/src/compiler/marshal.rs @@ -0,0 +1,36 @@ +use std::fs::File; +use std::io::{Write, Error}; + +use super::element; +use super::instructions; +use super::block; + +use element::{Element, Symbol}; +use instructions::{Instr, Operators}; + +// This ain't gonna be fun. + + +fn mk_bin_file(name : &str, bytes : Vec<u8>) -> File { + let mut file = File::create(name).expect("Could not create binary."); + file.write(&bytes.as_ref()); + file +} + +fn marshal_instructions(instrs : &Vec<Instr>) -> Vec<u8> { + let mut bytes : Vec<u8> = vec![]; + for instr in instrs { + match *instr { + Instr::Operator(o) => bytes.push(o), + Instr::Operand(o) => bytes.append(&mut vec![(o >> 8) as u8, o as u8]) + }; + } + bytes +} + +pub fn make_binary(blk : &block::LocalBlock) -> String { + let instrs = marshal_instructions(&blk.instructions); + let file = mk_bin_file(&blk.name, instrs); + + blk.name.to_owned() +}+ \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs @@ -14,12 +14,19 @@ pub mod syntax; /// instructions for the Brokkr VM, and marshals the instructions. 
pub mod compiler; -pub fn parse(filename : &str) { - let root = syntax::parse_file(filename); - let mut code_block = compiler::block::LocalBlock::new("<main>", filename); +pub fn parse(filename : &str) -> syntax::ast::Root { + syntax::parse_file(filename) +} +pub fn compile<'a>(root : &'a syntax::ast::Root) -> compiler::block::LocalBlock<'a> { + let mut code_block = compiler::block::LocalBlock::new("<main>", &root.filename); code_block.generate(&root.branches); - println!("Code Blocks:\n{}", code_block) + println!("Code Blocks:\n{}", code_block); + code_block } +pub fn binary_gen(block : &compiler::block::LocalBlock) -> String { + compiler::marshal::make_binary(block); + block.name.to_owned() +}+ \ No newline at end of file diff --git a/src/syntax/analyser.rs b/src/syntax/analyser.rs @@ -22,10 +22,12 @@ fn const_fold(node : &Nodes) -> Nodes { callee: Box::new(Nodes::Call(ast::CallNode { callee: Box::new(const_fold(&*call.callee.call().unwrap().callee)), operands: vec![left.clone()], - return_type: call.callee.yield_type() + return_type: call.callee.yield_type(), + location: call.callee.call().unwrap().location })), operands: vec![right.clone()], - return_type: call.return_type.clone() + return_type: call.return_type.clone(), + location: call.location }); let is_num_left = left.num().is_some(); @@ -48,7 +50,7 @@ fn const_fold(node : &Nodes) -> Nodes { return default; } }; - return Nodes::Num(ast::NumNode { value }); + return Nodes::Num(ast::NumNode { value, location: call.location }); } else { return default; } @@ -56,7 +58,8 @@ fn const_fold(node : &Nodes) -> Nodes { return Nodes::Call(ast::CallNode { callee: Box::new(const_fold(&*call.callee)), operands: vec![const_fold(&call.operands[0])], - return_type: call.return_type.clone() + return_type: call.return_type.clone(), + location: call.location }); } return node.to_owned(); @@ -73,9 +76,11 @@ fn create_cast(node : &Nodes, cast : &ast::StaticTypes) -> Nodes { let mut cast_node = ast::CallNode::new( 
ast::CallNode::new( - ast::IdentNode::new("cast"), - vec![node.clone()]), - vec![ast::SymNode::new(to_type)]); + ast::IdentNode::new("cast", node.location()), + vec![node.clone()], + node.location()), + vec![ast::SymNode::new(to_type, node.location())], + node.location()); if let Nodes::Call(ref mut call) = cast_node { call.set_return_type(cast.clone()) } @@ -117,13 +122,16 @@ fn balance_types(node : &Nodes) -> Nodes { if casting_right { new_call = ast::CallNode::new( *call.callee.clone(), - vec![create_cast(&right, &cast_to)]); + vec![create_cast(&right, &cast_to)], + call.callee.location()); } else { new_call = ast::CallNode::new( ast::CallNode::new( *call.callee.call().unwrap().callee.clone(), - vec![create_cast(&left, &cast_to)]), - vec![right]); + vec![create_cast(&left, &cast_to)], + call.callee.location()), + vec![right], + call.location); } if let Nodes::Call(ref mut c) = new_call { c.set_return_type(cast_to); @@ -142,7 +150,8 @@ fn balance_types(node : &Nodes) -> Nodes { if cast_strength(&left_yield) > cast_strength(&right_yield) { let mut new_call = ast::CallNode::new( *call.callee.clone(), - vec![create_cast(&right, &left_yield)]); + vec![create_cast(&right, &left_yield)], + call.callee.location()); if let Nodes::Call(ref mut c) = new_call { c.set_return_type(left_yield); } @@ -153,7 +162,8 @@ fn balance_types(node : &Nodes) -> Nodes { } let mut non_bi = ast::CallNode::new( balance_types(&*call.callee), - vec![balance_types(&call.operands[0])]); + vec![balance_types(&call.operands[0])], + call.callee.location()); if let Nodes::Call(ref mut c) = non_bi { c.set_return_type(call.return_type.clone()); } @@ -180,8 +190,8 @@ impl TypeChecker { pub fn type_branch(&mut self, node : &Nodes) -> Nodes { let mut clone = node.to_owned(); + self.source_line = clone.location().line as usize; match clone { - Nodes::Line(l) => self.source_line = l.line, Nodes::File(f) => self.source_file = f.filename.to_owned(), Nodes::Ident(ref mut i) => { if let Some(annotation) = 
self.ident_map.get(&i.value) { diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs @@ -1,6 +1,9 @@ use std::{fmt, ops}; use std::collections::VecDeque; +use super::location; +use location::Loc; + /// Identifiers, node representing a name that /// will represent a value stored. #[derive(Clone)] @@ -9,7 +12,10 @@ pub struct IdentNode { pub value : String, /// Type it holds. - pub static_type : StaticTypes + pub static_type : StaticTypes, + + /// Source location. + pub location : Loc, } /// Different types of possible number types in the language. @@ -189,7 +195,10 @@ impl fmt::Display for Numerics { #[derive(Clone)] pub struct NumNode { /// Holds a the numeric value. - pub value : Numerics + pub value : Numerics, + + /// Source location. + pub location : Loc, } @@ -197,7 +206,10 @@ pub struct NumNode { #[derive(Clone)] pub struct StrNode { /// Contents of the utf-8 string. - pub value : String + pub value : String, + + /// Source location. + pub location : Loc, } /// Symbol Node. @@ -205,7 +217,10 @@ pub struct StrNode { pub struct SymNode { /// Value/name stored as a string and /// excludes the colon (:) in front. - pub value : String + pub value : String, + + /// Source location. + pub location : Loc, } /// Call Node has a pointer to the callee node @@ -218,7 +233,10 @@ pub struct CallNode { pub operands : Vec<Nodes>, /// What type it returns. - pub return_type : StaticTypes + pub return_type : StaticTypes, + + /// Source location. + pub location : Loc, } /// Represents a block of code / compound statements @@ -226,21 +244,24 @@ pub struct CallNode { #[derive(Clone)] pub struct BlockNode { /// Pointer to list of nodes in the code block. - pub statements : Vec<Nodes> -} + pub statements : Vec<Nodes>, -#[derive(Clone)] -pub struct LineNode { - pub line : usize + /// Source location. + pub location : Loc, } #[derive(Clone)] pub struct FileNode { - pub filename : String + pub filename : String, + /// Source location. 
+ pub location : Loc, } #[derive(Clone)] -pub struct EmptyNode; +pub struct EmptyNode { + /// Source location. + pub location : Loc, +} /// All base types, determined at compile time. #[derive(Debug, Clone, PartialEq)] @@ -311,7 +332,6 @@ pub enum Nodes { Sym(SymNode), Call(CallNode), Block(BlockNode), - Line(LineNode), File(FileNode), Empty(EmptyNode), } @@ -329,8 +349,7 @@ impl fmt::Display for Nodes { "%call{{\n :yield {}\n :callee ({})\n :operands [|\n {}\n |]\n}}", yt, node.callee, node.operands.iter().map(Nodes::to_string).collect::<Vec<String>>().join("\n ")), Nodes::Block(_) => format!("%block{{ ... }}"), - Nodes::Line(node) => format!("%newline{{ :line {} }}", node.line), - Nodes::File(node) => format!("%newfile{{ :filename {} }}", node.filename), + Nodes::File(node) => format!("%file{{ :filename {} }}", node.filename), Nodes::Empty(_) => String::from("()"), }; write!(f, "{}", printable) @@ -348,6 +367,18 @@ macro_rules! unwrap_enum { impl Nodes { + pub fn location(&self) -> Loc { + match self { + Nodes::Ident(n) => n.location, + Nodes::Call(n) => n.location, + Nodes::Num(n) => n.location, + Nodes::Str(n) => n.location, + Nodes::Sym(n) => n.location, + Nodes::Empty(n) => n.location, + Nodes::Block(n) => n.location, + Nodes::File(n) => n.location, + } + } /// Function that returns the statically known type /// of any syntactic node generated. 
pub fn yield_type(&self) -> StaticTypes { @@ -399,7 +430,6 @@ impl Nodes { call.return_type.to_owned() }, Nodes::Block(_) - | Nodes::Line(_) | Nodes::File(_) => StaticTypes::TUnknown, Nodes::Empty(_) => StaticTypes::TNil, } @@ -441,7 +471,6 @@ impl Nodes { pub fn sym(&self) -> Option<&SymNode> { unwrap_enum!(self, Nodes::Sym) } pub fn call(&self) -> Option<&CallNode> { unwrap_enum!(self, Nodes::Call) } pub fn block(&self) -> Option<&BlockNode> { unwrap_enum!(self, Nodes::Block) } - pub fn line(&self) -> Option<&LineNode> { unwrap_enum!(self, Nodes::Line) } pub fn file(&self) -> Option<&FileNode> { unwrap_enum!(self, Nodes::File) } pub fn empty(&self) -> Option<&EmptyNode> { unwrap_enum!(self, Nodes::Empty) } @@ -465,35 +494,39 @@ impl Nodes { } impl IdentNode { - pub fn new(value : &str) -> Nodes { + pub fn new(value : &str, location : Loc) -> Nodes { Nodes::Ident(IdentNode { value: value.to_string(), - static_type: StaticTypes::TUnknown + static_type: StaticTypes::TUnknown, + location }) } } impl NumNode { - pub fn new<Num : ToNumeric>(number : Num) -> Nodes { + pub fn new<Num : ToNumeric>(number : Num, location : Loc) -> Nodes { let value = number.to_numeric(); - Nodes::Num(NumNode { value }) + Nodes::Num(NumNode { value, location }) } } impl StrNode { - pub fn new(value : &str) -> Nodes { Nodes::Str(StrNode { value: value.to_string() }) } + pub fn new(value : &str, location : Loc) -> Nodes + { Nodes::Str(StrNode { value: value.to_string(), location }) } } impl SymNode { - pub fn new(value : &str) -> Nodes { Nodes::Sym(SymNode { value: value[1..].to_string() }) } + pub fn new(value : &str, location : Loc) -> Nodes + { Nodes::Sym(SymNode { value: value[1..].to_string(), location }) } } impl CallNode { - pub fn new(callee : Nodes, operands : Vec<Nodes>) -> Nodes { + pub fn new(callee : Nodes, operands : Vec<Nodes>, location : Loc) -> Nodes { Nodes::Call(CallNode { callee: Box::new(callee), operands: operands, - return_type: StaticTypes::TUnknown + return_type: 
StaticTypes::TUnknown, + location }) } @@ -527,26 +560,24 @@ impl CallNode { } } -impl LineNode { - pub fn new(line : usize) -> Nodes { Nodes::Line(LineNode { line }) } -} - impl FileNode { - pub fn new(filename : String) -> Nodes { Nodes::File(FileNode { filename }) } + pub fn new(filename : String, location : Loc) -> Nodes + { Nodes::File(FileNode { filename, location }) } } impl EmptyNode { - pub fn new() -> Nodes { Nodes::Empty(EmptyNode { }) } + pub fn new(location : Loc) -> Nodes { Nodes::Empty(EmptyNode { location }) } } /// Root branch of the AST. pub struct Root { - pub branches : Vec<Nodes> + pub branches : Vec<Nodes>, + pub filename : String } impl Root { - pub fn new() -> Self { - Root { branches: Vec::new() } + pub fn new(filename : &str) -> Self { + Root { branches: Vec::new(), filename: filename.to_owned() } } } diff --git a/src/syntax/lexer.rs b/src/syntax/lexer.rs @@ -3,6 +3,7 @@ use token::{Token, TokenType}; use super::location; +use std::collections::VecDeque; use lazy_static::lazy_static; use regex::Regex; @@ -52,7 +53,7 @@ macro_rules! try_match { $current_char_ptr:expr, $line:expr, $col:expr) => { if let Some(matched) = $reg.first_match($partial) { let span = matched.width() as u32; - $stream.push(Token::new( + $stream.push_back(Token::new( $token_type, &matched, location::new($line, $col, span))); $current_char_ptr += matched.len(); @@ -63,9 +64,9 @@ macro_rules! try_match { } /// Takes a piece of code (as a &str) and returns -/// the generated token-stream (as a Vec<Token>). -pub fn lex(string : &str) -> Vec<Token> { - let mut token_stream : Vec<Token> = Vec::new(); +/// the generated token-stream (as a VecDeque<Token>). 
+pub fn lex(string : &str) -> VecDeque<Token> { + let mut token_stream : VecDeque<Token> = VecDeque::new(); let mut current_char_ptr = 0; let string_size = string.bytes().count(); @@ -111,7 +112,7 @@ pub fn lex(string : &str) -> Vec<Token> { _ => None }; if let Some(tt) = vec_brack { - token_stream.push(Token::new( + token_stream.push_back(Token::new( tt, two_chars, location::new(line, col, 2))); col += 2; @@ -120,7 +121,7 @@ pub fn lex(string : &str) -> Vec<Token> { } if two_chars == ": " { - token_stream.push(Token::new( + token_stream.push_back(Token::new( TokenType::Op, ":", location::new(line, col, 1))); col += 2; @@ -143,7 +144,7 @@ pub fn lex(string : &str) -> Vec<Token> { }; if let Some(tt) = single_char_token { - token_stream.push(Token::new( + token_stream.push_back(Token::new( tt, &first_char.to_string(), location::new(line, col, 1))); if first_char == '\n' { @@ -211,7 +212,7 @@ pub fn lex(string : &str) -> Vec<Token> { current_char_ptr += 1; col += 1; } - token_stream.push(Token::new( + token_stream.push_back(Token::new( TokenType::Str, &contents, location::new(line, old_col, col - old_col))); continue; @@ -237,7 +238,7 @@ pub fn lex(string : &str) -> Vec<Token> { if partial.is_char_boundary(0) { col += 1 } } - token_stream.push(Token::new( + token_stream.push_back(Token::new( TokenType::EOF, "\0", location::new(line, col, 1))); token_stream diff --git a/src/syntax/location.rs b/src/syntax/location.rs @@ -1,8 +1,10 @@ /// Holds line, column and span of a lexical token. -#[derive(Clone)] +#[derive(Clone, Copy)] pub struct Loc { /// Line number. pub line : u32, + /// Number of lines. + pub lines : u32, /// Column number. pub col : u32, /// Span/Width (in characters) of token. @@ -11,6 +13,6 @@ pub struct Loc { /// Construct new Loc structure. 
pub fn new(line : u32, col : u32, span : u32) -> Loc { - Loc { line, col, span } + Loc { line, lines: 1, col, span } } diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs @@ -1,51 +1,53 @@ +use std::collections::VecDeque; + +use super::location; use super::token; use super::ast; use super::operators; use super::super::err; +use location::Loc; + use token::{Token, TokenType}; use ast::Nodes; -pub fn parse(stream : Vec<Token>, file : &str) -> ast::Root { - let mut environment = ParseEnvironment::new(stream, file); - environment.optable.new_fun("max", 4); +pub fn parse(stream : VecDeque<Token>, file : &str) -> ast::Root { + let mut environment = ParseEnvironment::new(stream, file); environment.start(); - - environment.root } struct ParseEnvironment<'a> { pub root : ast::Root, - pub stream : Vec<Token>, + pub stream : VecDeque<Token>, pub optable : operators::PrecedenceTable<'a>, pub file : &'a str, ignore_newline : bool, - line_number : usize, + location: Loc, eof_token : Token } impl<'a> ParseEnvironment<'a> { - pub fn new(stream : Vec<Token>, file : &'a str) -> Self { + pub fn new(stream : VecDeque<Token>, file : &'a str) -> Self { ParseEnvironment { - root: ast::Root::new(), - eof_token: stream.last().unwrap().to_owned(), + root: ast::Root::new(file), + eof_token: stream.iter().last().unwrap().to_owned(), stream, optable: operators::PrecedenceTable::new(), file, ignore_newline: false, - line_number: 0, + location: location::new(1, 1, 1), } } pub fn start(&mut self) { - self.root.branches.push(ast::FileNode::new(self.file.to_owned())); + self.root.branches.push(ast::FileNode::new(self.file.to_owned(), self.location)); - let mut current = self.stream.first(); + let mut current = self.stream.get(0); while current.is_some() && current.unwrap().class != TokenType::EOF { if current.unwrap().class == TokenType::Term { self.shift(); @@ -61,17 +63,10 @@ impl<'a> ParseEnvironment<'a> { fn shift(&mut self) -> Token { if self.stream.is_empty() { - 
self.stream.push(self.eof_token.clone()); - } - let shifted = self.stream.remove(0); - if shifted.location.line as usize != self.line_number { - if self.root.branches.last().is_some() - && self.root.branches.last().unwrap().line().is_some() { - self.root.branches.pop(); - } - self.line_number = shifted.location.line as usize; - self.root.branches.push(ast::LineNode::new(self.line_number)); + self.stream.push_back(self.eof_token.clone()); } + let shifted = self.stream.pop_front().unwrap(); + self.location = shifted.location; shifted } @@ -82,6 +77,7 @@ impl<'a> ParseEnvironment<'a> { } fn null_den(&mut self, token : &Token) -> Nodes { + let loc = token.location; match token.class { TokenType::Op | TokenType::Ident => { let is_op = self.optable.exists(&token.string); @@ -89,7 +85,7 @@ impl<'a> ParseEnvironment<'a> { let prefix = self.optable.lookup(&token.string, 1); return match self.stream[0].class { TokenType::RParen => { - ast::IdentNode::new(&token.string) + ast::IdentNode::new(&token.string, loc) }, _ => { // If the operator is prefix: @@ -99,29 +95,32 @@ impl<'a> ParseEnvironment<'a> { if prefix.is_none() { ast::CallNode::new( ast::CallNode::new( - ast::IdentNode::new("flip"), - vec![ast::IdentNode::new(&token.string)]), - vec![self.expr(500)]) + ast::IdentNode::new("flip", loc), + vec![ast::IdentNode::new(&token.string, loc)], + self.location), + vec![self.expr(500)], + self.location) } else { ast::CallNode::new( - ast::IdentNode::new(&token.string), - vec![self.expr(500)]) + ast::IdentNode::new(&token.string, loc), + vec![self.expr(500)], + self.location) } } }; } - ast::IdentNode::new(&token.string) + ast::IdentNode::new(&token.string, loc) }, - TokenType::Num => ast::NumNode::new(&*token.string), - TokenType::Str => ast::StrNode::new(&token.string), - TokenType::Sym => ast::SymNode::new(&token.string), + TokenType::Num => ast::NumNode::new(&*token.string, loc), + TokenType::Str => ast::StrNode::new( &token.string, loc), + TokenType::Sym => 
ast::SymNode::new( &token.string, loc), TokenType::LParen => { let current = self.stream.get(0); if current.is_none() || current.unwrap().class == TokenType::EOF { self.expect(TokenType::RParen, current) } else if current.unwrap().class == TokenType::RParen { self.shift(); - return ast::EmptyNode::new(); + return ast::EmptyNode::new(loc); } @@ -186,21 +185,24 @@ impl<'a> ParseEnvironment<'a> { _ => () }; if pushed { return left; } - ast::CallNode::new(left, vec![self.expr(190)]) + ast::CallNode::new(left, vec![self.expr(190)], self.location) } fn left_den(&mut self, left : Nodes, op : operators::Operator) -> Nodes { - let first_apply = ast::CallNode::new(ast::IdentNode::new(op.name), vec![left]); + let first_apply = ast::CallNode::new( + ast::IdentNode::new(op.name, self.location), + vec![left], + self.location); if self.stream[0].class == TokenType::RParen { return first_apply; } let right = self.expr(op.precedence - (if op.is_right() { 1 } else { 0 })); - ast::CallNode::new(first_apply, vec![right]) + ast::CallNode::new(first_apply, vec![right], self.location) } fn expect(&self, tt : TokenType, maybe_t : Option<&Token>) { if maybe_t.is_none() { - issue!(err::Types::ParseError, self.file, self.stream.last().unwrap(), + issue!(err::Types::ParseError, self.file, self.stream.iter().last().unwrap(), "Unexpected end of stream."); } let t = maybe_t.unwrap(); @@ -218,45 +220,46 @@ mod test { #[test] fn numeric_parsing() { - assert_eq!(ast::NumNode::new(2).num().unwrap().value, Numerics::Natural(2usize)); - assert_eq!(ast::NumNode::new(2usize).num().unwrap().value, Numerics::Natural(2usize)); - assert_eq!(ast::NumNode::new(2u32).num().unwrap().value, Numerics::Natural(2usize)); - assert_eq!(ast::NumNode::new(2i32).num().unwrap().value, Numerics::Natural(2usize)); - - assert_eq!(ast::NumNode::new(-2).num().unwrap().value, Numerics::Integer(-2isize)); - assert_eq!(ast::NumNode::new(-2i32).num().unwrap().value, Numerics::Integer(-2isize)); - 
assert_eq!(ast::NumNode::new(-2isize).num().unwrap().value, Numerics::Integer(-2isize)); - - assert_eq!(ast::NumNode::new(-2.62).num().unwrap().value, Numerics::Real(-2.62f64)); - assert_eq!(ast::NumNode::new(2.62).num().unwrap().value, Numerics::Real(2.62f64)); - - assert_eq!(ast::NumNode::new("2").num().unwrap().value, Numerics::Natural(2)); - assert_eq!(ast::NumNode::new("325").num().unwrap().value, Numerics::Natural(325)); - assert_eq!(ast::NumNode::new("0b01010110").num().unwrap().value, Numerics::Natural(0b01010110)); - assert_eq!(ast::NumNode::new("0o721").num().unwrap().value, Numerics::Natural(0o721)); - assert_eq!(ast::NumNode::new("0xfa").num().unwrap().value, Numerics::Natural(0xfa)); - assert_eq!(ast::NumNode::new("0xf").num().unwrap().value, Numerics::Natural(0xf)); - assert_eq!(ast::NumNode::new("2.672").num().unwrap().value, Numerics::Real(2.672)); - assert_eq!(ast::NumNode::new("2.672e3").num().unwrap().value, Numerics::Real(2672.0)); - assert_eq!(ast::NumNode::new("2.672e+16").num().unwrap().value, Numerics::Real(2.672 * 10f64.powf(16f64))); - assert_eq!(ast::NumNode::new("2.672e-10").num().unwrap().value, Numerics::Real(2.672 * 10f64.powf(-10f64))); - assert_eq!(ast::NumNode::new("67e-4").num().unwrap().value, Numerics::Real(0.0067)); - assert_eq!(ast::NumNode::new("67e+10").num().unwrap().value, Numerics::Natural(670000000000)); - assert_eq!(ast::NumNode::new("-2").num().unwrap().value, Numerics::Integer(-2)); - assert_eq!(ast::NumNode::new("-325").num().unwrap().value, Numerics::Integer(-325)); - assert_eq!(ast::NumNode::new("-0b01010110").num().unwrap().value, Numerics::Integer(-0b01010110)); - assert_eq!(ast::NumNode::new("-0o721").num().unwrap().value, Numerics::Integer(-0o721)); - assert_eq!(ast::NumNode::new("-0xfa").num().unwrap().value, Numerics::Integer(-250)); - assert_eq!(ast::NumNode::new("-0xf").num().unwrap().value, Numerics::Integer(-15)); - assert_eq!(ast::NumNode::new("-2.672").num().unwrap().value, Numerics::Real(-2.672)); - 
assert_eq!(ast::NumNode::new("-2.672e3").num().unwrap().value, Numerics::Real(-2672.0)); - assert_eq!(ast::NumNode::new("-2.672e+16").num().unwrap().value, Numerics::Real(-26720000000000000.0)); - assert_eq!(ast::NumNode::new("-2.672e-10").num().unwrap().value, Numerics::Real(-0.0000000002672)); - assert_eq!(ast::NumNode::new("-67e-4").num().unwrap().value, Numerics::Real(-0.0067)); - assert_eq!(ast::NumNode::new("-67e+10").num().unwrap().value, Numerics::Integer(-670000000000)); + let l = location::new(1, 1, 1); + assert_eq!(ast::NumNode::new(2, l).num().unwrap().value, Numerics::Natural(2usize)); + assert_eq!(ast::NumNode::new(2usize, l).num().unwrap().value, Numerics::Natural(2usize)); + assert_eq!(ast::NumNode::new(2u32, l).num().unwrap().value, Numerics::Natural(2usize)); + assert_eq!(ast::NumNode::new(2i32, l).num().unwrap().value, Numerics::Natural(2usize)); + + assert_eq!(ast::NumNode::new(-2, l).num().unwrap().value, Numerics::Integer(-2isize)); + assert_eq!(ast::NumNode::new(-2i32, l).num().unwrap().value, Numerics::Integer(-2isize)); + assert_eq!(ast::NumNode::new(-2isize, l).num().unwrap().value, Numerics::Integer(-2isize)); + + assert_eq!(ast::NumNode::new(-2.62, l).num().unwrap().value, Numerics::Real(-2.62f64)); + assert_eq!(ast::NumNode::new(2.62, l).num().unwrap().value, Numerics::Real(2.62f64)); + + assert_eq!(ast::NumNode::new("2", l).num().unwrap().value, Numerics::Natural(2)); + assert_eq!(ast::NumNode::new("325", l).num().unwrap().value, Numerics::Natural(325)); + assert_eq!(ast::NumNode::new("0b01010110", l).num().unwrap().value, Numerics::Natural(0b01010110)); + assert_eq!(ast::NumNode::new("0o721", l).num().unwrap().value, Numerics::Natural(0o721)); + assert_eq!(ast::NumNode::new("0xfa", l).num().unwrap().value, Numerics::Natural(0xfa)); + assert_eq!(ast::NumNode::new("0xf", l).num().unwrap().value, Numerics::Natural(0xf)); + assert_eq!(ast::NumNode::new("2.672", l).num().unwrap().value, Numerics::Real(2.672)); + 
assert_eq!(ast::NumNode::new("2.672e3", l).num().unwrap().value, Numerics::Real(2672.0)); + assert_eq!(ast::NumNode::new("2.672e+16", l).num().unwrap().value, Numerics::Real(2.672 * 10f64.powf(16f64))); + assert_eq!(ast::NumNode::new("2.672e-10", l).num().unwrap().value, Numerics::Real(2.672 * 10f64.powf(-10f64))); + assert_eq!(ast::NumNode::new("67e-4", l).num().unwrap().value, Numerics::Real(0.0067)); + assert_eq!(ast::NumNode::new("67e+10", l).num().unwrap().value, Numerics::Natural(670000000000)); + assert_eq!(ast::NumNode::new("-2", l).num().unwrap().value, Numerics::Integer(-2)); + assert_eq!(ast::NumNode::new("-325", l).num().unwrap().value, Numerics::Integer(-325)); + assert_eq!(ast::NumNode::new("-0b01010110", l).num().unwrap().value, Numerics::Integer(-0b01010110)); + assert_eq!(ast::NumNode::new("-0o721", l).num().unwrap().value, Numerics::Integer(-0o721)); + assert_eq!(ast::NumNode::new("-0xfa", l).num().unwrap().value, Numerics::Integer(-250)); + assert_eq!(ast::NumNode::new("-0xf", l).num().unwrap().value, Numerics::Integer(-15)); + assert_eq!(ast::NumNode::new("-2.672", l).num().unwrap().value, Numerics::Real(-2.672)); + assert_eq!(ast::NumNode::new("-2.672e3", l).num().unwrap().value, Numerics::Real(-2672.0)); + assert_eq!(ast::NumNode::new("-2.672e+16", l).num().unwrap().value, Numerics::Real(-26720000000000000.0)); + assert_eq!(ast::NumNode::new("-2.672e-10", l).num().unwrap().value, Numerics::Real(-0.0000000002672)); + assert_eq!(ast::NumNode::new("-67e-4", l).num().unwrap().value, Numerics::Real(-0.0067)); + assert_eq!(ast::NumNode::new("-67e+10", l).num().unwrap().value, Numerics::Integer(-670000000000)); let s : String = String::from("-6e12"); - let num = ast::NumNode::new(&*s); + let num = ast::NumNode::new(&*s, l); assert_eq!(num.num().unwrap().value, Numerics::Integer(-6000000000000)); } diff --git a/src/syntax/token.rs b/src/syntax/token.rs @@ -1,4 +1,4 @@ -use std::fmt; +use std::{fmt, collections::VecDeque}; use super::location; use 
snailquote::escape; @@ -117,7 +117,7 @@ pub trait ShowStream { /// String representation of token-stream. fn to_string(&self) -> String; } -impl ShowStream for Vec<Token> { +impl ShowStream for VecDeque<Token> { fn to_string(&self) -> String { let lines : Vec<String> = self.iter().map(Token::to_string).collect(); format!("[ {} ]", lines.join(",\n ")) diff --git a/test.vh b/test.vh @@ -1,15 +1,5 @@ -a : Nat -> Nat -> Int -a n m = n - 2*m +a : Nat +a = 3 -a 1 2 +__raw_print (a + 2.5) --- a = n |-> (m |-> n + 2*m) --- |_____________| --- | --- func: `a__1` --- |_____________________| --- | --- func: `a__0` --- |__________________________| --- | --- func: a- \ No newline at end of file