valhallac

Compiler for a set-theoretic programming language.
git clone git://git.knutsen.co/valhallac
Log | Files | Refs | README | LICENSE

commit 1342459f378254412051a14b6512bedbbaf879d6
parent 40d6b6e28e9d9411f8dad515a6c6530fa3bad4ca
Author: Demonstrandum <moi@knutsen.co>
Date:   Mon, 29 Jul 2019 15:30:38 +0100

Partial static types.

Diffstat:
Msrc/compiler/block.rs | 56+++++++++++++++++++++++++++-----------------------------
Dsrc/compiler/casts.rs | 59-----------------------------------------------------------
Msrc/compiler/element.rs | 8+++++++-
Msrc/compiler/instructions.rs | 3++-
Msrc/compiler/internal_functions.rs | 68+++++++++++++++++++++++++++++++++++++++-----------------------------
Msrc/compiler/mod.rs | 11+++++------
Msrc/err.rs | 6+++---
Msrc/lib.rs | 12++++--------
Msrc/syntax/ast.rs | 35++++++++++++++++++++++++++++++++++-
Msrc/syntax/mod.rs | 2+-
Msrc/syntax/parser.rs | 2+-
11 files changed, 123 insertions(+), 139 deletions(-)

diff --git a/src/compiler/block.rs b/src/compiler/block.rs @@ -10,23 +10,33 @@ use element::{Element, Symbol}; use instructions::{Instr, Operators}; use super::internal_functions; -use super::casts; fn append_unique<'a>(v : &mut Vec<Element<'a>>, e : Element<'a>) -> usize { let index = v.iter().position(|c| c == &e); - if index.is_none() { v.push(e); } + if index.is_none() { v.push(e.clone()); } index.unwrap_or(v.len() - 1) } +pub fn numerics_to_element<'a>(num : &ast::Numerics) -> Element<'a> { + match num { + ast::Numerics::Natural(n) => Element::ENatural(*n), + ast::Numerics::Integer(n) => Element::EInteger(*n), + ast::Numerics::Real(n) => Element::EReal(*n) + } +} + +#[derive(Clone, PartialEq)] pub struct LocalBlock<'a> { + pub name : &'a str, constants : Vec<Element<'a>>, locals : Vec<Element<'a>>, instructions : Vec<Instr> } impl<'a> LocalBlock<'a> { - pub fn new() -> Self { + pub fn new(name : &'a str) -> Self { LocalBlock { + name, constants: vec![], locals: vec![], instructions: vec![] @@ -42,40 +52,28 @@ impl<'a> LocalBlock<'a> { fn emit(&mut self, node : &'a ast::Nodes) { match node { ast::Nodes::Num(num_node) => { - let elem = casts::numerics_to_element(&num_node.value); - self.push_const_instr(elem); + self.push_const_instr(numerics_to_element(&num_node.value)); + }, + ast::Nodes::Str(str_node) => { + self.push_const_instr(Element::EString(&str_node.value)); + }, + ast::Nodes::Sym(sym_node) => { + self.push_const_instr(Element::ESymbol(Symbol::new(&sym_node.value))); }, - ast::Nodes::Str(str_node) => self.push_const_instr(Element::EString(str_node.value.to_owned())), - ast::Nodes::Sym(sym_node) => self.push_const_instr(Element::ESymbol(Symbol::new(&sym_node.value))), ast::Nodes::Call(call_node) => { if call_node.is_binary() { let ident = call_node.callee.call().unwrap().callee.ident().unwrap(); + let args = vec![ + &call_node.operands[0], &call_node.callee.call().unwrap().operands[0], - &call_node.operands[0] ]; - // each_and_every_one - if 
args.iter().all(|n| n.is_numeric()) { - let nums = args.iter().map(|node| { - casts::numerics_to_element(&node.num().unwrap().value) - }).collect::<Vec<Element>>(); - let casted_args = casts::try_cast(nums); - if let Some(cast_succ) = casted_args { - let inop = internal_functions::get_internal_op(&ident.value, Some(&cast_succ)); - if let Some(op) = inop { - self.push_const_instr(cast_succ[0].clone()); - self.push_const_instr(cast_succ[1].clone()); - self.instructions.push(op); - } - } - } else { - let inop = internal_functions::get_internal_op(&ident.value, None); - if let Some(op) = inop { - self.emit(args[0]); - self.emit(args[1]); - self.instructions.push(op) - } + let inop = internal_functions::get_internal_op(&ident.value, Some(&args)); + if let Some(op) = inop { + self.emit(args[0]); + self.emit(args[1]); + self.instructions.push(op) } } }, diff --git a/src/compiler/casts.rs b/src/compiler/casts.rs @@ -1,58 +0,0 @@ -use super::element; -use element::Element; - -use super::super::syntax; -use syntax::ast; - -pub fn numerics_to_element<'a>(num : &ast::Numerics) -> Element<'a> { - match num { - ast::Numerics::Natural(n) => Element::ENatural(*n), - ast::Numerics::Integer(n) => Element::EInteger(*n), - ast::Numerics::Real(n) => Element::EReal(*n) - } -} - -pub enum Casts { // In order of cast strength. - REAL, - INT, - NAT -} - -macro_rules! 
conversion { - ($arg:expr, $to:path, $base:ident) => { - match $arg { - Element::ENatural(n) => $to(*n as $base), - Element::EInteger(n) => $to(*n as $base), - Element::EReal(n) => $to(*n as $base), - _ => panic!("Internal error, tried to cast non-numeric to numeric.") - }; - }; -} - -pub fn cast_to<'a> (cast : Casts, args : &Vec<Element>) -> Vec<Element<'a>> { - let mut new_args : Vec<Element> = vec![]; - for arg in args { - let new = match cast { - Casts::REAL => conversion!(arg, Element::EReal, f64), - Casts::INT => conversion!(arg, Element::EInteger, isize), - Casts::NAT => conversion!(arg, Element::ENatural, usize), - }; - new_args.push(new); - } - new_args -} - -pub fn try_cast<'a>(args : Vec<Element<'a>>) -> Option<Vec<Element<'a>>> { - if args.iter().all(Element::is_numeric) { - for arg in &args { - let converted = match arg { - Element::EReal(_) => Some(cast_to(Casts::REAL, &args)), - Element::EInteger(_) => Some(cast_to(Casts::INT, &args)), - _ => None - }; - if let Some(v) = converted { return Some(v); } - } - return Some(cast_to(Casts::NAT, &args)); - } - None -}- \ No newline at end of file diff --git a/src/compiler/element.rs b/src/compiler/element.rs @@ -4,6 +4,8 @@ use std::hash::{Hash, Hasher}; use snailquote::escape; +use super::block; + #[derive(Clone, Copy)] pub struct Symbol<'a> { hash : u64, @@ -42,8 +44,10 @@ pub enum Element<'a> { ENatural(usize), EInteger(isize), EReal(f64), - EString(String), + EString(&'a str), ESymbol(Symbol<'a>), + ECode(block::LocalBlock<'a>), + ENil } impl<'a> Element<'a> { @@ -66,6 +70,8 @@ impl<'a> fmt::Display for Element<'a> { Element::EReal(t) => format!("{: <5} => (Real) ", if t.fract() == 0f64 { format!("{:.1}", t) } else { f64::to_string(t) }), Element::EString(t) => format!("{: <5} => (String)", format!("\"{}\"", escape(t))), Element::ESymbol(t) => format!("{: <5} => (Sym) ", t.to_string()), + Element::ECode(t) => format!("{: <5} => (Block) ", t.name), + Element::ENil => format!("{: <5} => (Nil) ", "nil"), }; 
write!(f, "{}", s) } diff --git a/src/compiler/instructions.rs b/src/compiler/instructions.rs @@ -1,8 +1,9 @@ use std::fmt; +use enum_primitive_derive::Primitive; use num_traits::{FromPrimitive, ToPrimitive}; -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq)] pub enum Instr { Operator(u8), Operand(u16) diff --git a/src/compiler/internal_functions.rs b/src/compiler/internal_functions.rs @@ -1,49 +1,59 @@ -use super::element; -use element::Element; - use super::instructions; use instructions::{Instr, Operators}; -use super::casts; +use super::super::syntax; +use syntax::ast; /// Gets the appropriate operator for the internal functions. /// Assumes all args have equal type. -pub fn get_internal_op(ident : &str, args : Option<&Vec<Element>>) -> Option<Instr> { +pub fn get_internal_op(ident : &str, args : Option<&Vec<&ast::Nodes>>) -> Option<Instr> { + let mut first = ast::BaseTypes::TUnknown; + let mut is_uni = args.is_none(); + if !is_uni { + let unwrapped = args.unwrap(); + first = unwrapped[0].yield_type(); + is_uni = !unwrapped.iter().all(|e| e.yield_type() == first); + } + match ident { "+" => { - if args.is_none() { return Some(Instr::Operator(Operators::U_ADD as u8)); } - Some(Instr::Operator(match args.unwrap()[0] { - Element::ENatural(_) => Operators::N_ADD, - Element::EInteger(_) => Operators::I_ADD, - Element::EReal(_) => Operators::R_ADD, - _ => Operators::U_ADD + if is_uni { return Some(Instr::Operator(Operators::U_ADD as u8)); } + + Some(Instr::Operator(match first { + ast::BaseTypes::TNatural => Operators::N_ADD, + ast::BaseTypes::TInteger => Operators::I_ADD, + ast::BaseTypes::TReal => Operators::R_ADD, + _ => Operators::U_ADD } as u8)) }, "-" => { - if args.is_none() { return Some(Instr::Operator(Operators::U_SUB as u8)); } - Some(Instr::Operator(match args.unwrap()[0] { - Element::ENatural(_) => Operators::N_SUB, - Element::EInteger(_) => Operators::I_SUB, - Element::EReal(_) => Operators::R_SUB, - _ => Operators::U_SUB + if is_uni { return 
Some(Instr::Operator(Operators::U_SUB as u8)); } + + Some(Instr::Operator(match first { + ast::BaseTypes::TNatural => Operators::N_SUB, + ast::BaseTypes::TInteger => Operators::I_SUB, + ast::BaseTypes::TReal => Operators::R_SUB, + _ => Operators::U_SUB } as u8)) }, "*" => { - if args.is_none() { return Some(Instr::Operator(Operators::U_MUL as u8)); } - Some(Instr::Operator(match args.unwrap()[0] { - Element::ENatural(_) => Operators::N_MUL, - Element::EInteger(_) => Operators::I_MUL, - Element::EReal(_) => Operators::R_MUL, - _ => Operators::U_MUL + if is_uni { return Some(Instr::Operator(Operators::U_MUL as u8)); } + + Some(Instr::Operator(match first { + ast::BaseTypes::TNatural => Operators::N_MUL, + ast::BaseTypes::TInteger => Operators::I_MUL, + ast::BaseTypes::TReal => Operators::R_MUL, + _ => Operators::U_MUL } as u8)) }, "/" => { - if args.is_none() { return Some(Instr::Operator(Operators::U_DIV as u8)); } - Some(Instr::Operator(match args.unwrap()[0] { - Element::ENatural(_) => Operators::N_DIV, - Element::EInteger(_) => Operators::I_DIV, - Element::EReal(_) => Operators::R_DIV, - _ => Operators::U_DIV + if is_uni { return Some(Instr::Operator(Operators::U_DIV as u8)); } + + Some(Instr::Operator(match first { + ast::BaseTypes::TNatural => Operators::N_DIV, + ast::BaseTypes::TInteger => Operators::I_DIV, + ast::BaseTypes::TReal => Operators::R_DIV, + _ => Operators::U_DIV } as u8)) } _ => None diff --git a/src/compiler/mod.rs b/src/compiler/mod.rs @@ -1,7 +1,6 @@ //! Compilation of the syntax tree. 
-mod element; -mod instructions; -mod casts; -mod internal_functions; +pub mod element; +pub mod instructions; +pub mod internal_functions; pub mod block; -mod marshal;- \ No newline at end of file +pub mod marshal;+ \ No newline at end of file diff --git a/src/err.rs b/src/err.rs @@ -23,7 +23,7 @@ impl fmt::Display for Types { } } -pub fn issue(class : Types, filename : &str, token : &token::Token, message : &str) { +pub fn fissue(class : Types, filename : &str, token : &token::Token, message : &str) { let file = fs::File::open(filename).expect("Invalid filename for error message."); let line = BufReader::new(file).lines().nth((token.location.line - 1) as usize).unwrap().unwrap(); @@ -41,13 +41,13 @@ pub fn issue(class : Types, filename : &str, token : &token::Token, message : & macro_rules! issue { ($type:path, $file:expr, $token:expr, $message:expr) => { { - err::issue($type, $file, $token, $message); + err::fissue($type, $file, $token, $message); std::process::exit(1) } }; ($type:path, $file:expr, $token:expr, $message:expr, $($form:expr),*) => { { - err::issue($type, $file, $token, &format!($message, $($form),*)); + err::fissue($type, $file, $token, &format!($message, $($form),*)); std::process::exit(1) } }; diff --git a/src/lib.rs b/src/lib.rs @@ -2,25 +2,21 @@ //! the generated AST to Brokkr-bytecode for the //! Valhalla set theoretic programming language. -#[macro_use] -extern crate enum_primitive_derive; -extern crate num_traits; - /// Error messages. #[macro_use] -mod err; +pub mod err; /// Syntax submodule, responsible for lexical analysis, /// parsing and static analysis. -mod syntax; +pub mod syntax; /// Compiler, transforms AST into stack-based bytecode /// instructions for the Brokkr VM, and marshals the instructions. 
-mod compiler; +pub mod compiler; pub fn parse(filename : &str) { let root = syntax::parse_file(filename); - let mut code_block = compiler::block::LocalBlock::new(); + let mut code_block = compiler::block::LocalBlock::new("<main>"); code_block.generate(&root.branches); diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs @@ -2,6 +2,7 @@ use std::fmt; /// Identifiers, node representing a name that /// will represent a value stored. +#[derive(Clone)] pub struct IdentNode { /// The name of the identifer. pub value : String @@ -9,7 +10,7 @@ pub struct IdentNode { /// Different types of possible number types in the langauge. /// Max size is determined by max pointer size. -#[derive(PartialEq, Debug)] +#[derive(PartialEq, Clone, Debug)] pub enum Numerics { /// Naturals are unsigned ints. Natural(usize), @@ -104,6 +105,7 @@ impl fmt::Display for Numerics { } /// Node that represents a number. +#[derive(Clone)] pub struct NumNode { /// Holds a the numeric value. pub value : Numerics @@ -111,12 +113,14 @@ pub struct NumNode { /// Node for holding strings. +#[derive(Clone)] pub struct StrNode { /// Contents of the utf-8 string. pub value : String } /// Symbol Node. +#[derive(Clone)] pub struct SymNode { /// Value/name stored as a string and /// excludes the colon (:) in front. @@ -125,6 +129,7 @@ pub struct SymNode { /// Call Node has a pointer to the callee node /// and a list of operand nodes. +#[derive(Clone)] pub struct CallNode { /// Pointer to heap allocated calling node. pub callee : Box<Nodes>, @@ -134,14 +139,26 @@ pub struct CallNode { /// Represents a block of code / compound statements /// in order of when they will be executed. +#[derive(Clone)] pub struct BlockNode { /// Pointer to list of nodes in the code block. pub statements : Vec<Nodes> } +#[derive(Clone)] pub struct EmptyNode; +/// All base types, determined at compile time. 
+#[derive(Clone, Copy, PartialEq)] +pub enum BaseTypes { + TNatural, TInteger, TReal, + TString, TSym, + + TUnknown +} + /// All node types. +#[derive(Clone)] pub enum Nodes { Ident(IdentNode), Num(NumNode), @@ -181,6 +198,22 @@ macro_rules! unwrap_enum { impl Nodes { + pub fn yield_type(&self) -> BaseTypes { + match self { + Nodes::Num(nn) => { + match nn.value { + Numerics::Natural(_) => BaseTypes::TNatural, + Numerics::Integer(_) => BaseTypes::TInteger, + Numerics::Real(_) => BaseTypes::TReal, + } + }, + Nodes::Str(_) => BaseTypes::TString, + Nodes::Sym(_) => BaseTypes::TSym, + + _ => BaseTypes::TUnknown + } + } + pub fn ident(&self) -> Option<&IdentNode> { unwrap_enum!(self, Nodes::Ident) } pub fn num(&self) -> Option<&NumNode> { unwrap_enum!(self, Nodes::Num) } pub fn str(&self) -> Option<&StrNode> { unwrap_enum!(self, Nodes::Str) } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs @@ -11,7 +11,7 @@ pub mod token; pub mod ast; /// Dealing with associativity and precedence. -mod operators; +pub mod operators; /// Lexer splits code up into a token-stream /// of relevant lexical tokens, making the diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs @@ -2,7 +2,7 @@ use super::token; use super::ast; use super::operators; -#[macro_use] + use super::super::err; use token::{Token, TokenType};