valhallac

Compiler for a set-theoretic programming language.
git clone git://git.knutsen.co/valhallac
Log | Files | Refs | README | LICENSE

commit cacb37c86052a5fc8b02526552673ff2594f9ca8
parent 7b68c972a5cc2b827ee8c5f9e43f4aaf14eb7469
Author: Demonstrandum <moi@knutsen.co>
Date:   Thu,  1 Aug 2019 19:19:27 +0100

Added very rudimentary static typing

Diffstat:
M src/compiler/block.rs | 18 ++++++++++--------
M src/compiler/internal_functions.rs | 34 +++++++++++++++++-----------------
M src/compiler/types.rs | 25 ++++++++++++++++---------
M src/syntax/analyser.rs | 1 -
M src/syntax/ast.rs | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
M src/syntax/parser.rs | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M test.vh | 12 +++++++++---
7 files changed, 213 insertions(+), 62 deletions(-)

diff --git a/src/compiler/block.rs b/src/compiler/block.rs @@ -1,6 +1,5 @@ use std::fmt; -use std::collections::HashMap; -use std::collections::VecDeque; +use std::collections::{HashMap, VecDeque}; use super::super::err; @@ -9,7 +8,6 @@ use syntax::ast; use super::element; use super::instructions; -use super::types; use element::{Element, Symbol}; use instructions::{Instr, Operators}; @@ -40,6 +38,7 @@ pub struct LocalBlock<'a> { constants : Vec<Element<'a>>, instructions : Vec<Instr>, globals : Vec<String>, + pub return_type : ast::StaticTypes, // Used only for compilation: locals_map : HashMap<String, u16>, @@ -62,6 +61,7 @@ impl<'a> LocalBlock<'a> { constants: vec![], instructions: vec![], globals: vec![], + return_type: ast::StaticTypes::TUnknown, locals_map: HashMap::new(), types_to_check: VecDeque::new(), @@ -78,7 +78,7 @@ impl<'a> LocalBlock<'a> { fn ident_assignment(&mut self, left : &ast::IdentNode, right : &'a ast::Nodes) { if self.types_to_check.is_empty() { issue!(err::Types::TypeError, self.filename, err::NO_TOKEN, self.current_line, - "You must state what set `{}' is a member of. No type annotation found.", left.value); + "You must state what set `{}' is a member of. 
No type-annotation found.", left.value); } if self.locals_map.contains_key(&left.value) { issue!(err::Types::CompError, self.filename, err::NO_TOKEN, self.current_line, @@ -88,10 +88,12 @@ impl<'a> LocalBlock<'a> { self.locals_map.insert(left.value.to_owned(), index); self.emit(right); - self.instructions.push(Instr::Operator(Operators::DUP as u8)); - let type_node = self.types_to_check.pop_front().unwrap().1; - self.emit(type_node); - self.instructions.push(Instr::Operator(Operators::CHECK_TYPE as u8)); + if left.static_type == ast::StaticTypes::TUnknown { + self.instructions.push(Instr::Operator(Operators::DUP as u8)); + let type_node = self.types_to_check.pop_front().unwrap().1; + self.emit(type_node); + self.instructions.push(Instr::Operator(Operators::CHECK_TYPE as u8)); + } self.instructions.push(Instr::Operator(Operators::STORE_LOCAL as u8)); self.instructions.push(Instr::Operand(index)); } diff --git a/src/compiler/internal_functions.rs b/src/compiler/internal_functions.rs @@ -7,7 +7,7 @@ use syntax::ast; /// Gets the appropriate operator for the internal functions. /// Assumes all args have equal type. 
pub fn get_internal_op(ident : &str, args : Option<&Vec<&ast::Nodes>>) -> Option<Instr> { - let mut first = ast::BaseTypes::TUnknown; + let mut first = ast::StaticTypes::TUnknown; let mut is_uni = args.is_none(); if !is_uni { let unwrapped = args.unwrap(); @@ -20,40 +20,40 @@ pub fn get_internal_op(ident : &str, args : Option<&Vec<&ast::Nodes>>) -> Option if is_uni { return Some(Instr::Operator(Operators::U_ADD as u8)); } Some(Instr::Operator(match first { - ast::BaseTypes::TNatural => Operators::N_ADD, - ast::BaseTypes::TInteger => Operators::I_ADD, - ast::BaseTypes::TReal => Operators::R_ADD, - _ => Operators::U_ADD + ast::StaticTypes::TNatural => Operators::N_ADD, + ast::StaticTypes::TInteger => Operators::I_ADD, + ast::StaticTypes::TReal => Operators::R_ADD, + _ => Operators::U_ADD } as u8)) }, "-" => { if is_uni { return Some(Instr::Operator(Operators::U_SUB as u8)); } Some(Instr::Operator(match first { - ast::BaseTypes::TNatural => Operators::N_SUB, - ast::BaseTypes::TInteger => Operators::I_SUB, - ast::BaseTypes::TReal => Operators::R_SUB, - _ => Operators::U_SUB + ast::StaticTypes::TNatural => Operators::N_SUB, + ast::StaticTypes::TInteger => Operators::I_SUB, + ast::StaticTypes::TReal => Operators::R_SUB, + _ => Operators::U_SUB } as u8)) }, "*" => { if is_uni { return Some(Instr::Operator(Operators::U_MUL as u8)); } Some(Instr::Operator(match first { - ast::BaseTypes::TNatural => Operators::N_MUL, - ast::BaseTypes::TInteger => Operators::I_MUL, - ast::BaseTypes::TReal => Operators::R_MUL, - _ => Operators::U_MUL + ast::StaticTypes::TNatural => Operators::N_MUL, + ast::StaticTypes::TInteger => Operators::I_MUL, + ast::StaticTypes::TReal => Operators::R_MUL, + _ => Operators::U_MUL } as u8)) }, "/" => { if is_uni { return Some(Instr::Operator(Operators::U_DIV as u8)); } Some(Instr::Operator(match first { - ast::BaseTypes::TNatural => Operators::N_DIV, - ast::BaseTypes::TInteger => Operators::I_DIV, - ast::BaseTypes::TReal => Operators::R_DIV, - _ => 
Operators::U_DIV + ast::StaticTypes::TNatural => Operators::N_DIV, + ast::StaticTypes::TInteger => Operators::I_DIV, + ast::StaticTypes::TReal => Operators::R_DIV, + _ => Operators::U_DIV } as u8)) } _ => None diff --git a/src/compiler/types.rs b/src/compiler/types.rs @@ -18,7 +18,7 @@ macro_rules! is_elem { #[derive(Clone, PartialEq)] pub struct Set<'a> { - base_type : Option<ast::BaseTypes>, + base_type : Option<ast::StaticTypes>, elements : Vec<Element<'a>>, unions : Vec<Set<'a>>, intersections : Vec<Set<'a>>, @@ -27,7 +27,7 @@ pub struct Set<'a> { } impl<'a> Set<'a> { - pub fn new(filename : &'a str, base_type : Option<ast::BaseTypes>) -> Self { + pub fn new(filename : &'a str, base_type : Option<ast::StaticTypes>) -> Self { Self { base_type, elements: vec![], @@ -38,14 +38,21 @@ impl<'a> Set<'a> { } } pub fn is_memeber(&self, e : Element) -> bool { - if let Some(base) = self.base_type { + if let Some(base) = &self.base_type { return match base { - ast::BaseTypes::TNatural => is_elem!(e, Element::ENatural), - ast::BaseTypes::TInteger => is_elem!(e, Element::EInteger), - ast::BaseTypes::TReal => is_elem!(e, Element::EReal), - ast::BaseTypes::TSym => is_elem!(e, Element::ESymbol), - ast::BaseTypes::TString => is_elem!(e, Element::EString), - ast::BaseTypes::TNil => e == Element::ENil, + ast::StaticTypes::TNatural => is_elem!(e, Element::ENatural), + ast::StaticTypes::TInteger => is_elem!(e, Element::EInteger), + ast::StaticTypes::TReal => is_elem!(e, Element::EReal), + ast::StaticTypes::TSymbol => is_elem!(e, Element::ESymbol), + ast::StaticTypes::TString => is_elem!(e, Element::EString), + ast::StaticTypes::TFunction(o, r) => { + match e { + Element::ECode(code) => code.return_type == **r, + _ => false + } + }, + + ast::StaticTypes::TNil => e == Element::ENil, _ => false }; } diff --git a/src/syntax/analyser.rs b/src/syntax/analyser.rs @@ -89,5 +89,4 @@ pub fn replace(root : &mut ast::Root) { } // END TOP-LEVEL CONSTANT FOLD i += 1; } - println!("\n\n{}", root); 
} \ No newline at end of file diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs @@ -5,7 +5,10 @@ use std::{fmt, ops}; #[derive(Clone)] pub struct IdentNode { /// The name of the identifier. - pub value : String + pub value : String, + + /// Type it holds. + pub static_type : StaticTypes } /// Different types of possible number types in the language. @@ -20,17 +23,17 @@ pub enum Numerics { Real(f64) } -fn strongest_cast(left : Numerics, right : Numerics) -> BaseTypes { - let mut cast = BaseTypes::TNatural; +fn strongest_cast(left : Numerics, right : Numerics) -> StaticTypes { + let mut cast = StaticTypes::TNatural; match left { - Numerics::Real(_) => cast = BaseTypes::TReal, - Numerics::Integer(_) => cast = BaseTypes::TInteger, + Numerics::Real(_) => cast = StaticTypes::TReal, + Numerics::Integer(_) => cast = StaticTypes::TInteger, _ => () }; - if cast == BaseTypes::TReal { return cast; } + if cast == StaticTypes::TReal { return cast; } match right { - Numerics::Real(_) => cast = BaseTypes::TReal, - Numerics::Integer(_) => cast = BaseTypes::TInteger, + Numerics::Real(_) => cast = StaticTypes::TReal, + Numerics::Integer(_) => cast = StaticTypes::TInteger, _ => () }; cast @@ -51,9 +54,9 @@ macro_rules! fold_on_numeric { { let cast = strongest_cast($left, $right); match cast { - BaseTypes::TNatural => (new_base!($left, usize) $op new_base!($right, usize)).to_numeric(), - BaseTypes::TInteger => (new_base!($left, isize) $op new_base!($right, isize)).to_numeric(), - BaseTypes::TReal => (new_base!($left, f64) $op new_base!($right, f64)).to_numeric(), + StaticTypes::TNatural => (new_base!($left, usize) $op new_base!($right, usize)).to_numeric(), + StaticTypes::TInteger => (new_base!($left, isize) $op new_base!($right, isize)).to_numeric(), + StaticTypes::TReal => (new_base!($left, f64) $op new_base!($right, f64)).to_numeric(), _ => panic!("Numeric porting non-numeric type?") } } @@ -211,7 +214,12 @@ pub struct CallNode { /// Pointer to heap allocated calling node. 
pub callee : Box<Nodes>, /// Pointer to list of operand nodes. - pub operands : Vec<Nodes> + pub operands : Vec<Nodes>, + + /// What type its operand is. + pub operand_type : StaticTypes, + /// What type it returns. + pub return_type : StaticTypes } /// Represents a block of code / compound statements @@ -228,18 +236,42 @@ pub struct LineNode { } #[derive(Clone)] +pub struct FileNode { + pub filename : String +} + +#[derive(Clone)] pub struct EmptyNode; /// All base types, determined at compile time. -#[derive(Clone, Copy, PartialEq)] -pub enum BaseTypes { +#[derive(Clone, PartialEq)] +pub enum StaticTypes { TNatural, TInteger, TReal, - TString, TSym, + TString, TSymbol, + TSet(Box<StaticTypes>), + TFunction(Box<StaticTypes>, Box<StaticTypes>), TNil, TUnknown } +impl fmt::Display for StaticTypes { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let s = match self { + StaticTypes::TNatural => "Natural".to_string(), + StaticTypes::TInteger => "Integer".to_string(), + StaticTypes::TReal => "Real".to_string(), + StaticTypes::TString => "String".to_string(), + StaticTypes::TSymbol => "Symbol".to_string(), + StaticTypes::TSet(st) => format!("Set({})", st), + StaticTypes::TFunction(o, r) => format!("Function({}, {})", o, r), + StaticTypes::TNil => "Nil".to_string(), + StaticTypes::TUnknown => "Unknown".to_string(), + }; + write!(f, "{}", s) + } +} + /// All node types. #[derive(Clone)] pub enum Nodes { @@ -250,6 +282,7 @@ pub enum Nodes { Call(CallNode), Block(BlockNode), Line(LineNode), + File(FileNode), Empty(EmptyNode), } @@ -266,6 +299,7 @@ impl fmt::Display for Nodes { node.operands.iter().map(Nodes::to_string).collect::<Vec<String>>().join("\n ")), Nodes::Block(_) => format!("%block{{ ... }}"), Nodes::Line(node) => format!("%newline{{ :line {} }}", node.line), + Nodes::File(node) => format!("%newfile{{ :filename {} }}", node.filename), Nodes::Empty(_) => String::from("()"), }; write!(f, "{}", printable) @@ -283,19 +317,25 @@ macro_rules! 
unwrap_enum { impl Nodes { - pub fn yield_type(&self) -> BaseTypes { + /// Function that returns the statically known type + /// of any syntactic node generated. + pub fn yield_type(&self) -> StaticTypes { match self { Nodes::Num(nn) => { match nn.value { - Numerics::Natural(_) => BaseTypes::TNatural, - Numerics::Integer(_) => BaseTypes::TInteger, - Numerics::Real(_) => BaseTypes::TReal, + Numerics::Natural(_) => StaticTypes::TNatural, + Numerics::Integer(_) => StaticTypes::TInteger, + Numerics::Real(_) => StaticTypes::TReal, } }, - Nodes::Str(_) => BaseTypes::TString, - Nodes::Sym(_) => BaseTypes::TSym, - - _ => BaseTypes::TUnknown + Nodes::Str(_) => StaticTypes::TString, + Nodes::Sym(_) => StaticTypes::TSymbol, + Nodes::Ident(i) => i.static_type.clone(), + Nodes::Call(c) => StaticTypes::TFunction( + Box::new(c.operand_type.clone()), + Box::new(c.return_type.clone())), + + _ => StaticTypes::TUnknown } } @@ -306,6 +346,7 @@ impl Nodes { pub fn call(&self) -> Option<&CallNode> { unwrap_enum!(self, Nodes::Call) } pub fn block(&self) -> Option<&BlockNode> { unwrap_enum!(self, Nodes::Block) } pub fn line(&self) -> Option<&LineNode> { unwrap_enum!(self, Nodes::Line) } + pub fn file(&self) -> Option<&FileNode> { unwrap_enum!(self, Nodes::File) } pub fn empty(&self) -> Option<&EmptyNode> { unwrap_enum!(self, Nodes::Empty) } pub fn is_atomic(&self) -> bool { @@ -328,7 +369,12 @@ impl Nodes { } impl IdentNode { - pub fn new(value : &str) -> Nodes { Nodes::Ident(IdentNode { value: value.to_string() }) } + pub fn new(value : &str) -> Nodes { + Nodes::Ident(IdentNode { + value: value.to_string(), + static_type: StaticTypes::TUnknown + }) + } } impl NumNode { @@ -351,6 +397,8 @@ impl CallNode { Nodes::Call(CallNode { callee: Box::new(callee), operands: operands, + operand_type: StaticTypes::TUnknown, + return_type: StaticTypes::TUnknown }) } @@ -368,6 +416,10 @@ impl LineNode { pub fn new(line : usize) -> Nodes { Nodes::Line(LineNode { line }) } } +impl FileNode { + pub fn 
new(filename : String) -> Nodes { Nodes::File(FileNode { filename }) } +} + impl EmptyNode { pub fn new() -> Nodes { Nodes::Empty(EmptyNode { }) } } diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs @@ -1,3 +1,5 @@ +use std::collections::{HashMap, VecDeque}; + use super::token; use super::ast; use super::operators; @@ -6,6 +8,7 @@ use super::super::err; use token::{Token, TokenType}; use ast::Nodes; +use ast::StaticTypes as ST; pub fn parse(stream : Vec<Token>, file : &str) -> ast::Root { let mut environment = ParseEnvironment::new(stream, file); @@ -21,6 +24,8 @@ struct ParseEnvironment<'a> { pub stream : Vec<Token>, pub optable : operators::PrecedenceTable<'a>, pub file : &'a str, + pub annotations : VecDeque<ast::CallNode>, + pub ident_types : HashMap<String, ast::StaticTypes>, ignore_newline : bool, line_number : usize, @@ -32,6 +37,8 @@ impl<'a> ParseEnvironment<'a> { root: ast::Root::new(), stream: stream, optable: operators::PrecedenceTable::new(), + annotations: VecDeque::new(), + ident_types: HashMap::new(), file, ignore_newline: false, @@ -40,6 +47,8 @@ impl<'a> ParseEnvironment<'a> { } pub fn start(&mut self) { + self.root.branches.push(ast::FileNode::new(self.file.to_owned())); + let mut current = self.stream.first(); while current.is_some() && current.unwrap().class != TokenType::EOF { if current.unwrap().class == TokenType::Term { @@ -51,6 +60,28 @@ impl<'a> ParseEnvironment<'a> { self.root.branches.push(e); current = self.stream.get(0); } + self.assign_types(); + } + + fn get_type(&self, node : &ast::Nodes) -> ast::StaticTypes { + if let Some(ident) = node.ident() { + return match ident.value.as_str() { + "Nat" => ST::TSet(Box::new(ST::TNatural)), + "Int" => ST::TSet(Box::new(ST::TInteger)), + "Real" => ST::TSet(Box::new(ST::TReal)), + "Universal" => ST::TSet(Box::new(ST::TUnknown)), + _ => ident.static_type.clone() + }; + } + node.yield_type() + } + + fn remember_type(&mut self, k : String, v : ast::StaticTypes) { + if 
self.ident_types.contains_key(&k) { + self.ident_types.insert(k, ast::StaticTypes::TUnknown); + return; + } + self.ident_types.insert(k, v); } fn shift(&mut self) -> Token { @@ -158,6 +189,27 @@ impl<'a> ParseEnvironment<'a> { left = self.func_apply(left); } } + if !left.call().unwrap().is_binary() { return left; } + if let Some(call_ident) = left.call().unwrap().callee.call().unwrap().callee.ident() { + if call_ident.value == ":" { + self.annotations.push_back(left.call().unwrap().clone()); + } + if call_ident.value == "=" { + let maybe_annotation = self.annotations.pop_front(); + if let Some(annotation) = maybe_annotation { + let maybe_set = self.get_type(&annotation.operands[0]); + if let ast::StaticTypes::TSet(set) = maybe_set { + self.remember_type( + left.call().unwrap().callee.call().unwrap().operands[0].ident().unwrap().value.to_owned(), + *set); + } else { + // Error, annotation must be set. + } + } else { + // Error, missing annotation for assignment. + } + } + } return left; } @@ -196,6 +248,39 @@ impl<'a> ParseEnvironment<'a> { "Unexpected token type: `{}`, expected: `{}`.", t.class, tt); } } + + + + fn assign_types(&mut self) { + + fn recurse_type_assign(subtree : &Nodes, map : &HashMap<String, ST>) -> Nodes { + match subtree { + Nodes::Ident(ident_node) => { + if map.contains_key(&ident_node.value) { + let mut cloned_ident = ident_node.clone(); + cloned_ident.static_type = map[&ident_node.value].clone(); + return Nodes::Ident(cloned_ident); + } + }, + Nodes::Call(call_node) => { + let mut cloned_call = call_node.clone(); + cloned_call.callee = Box::new(recurse_type_assign(&*call_node.callee, map)); + cloned_call.operands = vec![recurse_type_assign(&call_node.operands[0], map)]; + return Nodes::Call(cloned_call); + }, + _ => () + }; + return subtree.to_owned(); + }; + + let mut i = 0; + let tree_size = self.root.branches.len(); + + while i < tree_size { + self.root.branches[i] = recurse_type_assign(&self.root.branches[i], &self.ident_types); + i += 1; 
+ } + } } #[cfg(test)] diff --git a/test.vh b/test.vh @@ -1,2 +1,8 @@ -a : Int -a = 3 + 9- \ No newline at end of file +a : Nat +a = 3 + 9 + +b : Real +b = a + 4 + +c : XYZ +c = a + b+ \ No newline at end of file