valhallac

Compiler for a set-theoretic programming language.
git clone git://git.knutsen.co/valhallac
Log | Files | Refs | README | LICENSE

commit 4c22f60b1198153ec1880f15c5a9d172510493fa
parent 4d5959d62ea46789cffe2099f3c57c5422ccebc5
Author: Demonstrandum <moi@knutsen.co>
Date:   Mon, 12 Aug 2019 00:12:24 +0100

Compile simple curried functions.

Diffstat:
MREADME.md | 15+++++++++++++++
Acurrent_compiler_test.md | 265+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msamples/functions.vh | 21+++++++++++++++------
Msrc/compiler/block.rs | 175++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------
Msrc/compiler/element.rs | 20++++++++++----------
Msrc/compiler/instructions.rs | 2++
Msrc/err.rs | 8++++----
Msrc/lib.rs | 2+-
Msrc/syntax/analyser.rs | 128++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Msrc/syntax/ast.rs | 73+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
Msrc/syntax/location.rs | 1+
Msrc/syntax/parser.rs | 18++++++++++++++----
Msrc/syntax/token.rs | 3++-
Mtest.vh | 17++++++++++++++---
14 files changed, 602 insertions(+), 146 deletions(-)

diff --git a/README.md b/README.md @@ -28,6 +28,12 @@ What's been done so far on the front-end: instructions as well as basic arithmetic. - [x] Access, assignment and retrieval of local variables within code-block scope. + - [x] Generating curried functions. + - [ ] Optimise functions to not curry when currying is not necessary (by tracking arity of + function's definition and function's call). + - [ ] Optimise functions to not search globally for variables when they + come from nested closures (nested closures implement currying). + - [ ] Optimise functions for tail calls. - [ ] Track variable and function types. - [ ] Marshaling, i.e. serialising the bytecode and storing it in a file for future interpretation and execution by the virtual machine. @@ -36,6 +42,9 @@ What's been done so far on the front-end: The VM, i.e. the backend for the language, is being developed independently and will have its own progress and check-list updates. +### Example of what the compiler currently does: +[current_compiler_test.md](https://github.com/Demonstrandum/valhallac/current_compiler_test.md) + ### Description This repository contains the front-end (parser and @@ -57,3 +66,8 @@ verify proofs and such in and around set theory. The language is a general purpose, but instead of being totally object-oriented, or functional, etc., it's just set theory based. From what I've gathered, it's not a very popular paradigm. + +### Dependencies +Yikes... + +![deps](https://github.com/Demonstrandum/valhalla/raw/master/graph.png)+ \ No newline at end of file diff --git a/current_compiler_test.md b/current_compiler_test.md @@ -0,0 +1,264 @@ +# Compiler Example +This file contains a regularly updated code example. +The compiled code you will see is currently the best the compiler can do. +Obviously, there are many optimisations yet to come. 
+ +## Example 1 +Given the source code: +```hs +a : Nat -> Nat -> Int +a n m = n - 2*m + +a 1 2 + +-- a = n |-> (m |-> n + 2*m) +-- |_____________| +-- | +-- func: `a__1` +-- |_____________________| +-- | +-- func: `a__0` +-- |__________________________| +-- | +-- func: a +``` + +--- + +The code is lexically analysed and generates the following tokens-stream: +```clojure +[ [ Identifier: "a" (1, 1):1 ], + [ Operator: ":" (1, 3):1 ], + [ Identifier: "Nat" (1, 5):3 ], + [ Operator: "->" (1, 9):2 ], + [ Identifier: "Nat" (1, 12):3 ], + [ Operator: "->" (1, 16):2 ], + [ Identifier: "Int" (1, 19):3 ], + [ Terminator: "\n" (1, 22):1 ], + [ Identifier: "a" (2, 1):1 ], + [ Identifier: "n" (2, 3):1 ], + [ Identifier: "m" (2, 5):1 ], + [ Operator: "=" (2, 7):1 ], + [ Identifier: "n" (2, 9):1 ], + [ Operator: "-" (2, 11):1 ], + [ Numeric: "2" (2, 13):1 ], + [ Operator: "*" (2, 14):1 ], + [ Identifier: "m" (2, 15):1 ], + [ Terminator: "\n" (2, 16):1 ], + [ Terminator: "\n" (3, 1):1 ], + [ Identifier: "a" (4, 1):1 ], + [ Numeric: "1" (4, 3):1 ], + [ Numeric: "2" (4, 5):1 ], + [ Terminator: "\n" (4, 6):1 ], + [ Terminator: "\n" (5, 1):1 ], + [ Terminator: "\n" (6, 29):1 ], + [ Terminator: "\n" (7, 29):1 ], + [ Terminator: "\n" (8, 22):1 ], + [ Terminator: "\n" (9, 30):1 ], + [ Terminator: "\n" (10, 31):1 ], + [ Terminator: "\n" (11, 20):1 ], + [ Terminator: "\n" (12, 28):1 ], + [ Terminator: "\n" (13, 32):1 ], + [ Terminator: "\n" (14, 14):1 ], + [ End-Of-File: "\u{0}" (15, 17):1 ] ] +``` + +--- + +From the token-stream, an abstract syntax tree (AST) is generated: +```hs +[| + %newfile{ :filename test.vh } + %newline{ :line 1 } + %call{ + :yield anything + :callee ( + %call{ + :yield anything + :callee ( + %ident{ :value ":"; :yield anything } + ) + :operand [| + %ident{ :value "a"; :yield (Nat ↦ (Nat 🡒 Int)) } + |] + } + ) + :operand [| + %call{ + :yield anything + :callee ( + %call{ + :yield anything + :callee ( + %ident{ :value "->"; :yield anything } + ) + :operand [| + 
%ident{ :value "Nat"; :yield Nat } + |] + } + ) + :operand [| + %call{ + :yield anything + :callee ( + %call{ + :yield anything + :callee ( + %ident{ :value "->"; :yield anything } + ) + :operand [| + %ident{ :value "Nat"; :yield Nat } + |] + } + ) + :operand [| + %ident{ :value "Int"; :yield Int } + |] + } + |] + } + |] + } + %newline{ :line 2 } + %call{ + :yield anything + :callee ( + %call{ + :yield anything + :callee ( + %ident{ :value "="; :yield anything } + ) + :operand [| + %call{ + :yield anything + :callee ( + %call{ + :yield anything + :callee ( + %ident{ :value "a"; :yield anything } + ) + :operand [| + %ident{ :value "n"; :yield anything } + |] + } + ) + :operand [| + %ident{ :value "m"; :yield anything } + |] + } + |] + } + ) + :operand [| + %call{ + :yield natural + :callee ( + %call{ + :yield anything + :callee ( + %ident{ :value "-"; :yield anything } + ) + :operand [| + %ident{ :value "n"; :yield natural } + |] + } + ) + :operand [| + %call{ + :yield anything + :callee ( + %call{ + :yield anything + :callee ( + %ident{ :value "*"; :yield anything } + ) + :operand [| + %num{ :value 2; :yield natural } + |] + } + ) + :operand [| + %ident{ :value "m"; :yield natural } + |] + } + |] + } + |] + } + %newline{ :line 4 } + %call{ + :yield integer + :callee ( + %call{ + :yield (Nat ↦ Int) + :callee ( + %ident{ :value "a"; :yield (Nat ↦ (Nat 🡒 Int)) } + ) + :operand [| + %num{ :value 1; :yield natural } + |] + } + ) + :operand [| + %num{ :value 2; :yield natural } + |] + } + %newline{ :line 15 } +|] +``` + +--- + +And the AST is compiled to bytecode. 
+The following is a disassembly of the generated bytecode: + +```lisp +a__1: + | ===Constants=============== + | 0 | 2 => (Nat) | + | ===Locals================== + | 0 | m + | ===Globals================= + | 0 | n + | ===Bytecodes=============== + | (00000010):PUSH_LOCAL 0 + | (00000001):PUSH_CONST 0 + | (00110001):N_MUL + | (00000011):PUSH_SUPER 0 + | (00110000):U_SUB + +a__0: + | ===Constants=============== + | 0 | a__1 => (Block) | + | 1 | :a__1 => (Sym) | + | ===Locals================== + | 0 | n + | ===Globals================= + | ===Bytecodes=============== + | (00000001):PUSH_CONST 0 + | (00000001):PUSH_CONST 1 + | (00001100):MAKE_FUNC + +<main>: + | ===Constants=============== + | 0 | a__0 => (Block) | + | 1 | :a => (Sym) | + | 2 | 2 => (Nat) | + | 3 | 1 => (Nat) | + | ===Locals================== + | 0 | a + | ===Globals================= + | ===Bytecodes=============== + | (11111110):SET_LINE 2 + | (00000001):PUSH_CONST 0 + | (00000001):PUSH_CONST 1 + | (00001100):MAKE_FUNC + | (00000101):STORE_LOCAL 0 + | (11111110):SET_LINE 4 + | (00000001):PUSH_CONST 2 + | (00000001):PUSH_CONST 3 + | (00000010):PUSH_LOCAL 0 + | (00001001):CALL_1 + | (00001001):CALL_1 + | (11111110):SET_LINE 15 +```+ \ No newline at end of file diff --git a/samples/functions.vh b/samples/functions.vh @@ -1,6 +1,15 @@ -plus : Nat -> Nat -> Nat +a : Nat -> Nat -> Int +a n m = n - 2*m -postulate do: - plus n 0 = n - plus 0 n = n - plus (succ n) m = succ (plus n m)- \ No newline at end of file +a 1 2 + +-- a = n |-> (m |-> n - 2*m) +-- |_____________| +-- | +-- func: `a__1` +-- |_____________________| +-- | +-- func: `a__0` +-- |__________________________| +-- | +-- func: a+ \ No newline at end of file diff --git a/src/compiler/block.rs b/src/compiler/block.rs @@ -5,6 +5,7 @@ use super::super::err; use super::super::syntax; use syntax::ast; +use syntax::ast::Nodes; use super::element; use super::instructions; @@ -29,12 +30,12 @@ pub fn numerics_to_element<'a>(num : &ast::Numerics) -> 
Element<'a> { } #[derive(Clone)] -struct IdentTypePair<'a>(String, &'a ast::Nodes); +struct IdentTypePair<'a>(String, &'a Nodes); #[derive(Clone)] pub struct LocalBlock<'a> { - pub name : &'a str, - filename : &'a str, + pub name : String, + filename : String, constants : Vec<Element<'a>>, instructions : Vec<Instr>, globals : Vec<String>, @@ -55,10 +56,10 @@ impl<'a> PartialEq for LocalBlock<'a> { } impl<'a> LocalBlock<'a> { - pub fn new(name : &'a str, filename : &'a str) -> Self { + pub fn new(n : &str, f : &str) -> Self { LocalBlock { - name, - filename, + name: n.to_string(), + filename: f.to_string(), constants: vec![], instructions: vec![], globals: vec![], @@ -72,46 +73,103 @@ impl<'a> LocalBlock<'a> { } fn push_const_instr(&mut self, e : Element<'a>) { - let index = append_unique(&mut self.constants, e); - self.instructions.push(Instr::Operator(Operators::PUSH_CONST as u8)); - self.instructions.push(Instr::Operand(index as u16)); + let index = append_unique(&mut self.constants, e) as u16; + self.push_operator(Operators::PUSH_CONST); + self.push_operand(index); } - fn ident_assignment(&mut self, left : &ast::IdentNode, right : &'a ast::Nodes) { + fn push_operator(&mut self, o : Operators) { + self.instructions.push(Instr::Operator(o as u8)); + } + + fn push_operand(&mut self, i : u16) { + self.instructions.push(Instr::Operand(i)); + } + + fn insert_local(&mut self, s : String) -> u16 { + let index = self.locals_map.len() as u16; + self.locals_map.insert(s, index); + index + } + + fn ident_assignment(&mut self, left : &'a ast::IdentNode, right : &'a Nodes) { if self.types_to_check.is_empty() { - issue!(err::Types::TypeError, self.filename, err::NO_TOKEN, self.current_line, + issue!(err::Types::TypeError, &self.filename, err::NO_TOKEN, self.current_line, "You must state what set `{}' is a member of. 
No type-annotation found.", left.value); } if self.locals_map.contains_key(&left.value) { - issue!(err::Types::CompError, self.filename, err::NO_TOKEN, self.current_line, - "Cannot mutate value of `{}', as is already bound.", left.value); + issue!(err::Types::CompError, &self.filename, err::NO_TOKEN, self.current_line, + "Cannot mutate value of `{}', as it is already bound.", left.value); } - let index = self.locals_map.len() as u16; - self.locals_map.insert(left.value.to_owned(), index); + let index = self.insert_local(left.value.to_owned()); self.emit(right); if left.static_type == ast::StaticTypes::TUnknown || left.static_type != right.yield_type() { - self.instructions.push(Instr::Operator(Operators::DUP as u8)); + self.push_operator(Operators::DUP); let type_node = self.types_to_check.pop_front().unwrap().1; self.emit(type_node); - self.instructions.push(Instr::Operator(Operators::CHECK_TYPE as u8)); + self.push_operator(Operators::CHECK_TYPE); } else { // Otherwise just pop, type was already checked statically so // its of no use to include in the compiled program, // as no dynamic checking is needed. self.types_to_check.pop_front(); } - self.instructions.push(Instr::Operator(Operators::STORE_LOCAL as u8)); - self.instructions.push(Instr::Operand(index)); + self.push_operator(Operators::STORE_LOCAL); + self.push_operand(index); + } + + fn function_assign(&mut self, left : &ast::CallNode, right : &'a Nodes) { + let mut arguments = left.collect(); + let base_node = arguments.remove(0); + + if let Nodes::Ident(ident) = base_node { + let name = format!("{}__{}", ident.value.to_owned(), arguments.len() - 1); + + let mut last_block = LocalBlock::new(&name, &self.filename); + // TODO: Be more careful here, not always an ident. + // NEED TO DEAL WITH PATTERN MATCHING. 
+ last_block.insert_local(arguments.last().unwrap().ident().unwrap().value.to_owned()); + last_block.emit(right); + + for i in (0..(arguments.len() - 1)).rev() { + let name = format!("{}__{}", ident.value, i); + let mut super_block = LocalBlock::new( + &name, + &self.filename); + // Also TODO: Pattern matching, be careful in the future. + super_block.insert_local(arguments[i].ident().unwrap().value.to_owned()); + + let block_name = last_block.name.clone(); + super_block.push_const_instr(Element::ECode(last_block)); + super_block.push_const_instr(Element::ESymbol(Symbol::new(&block_name))); + super_block.push_operator(Operators::MAKE_FUNC); + last_block = super_block; + } + + let index = self.insert_local(ident.value.to_owned()); + + self.push_const_instr(Element::ECode(last_block)); + self.push_const_instr(Element::ESymbol(Symbol::new(&ident.value))); + self.push_operator(Operators::MAKE_FUNC); + self.push_operator(Operators::STORE_LOCAL); + self.push_operand(index); + return; + } + + // A function of multiple arguments (say 3 f.eks), + // must generate a function, which when called returns + // a function, and when that function is called, it returns + // the final value. 
} - fn annotation(&mut self, left : &ast::IdentNode, right : &'a ast::Nodes) { + fn annotation(&mut self, left : &ast::IdentNode, right : &'a Nodes) { self.types_to_check.push_back(IdentTypePair(left.value.to_owned(), right)); } - fn emit(&mut self, node : &'a ast::Nodes) { + fn emit(&mut self, node : &'a Nodes) { match node { - ast::Nodes::Line(line_node) => { + Nodes::Line(line_node) => { let len = self.instructions.len(); if len > 1 { if self.instructions[len - 2] == Instr::Operator(Operators::SET_LINE as u8) { @@ -120,31 +178,31 @@ impl<'a> LocalBlock<'a> { } } self.current_line = line_node.line; - self.instructions.push(Instr::Operator(Operators::SET_LINE as u8)); - self.instructions.push(Instr::Operand(self.current_line as u16)); + self.push_operator(Operators::SET_LINE); + self.push_operand(self.current_line as u16); } - ast::Nodes::Ident(ident_node) => { + Nodes::Ident(ident_node) => { let s = &ident_node.value; if !self.locals_map.contains_key(s) { - self.instructions.push(Instr::Operator(Operators::PUSH_SUPER as u8)); - let index = append_unique(&mut self.globals, s.to_owned()); - self.instructions.push(Instr::Operand(index as u16)); + self.push_operator(Operators::PUSH_SUPER); + let index = append_unique(&mut self.globals, s.to_owned()) as u16; + self.push_operand(index); return; } - self.instructions.push(Instr::Operator(Operators::PUSH_LOCAL as u8)); - self.instructions.push(Instr::Operand(self.locals_map[s])); + self.push_operator(Operators::PUSH_LOCAL); + self.push_operand(self.locals_map[s]); }, - ast::Nodes::Num(num_node) => { + Nodes::Num(num_node) => { self.push_const_instr(numerics_to_element(&num_node.value)); }, - ast::Nodes::Str(str_node) => { + Nodes::Str(str_node) => { self.push_const_instr(Element::EString(&str_node.value)); }, - ast::Nodes::Sym(sym_node) => { + Nodes::Sym(sym_node) => { self.push_const_instr(Element::ESymbol(Symbol::new(&sym_node.value))); }, - ast::Nodes::Call(call_node) => { + Nodes::Call(call_node) => { if 
call_node.is_binary() { let ident = call_node.callee.call().unwrap().callee.ident().unwrap(); let args = vec![ @@ -155,26 +213,26 @@ impl<'a> LocalBlock<'a> { // Check for cast. if ident.value == "cast" { self.emit(args[0]); - self.instructions.push(Instr::Operator(Operators::CAST as u8)); + self.push_operator(Operators::CAST); if let Some(cast_name) = args[1].get_name() { let cast_to : u16 = match cast_name { "Real" => 0b00000011, "Int" => 0b00000010, "Nat" => 0b00000001, - _ => issue!(err::Types::TypeError, self.filename, err::NO_TOKEN, self.current_line, + _ => issue!(err::Types::TypeError, &self.filename, err::NO_TOKEN, self.current_line, "Compiler does not know how to cast to `{}'.", cast_name) }; let cast_from = match args[0].yield_type() { ast::StaticTypes::TReal => 0b00000011, ast::StaticTypes::TInteger => 0b00000010, ast::StaticTypes::TNatural => 0b00000001, - _ => issue!(err::Types::TypeError, self.filename, err::NO_TOKEN, self.current_line, + _ => issue!(err::Types::TypeError, &self.filename, err::NO_TOKEN, self.current_line, "Compiler does not know how to cast from `{}'.", args[0].yield_type()) }; - self.instructions.push(Instr::Operand(cast_from << 8 | cast_to)); + self.push_operand(cast_from << 8 | cast_to); } else { - issue!(err::Types::CompError, self.filename, err::NO_TOKEN, self.current_line, + issue!(err::Types::CompError, &self.filename, err::NO_TOKEN, self.current_line, "Cast-type provided to `cast' has to be a type-name.") } return; @@ -183,8 +241,10 @@ impl<'a> LocalBlock<'a> { // Check for assignment. if ident.value == "=" { // Direct variable assignment: - if let ast::Nodes::Ident(left) = args[0] { + if let Nodes::Ident(left) = args[0] { self.ident_assignment(left, args[1]); + } else if let Nodes::Call(left) = args[0] { + self.function_assign(left, args[1]); } return; } @@ -194,7 +254,7 @@ impl<'a> LocalBlock<'a> { // If the LHS is not an ident, it is not a // valid annotation. 
if args[0].ident().is_none() { - issue!(err::Types::CompError, self.filename, err::NO_TOKEN, self.current_line, + issue!(err::Types::CompError, &self.filename, err::NO_TOKEN, self.current_line, "Left of `:` type annotator must be an identifier."); } let left = args[0].ident().unwrap(); @@ -213,15 +273,18 @@ impl<'a> LocalBlock<'a> { return; } } + // TODO: Optimise to implicitly ignore currying and use CALL_N instead. + // Also, check that we are indeed calling a function, and not anything else + // by checking the static yield type. self.emit(&call_node.operands[0]); self.emit(&*call_node.callee); - self.instructions.push(Instr::Operator(Operators::CALL_1 as u8)); + self.push_operator(Operators::CALL_1); }, _ => () }; } - pub fn generate(&mut self, nodes : &'a Vec<ast::Nodes>) { + pub fn generate(&mut self, nodes : &'a Vec<Nodes>) { for node in nodes { self.emit(node); } @@ -230,21 +293,31 @@ impl<'a> LocalBlock<'a> { impl<'a> fmt::Display for LocalBlock<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "===Constants===============\n")?; + for c in &self.constants { + if let Element::ECode(local_block) = c { + write!(f, "{}", local_block)?; + } + } + write!(f, "\n{}:\n", self.name)?; + write!(f, " | ===Constants===============\n")?; for (i, c) in self.constants.iter().enumerate() { - write!(f, "{: >3} | {} |\n", i, c)?; + write!(f, " | {: >3} | {} |\n", i, c)?; } - write!(f, "===Locals==================\n")?; + write!(f, " | ===Locals==================\n")?; for key in self.locals_map.keys() { - write!(f, "{: >3} | {}\n", self.locals_map[key], key)?; + write!(f, " | {: >3} | {}\n", self.locals_map[key], key)?; } - write!(f, "===Globals=================\n")?; + write!(f, " | ===Globals=================\n")?; for (i, c) in self.globals.iter().enumerate() { - write!(f, "{: >3} | {}\n", i, c)?; + write!(f, " | {: >3} | {}\n", i, c)?; } - write!(f, "===Bytecodes===============\n")?; + write!(f, " | ===Bytecodes===============\n")?; for inst in 
&self.instructions { - write!(f, "{}", inst)?; + if let Instr::Operand(_) = inst { + write!(f, "{}", inst)?; + } else { + write!(f, " | {}", inst)?; + } } write!(f, "") } diff --git a/src/compiler/element.rs b/src/compiler/element.rs @@ -7,10 +7,10 @@ use snailquote::escape; use super::block; use super::types; -#[derive(Clone, Copy)] -pub struct Symbol<'a> { +#[derive(Clone)] +pub struct Symbol { hash : u64, - string : &'a str + string : String } fn hash_symbol(string : &str) -> u64 { @@ -19,22 +19,22 @@ fn hash_symbol(string : &str) -> u64 { s.finish() } -impl<'a> Symbol<'a> { - pub fn new(string : &'a str) -> Self { +impl Symbol { + pub fn new(s : &str) -> Self { Symbol { - hash: hash_symbol(string), - string + hash: hash_symbol(s), + string: s.to_owned() } } } -impl<'a> fmt::Display for Symbol<'a> { +impl fmt::Display for Symbol { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, ":{}", self.string) } } -impl<'a> PartialEq for Symbol<'a> { +impl PartialEq for Symbol { fn eq(&self, other : &Self) -> bool { self.hash == other.hash } @@ -46,7 +46,7 @@ pub enum Element<'a> { EInteger(isize), EReal(f64), EString(&'a str), - ESymbol(Symbol<'a>), + ESymbol(Symbol), ECode(block::LocalBlock<'a>), ESet(types::Set<'a>), ENil diff --git a/src/compiler/instructions.rs b/src/compiler/instructions.rs @@ -42,6 +42,7 @@ pub enum Operators { CALL_1 = 9, CHECK_TYPE = 10, CAST = 11, + MAKE_FUNC = 12, N_ADD = 40, I_ADD = 41, @@ -82,6 +83,7 @@ impl fmt::Display for Operators { Operators::CALL_1 => "CALL_1\n", Operators::CHECK_TYPE => "CHECK_TYPE\n", Operators::CAST => "CAST", + Operators::MAKE_FUNC => "MAKE_FUNC\n", Operators::N_ADD => "N_ADD\n", Operators::I_ADD => "I_ADD\n", diff --git a/src/err.rs b/src/err.rs @@ -35,8 +35,8 @@ pub fn tissue(class : Types, filename : &str, token : &token::Token, message : let file = fs::File::open(filename).expect("Invalid filename for error message."); let line = BufReader::new(file).lines().nth((token.location.line - 1) as 
usize).unwrap().unwrap(); - let unindented = unindent(message); - eprintln!("{}{} {}", "issue".bold().red(), ":".white(), unindented.bold()); + let formatted = unindent(message).split('\n').collect::<Vec<&str>>().join("\n "); + eprintln!("{}{} {}", "issue".bold().red(), ":".white(), formatted.bold()); eprint!("{}", "".clear()); eprintln!(" ==> {class} in (`{file}`:{line}:{col}):\n{space}|\n{line_str}| {stuff}", class=class.to_string().bold(), file=filename, line=token.location.line, @@ -51,8 +51,8 @@ pub fn lissue(class : Types, filename : &str, line_n : usize, message : &str) { let file = fs::File::open(filename).expect("Invalid filename for error message."); let line = BufReader::new(file).lines().nth((line_n - 1) as usize).unwrap().unwrap(); - let unindented = unindent(message); - eprintln!("{}{} {}", "issue".bold().red(), ":".white(), unindented.bold()); + let formatted = unindent(message).split("\n").collect::<Vec<&str>>().join("\n "); + eprintln!("{}{} {}", "issue".bold().red(), ":".white(), formatted.bold()); eprint!("{}", "".clear()); eprintln!(" ==> {class} in (`{file}`:{line}):\n{space}|\n{line_str}| {stuff}", class=class.to_string().bold(), file=filename, line=line_n, diff --git a/src/lib.rs b/src/lib.rs @@ -20,6 +20,6 @@ pub fn parse(filename : &str) { code_block.generate(&root.branches); - println!("Code Block:\n{}", code_block) + println!("Code Blocks:\n{}", code_block) } diff --git a/src/syntax/analyser.rs b/src/syntax/analyser.rs @@ -1,5 +1,4 @@ -use std::collections::{HashMap, VecDeque}; -use std::cell::RefCell; +use std::collections::HashMap; use crate::err; @@ -163,8 +162,6 @@ fn balance_types(node : &Nodes) -> Nodes { return node.to_owned(); } -type VarType = (String, ast::StaticTypes); - #[derive(Clone)] struct TypeChecker { pub source_line : usize, @@ -218,78 +215,97 @@ impl TypeChecker { return clone; } else { // Error: We need the left to be an ident. 
- issue!(err::Types::TypeError, + issue!(err::Types::ParseError, self.source_file.as_str(), err::NO_TOKEN, self.source_line, "The left side of the member-of operator (`:`), must be an identifier. - Only variable names can be declared as being members of sets."); + You supplied a type of `{}'. + Only variable names can be declared as being members of sets.", + callee.operands[0].node_type()); } }, "=" => { // This is useful for checking variables in functions. - if let Nodes::Call(ref assignee) = callee.operands[0] { - // Check all the types in the annotation (A -> B -> C) - // and match them to the arguments found on the left side - // of the assignment (=). Compile these matches into a list - // and pass that list into a new TypeChecker object which checks - // the right hand side of the assignment, matching up the sub-scoped - // variables. - - // A -> B -> C -> D - // f a b c = d - // <=> - // (A -> (B -> (C -> D))) - // ( ((=) ( (((f a) b) c) )) d) - fn collect_args(s : &TypeChecker, call_node : &Nodes, operands : Vec<Nodes>) -> Vec<Nodes> { - let mut pushed = operands.clone(); - - if let Nodes::Call(call) = call_node { - pushed.insert(0, call.operands[0].clone()); - return collect_args(s, &*call.callee, pushed); + match &callee.operands[0] { + Nodes::Call(ref assignee) => { + // Check all the types in the annotation (A -> B -> C) + // and match them to the arguments found on the left side + // of the assignment (=). Compile these matches into a list + // and pass that list into a new TypeChecker object which checks + // the right hand side of the assignment, matching up the sub-scoped + // variables. 
+ + // A -> B -> C -> D + // f a b c = d + // <=> + // (A -> (B -> (C -> D))) + // ( ((=) ( (((f a) b) c) )) d) + + let mut operands = assignee.collect(); + let mut func_checker = self.clone(); + + let base_node = operands.remove(0); + if base_node.ident().is_none() { + issue!(err::Types::ParseError, + &self.source_file, err::NO_TOKEN, self.source_line, + "Function definitions must have the defining function's base caller + be an identifier! You're trying to define a function that has + `{}' as base caller...", base_node.node_type()); } - if let Nodes::Ident(ident) = call_node { - pushed.insert(0, call_node.clone()); - return pushed; + let maybe_type = self.ident_map.get(&base_node.ident().unwrap().value); + if maybe_type.is_none() { + println!("{}", base_node); + println!("{:?}", self.ident_map); + issue!(err::Types::TypeError, + self.source_file.as_str(), + err::NO_TOKEN, self.source_line, + "Cannot find type annotation for the + function definition of `{}'.", + base_node.ident().unwrap().value); } - issue!(err::Types::ParseError, - s.source_file.as_str(), - err::NO_TOKEN, s.source_line, - "Function definition must have base caller be an identifier."); - } - - let mut operands = collect_args(&self, &callee.operands[0], vec![]); - let mut func_checker = self.clone(); - - let maybe_type = self.ident_map.get(&operands.remove(0).ident().unwrap().value); - if maybe_type.is_none() { - issue!(err::Types::TypeError, - self.source_file.as_str(), - err::NO_TOKEN, self.source_line, - "Cannot find type annotation for this function."); - } - let mut t = maybe_type.unwrap().clone(); - - for operand in operands { - if let Nodes::Ident(ident) = operand { - if let ast::StaticTypes::TSet(f) = &t { - if let ast::StaticTypes::TFunction(i, o) = *f.clone() { - func_checker.ident_map.insert(ident.value, *i.clone()); - t = *o.clone(); + let mut t = maybe_type.unwrap().clone(); + + for operand in operands { + if let Nodes::Ident(ident) = operand { + if let ast::StaticTypes::TSet(f) = &t 
{ + if let ast::StaticTypes::TFunction(i, o) = *f.clone() { + func_checker.ident_map.insert(ident.value, *i.clone()); + t = *o.clone(); + } } } } - } - call.operands[0] = func_checker.type_branch(&call.operands[0]); - return clone; + call.operands[0] = func_checker.type_branch(&call.operands[0]); + return clone; + } + Nodes::Ident(_assignee) => { + // TODO: + // Here, if the ident exists in the ident_map, that means + // we need to check if both sides of the `=`'s types match up. + // If it does not exist, we need to infer its type by looking at + // the RHS and statically determine the RHS's type, and adding that + // type to the ident_map for the assignee. + } + _ => () } } _ => () } } } - + // TODO HERE: + // We need to check to see if the function being called + // has a statically determined type, and if so, check that + // the operand to that function call has the exact same + // static type. + // If there is a type-mismatch, just throw an `issue!`. + // (If the function is statically typed, so + // must all the arguments be as well). + // The call must have a yield of type `function` and the + // input part of the function (input |-> output), must match + // the type of the operand. :^) call.callee = Box::new(self.type_branch(&*call.callee)); call.operands = vec![self.type_branch(&call.operands[0])]; diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs @@ -1,4 +1,5 @@ use std::{fmt, ops}; +use std::collections::VecDeque; /// Identifiers, node representing a name that /// will represent a value stored. 
@@ -266,16 +267,36 @@ impl StaticTypes { impl fmt::Display for StaticTypes { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let ss; let s = match self { - StaticTypes::TNatural => "Nat".to_string(), - StaticTypes::TInteger => "Int".to_string(), - StaticTypes::TReal => "Real".to_string(), - StaticTypes::TString => "Str".to_string(), - StaticTypes::TSymbol => "Sym".to_string(), - StaticTypes::TSet(st) => format!("Set {}", st), - StaticTypes::TFunction(o, r) => format!("({} -> {})", o, r), - StaticTypes::TNil => "Nil".to_string(), - StaticTypes::TUnknown => "Universal".to_string(), + StaticTypes::TNatural => "natural", + StaticTypes::TInteger => "integer", + StaticTypes::TReal => "real", + StaticTypes::TString => "string", + StaticTypes::TSymbol => "symbol", + StaticTypes::TSet(st) => match *st.clone() { + StaticTypes::TNatural => "Nat", + StaticTypes::TInteger => "Int", + StaticTypes::TReal => "Real", + StaticTypes::TString => "Str", + StaticTypes::TSymbol => "Sym", + StaticTypes::TFunction(o, r) => { + ss = format!("({} \u{1f852} {})", o, r); + ss.as_str() + }, + StaticTypes::TNil => "Nil", + StaticTypes::TUnknown => "Any", + _ => { + ss = format!("Set {}", st); + ss.as_str() + }, + }, + StaticTypes::TFunction(o, r) => { + ss = format!("({} \u{21a6} {})", o, r); + ss.as_str() + }, + StaticTypes::TNil => "nil", + StaticTypes::TUnknown => "anything", }; write!(f, "{}", s) } @@ -345,7 +366,10 @@ impl Nodes { "Nat" => StaticTypes::TSet(Box::new(StaticTypes::TNatural)), "Int" => StaticTypes::TSet(Box::new(StaticTypes::TInteger)), "Real" => StaticTypes::TSet(Box::new(StaticTypes::TReal)), - "Universal" => StaticTypes::TSet(Box::new(StaticTypes::TUnknown)), + "Str" => StaticTypes::TSet(Box::new(StaticTypes::TString)), + "Sym" => StaticTypes::TSet(Box::new(StaticTypes::TSymbol)), + "Nil" => StaticTypes::TSet(Box::new(StaticTypes::TNil)), + "Any" => StaticTypes::TSet(Box::new(StaticTypes::TUnknown)), _ => ident.static_type.to_owned() } }, @@ -389,6 +413,19 @@ impl 
Nodes { } } + pub fn node_type(&self) -> &str { + match self { + Nodes::Ident(_) => "identifier", + Nodes::Num(_) => "numeric", + Nodes::Str(_) => "string", + Nodes::Sym(_) => "symbol", + Nodes::Empty(_) => "empty", + Nodes::Call(_) => "function-call", + Nodes::Block(_) => "code-block", + _ => "ungrammatical-meta-node" + } + } + pub fn get_name(&self) -> Option<&str> { match self { Nodes::Str(n) => Some(n.value.as_str()), @@ -464,6 +501,22 @@ impl CallNode { self.return_type = new_type; } + pub fn collect(&self) -> Vec<Nodes> { + fn make_argument_vector(call_node : &Nodes, operands : VecDeque<Nodes>) -> VecDeque<Nodes> { + let mut pushable = operands.clone(); + + if let Nodes::Call(call) = call_node { + pushable.push_front(call.operands[0].clone()); + return make_argument_vector(&*call.callee, pushable); + } + + pushable.push_front(call_node.clone()); + return pushable; + } + let q = make_argument_vector(&Nodes::Call(self.clone()), VecDeque::new()); + Vec::from(q) + } + pub fn is_unary(&self) -> bool { self.callee.ident().is_some() && !self.operands.is_empty() } diff --git a/src/syntax/location.rs b/src/syntax/location.rs @@ -1,4 +1,5 @@ /// Holds line, column and span of a lexical token. +#[derive(Clone)] pub struct Loc { /// Line number. 
pub line : u32, diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs @@ -13,6 +13,7 @@ pub fn parse(stream : Vec<Token>, file : &str) -> ast::Root { environment.start(); + environment.root } @@ -24,13 +25,15 @@ struct ParseEnvironment<'a> { ignore_newline : bool, line_number : usize, + eof_token : Token } impl<'a> ParseEnvironment<'a> { pub fn new(stream : Vec<Token>, file : &'a str) -> Self { ParseEnvironment { root: ast::Root::new(), - stream: stream, + eof_token: stream.last().unwrap().to_owned(), + stream, optable: operators::PrecedenceTable::new(), file, @@ -53,10 +56,13 @@ impl<'a> ParseEnvironment<'a> { self.root.branches.push(e); current = self.stream.get(0); } - //self.assign_types(); + self.shift(); } fn shift(&mut self) -> Token { + if self.stream.is_empty() { + self.stream.push(self.eof_token.clone()); + } let shifted = self.stream.remove(0); if shifted.location.line as usize != self.line_number { if self.root.branches.last().is_some() @@ -86,11 +92,15 @@ impl<'a> ParseEnvironment<'a> { ast::IdentNode::new(&token.string) }, _ => { + // If the operator is prefix: + // e.g. -a <=> ((-) a) + // Otherwise it's a partial application: + // e.g. (* a) <=> ((flip (*)) a) if prefix.is_none() { ast::CallNode::new( ast::CallNode::new( - ast::IdentNode::new(&token.string), - vec![ast::EmptyNode::new()]), + ast::IdentNode::new("flip"), + vec![ast::IdentNode::new(&token.string)]), vec![self.expr(500)]) } else { ast::CallNode::new( diff --git a/src/syntax/token.rs b/src/syntax/token.rs @@ -6,7 +6,7 @@ use unicode_width::UnicodeWidthStr; /// Contains all possible types/classes of /// lexiacal tokens. -#[derive(PartialEq)] +#[derive(PartialEq, Clone)] pub enum TokenType { /// Identifiers, variables, function names etc. Ident, @@ -67,6 +67,7 @@ impl fmt::Display for TokenType { /// Token structure, an individual lexiacal token, /// represented by its type/class, what it was written as /// in the program, and its location in the code. 
+#[derive(Clone)] pub struct Token { /// What type/class of token it is. pub class : TokenType, diff --git a/test.vh b/test.vh @@ -1,4 +1,15 @@ a : Nat -> Nat -> Int -a n m = n + 2*m +a n m = n - 2*m -a 1 2- \ No newline at end of file +a 1 2 + +-- a = n |-> (m |-> n - 2*m) +-- |_____________| +-- | +-- func: `a__1` +-- |_____________________| +-- | +-- func: `a__0` +-- |__________________________| +-- | +-- func: a+ \ No newline at end of file