commit 4c22f60b1198153ec1880f15c5a9d172510493fa
parent 4d5959d62ea46789cffe2099f3c57c5422ccebc5
Author: Demonstrandum <moi@knutsen.co>
Date: Mon, 12 Aug 2019 00:12:24 +0100
Compile simple curried functions.
Diffstat:
14 files changed, 602 insertions(+), 146 deletions(-)
diff --git a/README.md b/README.md
@@ -28,6 +28,12 @@ What's been done so far on the front-end:
instructions as well as basic arithmetic.
- [x] Access, assignment and retrieval of local variables within
code-block scope.
+ - [x] Generating curried functions.
+ - [ ] Optimise functions to not curry when currying is not necessary (by tracking arity of
+ function's definition and function's call).
+ - [ ] Optimise functions to not search globally for variables when they
+ come from nested closures (nested closures implement currying).
+ - [ ] Optimise functions for tail calls.
- [ ] Track variable and function types.
- [ ] Marshaling, i.e. serialising the bytecode and storing it in a file
for future interpretation and execution by the virtual machine.
@@ -36,6 +42,9 @@ What's been done so far on the front-end:
The VM, i.e. the backend for the language, is being developed independently
and will have its own progress and check-list updates.
+### Example of what the compiler currently does:
+[current_compiler_test.md](current_compiler_test.md)
+
### Description
This repository contains the front-end (parser and
@@ -57,3 +66,8 @@ verify proofs and such in and around set theory.
The language is a general purpose, but instead of being totally object-oriented,
or functional, etc., it's just set theory based. From what I've
gathered, it's not a very popular paradigm.
+
+### Dependencies
+Yikes...
+
+![deps](https://github.com/Demonstrandum/valhalla/raw/master/graph.png)+
\ No newline at end of file
diff --git a/current_compiler_test.md b/current_compiler_test.md
@@ -0,0 +1,264 @@
+# Compiler Example
+This file contains a regularly updated code example.
+The compiled code you will see is currently the best the compiler can do.
+Obviously, there are many optimisations yet to come.
+
+## Example 1
+Given the source code:
+```hs
+a : Nat -> Nat -> Int
+a n m = n - 2*m
+
+a 1 2
+
+-- a = n |-> (m |-> n - 2*m)
+-- |_____________|
+-- |
+-- func: `a__1`
+-- |_____________________|
+-- |
+-- func: `a__0`
+-- |__________________________|
+-- |
+-- func: a
+```
+
+---
+
+The code is lexically analysed and generates the following token-stream:
+```clojure
+[ [ Identifier: "a" (1, 1):1 ],
+ [ Operator: ":" (1, 3):1 ],
+ [ Identifier: "Nat" (1, 5):3 ],
+ [ Operator: "->" (1, 9):2 ],
+ [ Identifier: "Nat" (1, 12):3 ],
+ [ Operator: "->" (1, 16):2 ],
+ [ Identifier: "Int" (1, 19):3 ],
+ [ Terminator: "\n" (1, 22):1 ],
+ [ Identifier: "a" (2, 1):1 ],
+ [ Identifier: "n" (2, 3):1 ],
+ [ Identifier: "m" (2, 5):1 ],
+ [ Operator: "=" (2, 7):1 ],
+ [ Identifier: "n" (2, 9):1 ],
+ [ Operator: "-" (2, 11):1 ],
+ [ Numeric: "2" (2, 13):1 ],
+ [ Operator: "*" (2, 14):1 ],
+ [ Identifier: "m" (2, 15):1 ],
+ [ Terminator: "\n" (2, 16):1 ],
+ [ Terminator: "\n" (3, 1):1 ],
+ [ Identifier: "a" (4, 1):1 ],
+ [ Numeric: "1" (4, 3):1 ],
+ [ Numeric: "2" (4, 5):1 ],
+ [ Terminator: "\n" (4, 6):1 ],
+ [ Terminator: "\n" (5, 1):1 ],
+ [ Terminator: "\n" (6, 29):1 ],
+ [ Terminator: "\n" (7, 29):1 ],
+ [ Terminator: "\n" (8, 22):1 ],
+ [ Terminator: "\n" (9, 30):1 ],
+ [ Terminator: "\n" (10, 31):1 ],
+ [ Terminator: "\n" (11, 20):1 ],
+ [ Terminator: "\n" (12, 28):1 ],
+ [ Terminator: "\n" (13, 32):1 ],
+ [ Terminator: "\n" (14, 14):1 ],
+ [ End-Of-File: "\u{0}" (15, 17):1 ] ]
+```
+
+---
+
+From the token-stream, an abstract syntax tree (AST) is generated:
+```hs
+[|
+ %newfile{ :filename test.vh }
+ %newline{ :line 1 }
+ %call{
+ :yield anything
+ :callee (
+ %call{
+ :yield anything
+ :callee (
+ %ident{ :value ":"; :yield anything }
+ )
+ :operand [|
+ %ident{ :value "a"; :yield (Nat ↦ (Nat 🡒 Int)) }
+ |]
+ }
+ )
+ :operand [|
+ %call{
+ :yield anything
+ :callee (
+ %call{
+ :yield anything
+ :callee (
+ %ident{ :value "->"; :yield anything }
+ )
+ :operand [|
+ %ident{ :value "Nat"; :yield Nat }
+ |]
+ }
+ )
+ :operand [|
+ %call{
+ :yield anything
+ :callee (
+ %call{
+ :yield anything
+ :callee (
+ %ident{ :value "->"; :yield anything }
+ )
+ :operand [|
+ %ident{ :value "Nat"; :yield Nat }
+ |]
+ }
+ )
+ :operand [|
+ %ident{ :value "Int"; :yield Int }
+ |]
+ }
+ |]
+ }
+ |]
+ }
+ %newline{ :line 2 }
+ %call{
+ :yield anything
+ :callee (
+ %call{
+ :yield anything
+ :callee (
+ %ident{ :value "="; :yield anything }
+ )
+ :operand [|
+ %call{
+ :yield anything
+ :callee (
+ %call{
+ :yield anything
+ :callee (
+ %ident{ :value "a"; :yield anything }
+ )
+ :operand [|
+ %ident{ :value "n"; :yield anything }
+ |]
+ }
+ )
+ :operand [|
+ %ident{ :value "m"; :yield anything }
+ |]
+ }
+ |]
+ }
+ )
+ :operand [|
+ %call{
+ :yield natural
+ :callee (
+ %call{
+ :yield anything
+ :callee (
+ %ident{ :value "-"; :yield anything }
+ )
+ :operand [|
+ %ident{ :value "n"; :yield natural }
+ |]
+ }
+ )
+ :operand [|
+ %call{
+ :yield anything
+ :callee (
+ %call{
+ :yield anything
+ :callee (
+ %ident{ :value "*"; :yield anything }
+ )
+ :operand [|
+ %num{ :value 2; :yield natural }
+ |]
+ }
+ )
+ :operand [|
+ %ident{ :value "m"; :yield natural }
+ |]
+ }
+ |]
+ }
+ |]
+ }
+ %newline{ :line 4 }
+ %call{
+ :yield integer
+ :callee (
+ %call{
+ :yield (Nat ↦ Int)
+ :callee (
+ %ident{ :value "a"; :yield (Nat ↦ (Nat 🡒 Int)) }
+ )
+ :operand [|
+ %num{ :value 1; :yield natural }
+ |]
+ }
+ )
+ :operand [|
+ %num{ :value 2; :yield natural }
+ |]
+ }
+ %newline{ :line 15 }
+|]
+```
+
+---
+
+And the AST is compiled to bytecode.
+The following is a disassembly of the generated bytecode:
+
+```lisp
+a__1:
+ | ===Constants===============
+ | 0 | 2 => (Nat) |
+ | ===Locals==================
+ | 0 | m
+ | ===Globals=================
+ | 0 | n
+ | ===Bytecodes===============
+ | (00000010):PUSH_LOCAL 0
+ | (00000001):PUSH_CONST 0
+ | (00110001):N_MUL
+ | (00000011):PUSH_SUPER 0
+ | (00110000):U_SUB
+
+a__0:
+ | ===Constants===============
+ | 0 | a__1 => (Block) |
+ | 1 | :a__1 => (Sym) |
+ | ===Locals==================
+ | 0 | n
+ | ===Globals=================
+ | ===Bytecodes===============
+ | (00000001):PUSH_CONST 0
+ | (00000001):PUSH_CONST 1
+ | (00001100):MAKE_FUNC
+
+<main>:
+ | ===Constants===============
+ | 0 | a__0 => (Block) |
+ | 1 | :a => (Sym) |
+ | 2 | 2 => (Nat) |
+ | 3 | 1 => (Nat) |
+ | ===Locals==================
+ | 0 | a
+ | ===Globals=================
+ | ===Bytecodes===============
+ | (11111110):SET_LINE 2
+ | (00000001):PUSH_CONST 0
+ | (00000001):PUSH_CONST 1
+ | (00001100):MAKE_FUNC
+ | (00000101):STORE_LOCAL 0
+ | (11111110):SET_LINE 4
+ | (00000001):PUSH_CONST 2
+ | (00000001):PUSH_CONST 3
+ | (00000010):PUSH_LOCAL 0
+ | (00001001):CALL_1
+ | (00001001):CALL_1
+ | (11111110):SET_LINE 15
+```+
\ No newline at end of file
diff --git a/samples/functions.vh b/samples/functions.vh
@@ -1,6 +1,15 @@
-plus : Nat -> Nat -> Nat
+a : Nat -> Nat -> Int
+a n m = n - 2*m
-postulate do:
- plus n 0 = n
- plus 0 n = n
- plus (succ n) m = succ (plus n m)-
\ No newline at end of file
+a 1 2
+
+-- a = n |-> (m |-> n - 2*m)
+-- |_____________|
+-- |
+-- func: `a__1`
+-- |_____________________|
+-- |
+-- func: `a__0`
+-- |__________________________|
+-- |
+-- func: a+
\ No newline at end of file
diff --git a/src/compiler/block.rs b/src/compiler/block.rs
@@ -5,6 +5,7 @@ use super::super::err;
use super::super::syntax;
use syntax::ast;
+use syntax::ast::Nodes;
use super::element;
use super::instructions;
@@ -29,12 +30,12 @@ pub fn numerics_to_element<'a>(num : &ast::Numerics) -> Element<'a> {
}
#[derive(Clone)]
-struct IdentTypePair<'a>(String, &'a ast::Nodes);
+struct IdentTypePair<'a>(String, &'a Nodes);
#[derive(Clone)]
pub struct LocalBlock<'a> {
- pub name : &'a str,
- filename : &'a str,
+ pub name : String,
+ filename : String,
constants : Vec<Element<'a>>,
instructions : Vec<Instr>,
globals : Vec<String>,
@@ -55,10 +56,10 @@ impl<'a> PartialEq for LocalBlock<'a> {
}
impl<'a> LocalBlock<'a> {
- pub fn new(name : &'a str, filename : &'a str) -> Self {
+ pub fn new(n : &str, f : &str) -> Self {
LocalBlock {
- name,
- filename,
+ name: n.to_string(),
+ filename: f.to_string(),
constants: vec![],
instructions: vec![],
globals: vec![],
@@ -72,46 +73,103 @@ impl<'a> LocalBlock<'a> {
}
fn push_const_instr(&mut self, e : Element<'a>) {
- let index = append_unique(&mut self.constants, e);
- self.instructions.push(Instr::Operator(Operators::PUSH_CONST as u8));
- self.instructions.push(Instr::Operand(index as u16));
+ let index = append_unique(&mut self.constants, e) as u16;
+ self.push_operator(Operators::PUSH_CONST);
+ self.push_operand(index);
}
- fn ident_assignment(&mut self, left : &ast::IdentNode, right : &'a ast::Nodes) {
+ fn push_operator(&mut self, o : Operators) {
+ self.instructions.push(Instr::Operator(o as u8));
+ }
+
+ fn push_operand(&mut self, i : u16) {
+ self.instructions.push(Instr::Operand(i));
+ }
+
+ fn insert_local(&mut self, s : String) -> u16 {
+ let index = self.locals_map.len() as u16;
+ self.locals_map.insert(s, index);
+ index
+ }
+
+ fn ident_assignment(&mut self, left : &'a ast::IdentNode, right : &'a Nodes) {
if self.types_to_check.is_empty() {
- issue!(err::Types::TypeError, self.filename, err::NO_TOKEN, self.current_line,
+ issue!(err::Types::TypeError, &self.filename, err::NO_TOKEN, self.current_line,
"You must state what set `{}' is a member of. No type-annotation found.", left.value);
}
if self.locals_map.contains_key(&left.value) {
- issue!(err::Types::CompError, self.filename, err::NO_TOKEN, self.current_line,
- "Cannot mutate value of `{}', as is already bound.", left.value);
+ issue!(err::Types::CompError, &self.filename, err::NO_TOKEN, self.current_line,
+ "Cannot mutate value of `{}', as it is already bound.", left.value);
}
- let index = self.locals_map.len() as u16;
- self.locals_map.insert(left.value.to_owned(), index);
+ let index = self.insert_local(left.value.to_owned());
self.emit(right);
if left.static_type == ast::StaticTypes::TUnknown
|| left.static_type != right.yield_type() {
- self.instructions.push(Instr::Operator(Operators::DUP as u8));
+ self.push_operator(Operators::DUP);
let type_node = self.types_to_check.pop_front().unwrap().1;
self.emit(type_node);
- self.instructions.push(Instr::Operator(Operators::CHECK_TYPE as u8));
+ self.push_operator(Operators::CHECK_TYPE);
} else { // Otherwise just pop, type was already checked statically so
// its of no use to include in the compiled program,
// as no dynamic checking is needed.
self.types_to_check.pop_front();
}
- self.instructions.push(Instr::Operator(Operators::STORE_LOCAL as u8));
- self.instructions.push(Instr::Operand(index));
+ self.push_operator(Operators::STORE_LOCAL);
+ self.push_operand(index);
+ }
+
+ fn function_assign(&mut self, left : &ast::CallNode, right : &'a Nodes) {
+ let mut arguments = left.collect();
+ let base_node = arguments.remove(0);
+
+ if let Nodes::Ident(ident) = base_node {
+ let name = format!("{}__{}", ident.value.to_owned(), arguments.len() - 1);
+
+ let mut last_block = LocalBlock::new(&name, &self.filename);
+ // TODO: Be more careful here, not always an ident.
+ // NEED TO DEAL WITH PATTERN MATCHING.
+ last_block.insert_local(arguments.last().unwrap().ident().unwrap().value.to_owned());
+ last_block.emit(right);
+
+ for i in (0..(arguments.len() - 1)).rev() {
+ let name = format!("{}__{}", ident.value, i);
+ let mut super_block = LocalBlock::new(
+ &name,
+ &self.filename);
+ // Also TODO: Pattern matching, be careful in the future.
+ super_block.insert_local(arguments[i].ident().unwrap().value.to_owned());
+
+ let block_name = last_block.name.clone();
+ super_block.push_const_instr(Element::ECode(last_block));
+ super_block.push_const_instr(Element::ESymbol(Symbol::new(&block_name)));
+ super_block.push_operator(Operators::MAKE_FUNC);
+ last_block = super_block;
+ }
+
+ let index = self.insert_local(ident.value.to_owned());
+
+ self.push_const_instr(Element::ECode(last_block));
+ self.push_const_instr(Element::ESymbol(Symbol::new(&ident.value)));
+ self.push_operator(Operators::MAKE_FUNC);
+ self.push_operator(Operators::STORE_LOCAL);
+ self.push_operand(index);
+ return;
+ }
+
+ // A function of multiple arguments (say 3, for example),
+ // must generate a function, which when called returns
+ // a function, and when that function is called, it returns
+ // the final value.
}
- fn annotation(&mut self, left : &ast::IdentNode, right : &'a ast::Nodes) {
+ fn annotation(&mut self, left : &ast::IdentNode, right : &'a Nodes) {
self.types_to_check.push_back(IdentTypePair(left.value.to_owned(), right));
}
- fn emit(&mut self, node : &'a ast::Nodes) {
+ fn emit(&mut self, node : &'a Nodes) {
match node {
- ast::Nodes::Line(line_node) => {
+ Nodes::Line(line_node) => {
let len = self.instructions.len();
if len > 1 {
if self.instructions[len - 2] == Instr::Operator(Operators::SET_LINE as u8) {
@@ -120,31 +178,31 @@ impl<'a> LocalBlock<'a> {
}
}
self.current_line = line_node.line;
- self.instructions.push(Instr::Operator(Operators::SET_LINE as u8));
- self.instructions.push(Instr::Operand(self.current_line as u16));
+ self.push_operator(Operators::SET_LINE);
+ self.push_operand(self.current_line as u16);
}
- ast::Nodes::Ident(ident_node) => {
+ Nodes::Ident(ident_node) => {
let s = &ident_node.value;
if !self.locals_map.contains_key(s) {
- self.instructions.push(Instr::Operator(Operators::PUSH_SUPER as u8));
- let index = append_unique(&mut self.globals, s.to_owned());
- self.instructions.push(Instr::Operand(index as u16));
+ self.push_operator(Operators::PUSH_SUPER);
+ let index = append_unique(&mut self.globals, s.to_owned()) as u16;
+ self.push_operand(index);
return;
}
- self.instructions.push(Instr::Operator(Operators::PUSH_LOCAL as u8));
- self.instructions.push(Instr::Operand(self.locals_map[s]));
+ self.push_operator(Operators::PUSH_LOCAL);
+ self.push_operand(self.locals_map[s]);
},
- ast::Nodes::Num(num_node) => {
+ Nodes::Num(num_node) => {
self.push_const_instr(numerics_to_element(&num_node.value));
},
- ast::Nodes::Str(str_node) => {
+ Nodes::Str(str_node) => {
self.push_const_instr(Element::EString(&str_node.value));
},
- ast::Nodes::Sym(sym_node) => {
+ Nodes::Sym(sym_node) => {
self.push_const_instr(Element::ESymbol(Symbol::new(&sym_node.value)));
},
- ast::Nodes::Call(call_node) => {
+ Nodes::Call(call_node) => {
if call_node.is_binary() {
let ident = call_node.callee.call().unwrap().callee.ident().unwrap();
let args = vec![
@@ -155,26 +213,26 @@ impl<'a> LocalBlock<'a> {
// Check for cast.
if ident.value == "cast" {
self.emit(args[0]);
- self.instructions.push(Instr::Operator(Operators::CAST as u8));
+ self.push_operator(Operators::CAST);
if let Some(cast_name) = args[1].get_name() {
let cast_to : u16 = match cast_name {
"Real" => 0b00000011,
"Int" => 0b00000010,
"Nat" => 0b00000001,
- _ => issue!(err::Types::TypeError, self.filename, err::NO_TOKEN, self.current_line,
+ _ => issue!(err::Types::TypeError, &self.filename, err::NO_TOKEN, self.current_line,
"Compiler does not know how to cast to `{}'.", cast_name)
};
let cast_from = match args[0].yield_type() {
ast::StaticTypes::TReal => 0b00000011,
ast::StaticTypes::TInteger => 0b00000010,
ast::StaticTypes::TNatural => 0b00000001,
- _ => issue!(err::Types::TypeError, self.filename, err::NO_TOKEN, self.current_line,
+ _ => issue!(err::Types::TypeError, &self.filename, err::NO_TOKEN, self.current_line,
"Compiler does not know how to cast from `{}'.", args[0].yield_type())
};
- self.instructions.push(Instr::Operand(cast_from << 8 | cast_to));
+ self.push_operand(cast_from << 8 | cast_to);
} else {
- issue!(err::Types::CompError, self.filename, err::NO_TOKEN, self.current_line,
+ issue!(err::Types::CompError, &self.filename, err::NO_TOKEN, self.current_line,
"Cast-type provided to `cast' has to be a type-name.")
}
return;
@@ -183,8 +241,10 @@ impl<'a> LocalBlock<'a> {
// Check for assignment.
if ident.value == "=" {
// Direct variable assignment:
- if let ast::Nodes::Ident(left) = args[0] {
+ if let Nodes::Ident(left) = args[0] {
self.ident_assignment(left, args[1]);
+ } else if let Nodes::Call(left) = args[0] {
+ self.function_assign(left, args[1]);
}
return;
}
@@ -194,7 +254,7 @@ impl<'a> LocalBlock<'a> {
// If the LHS is not an ident, it is not a
// valid annotation.
if args[0].ident().is_none() {
- issue!(err::Types::CompError, self.filename, err::NO_TOKEN, self.current_line,
+ issue!(err::Types::CompError, &self.filename, err::NO_TOKEN, self.current_line,
"Left of `:` type annotator must be an identifier.");
}
let left = args[0].ident().unwrap();
@@ -213,15 +273,18 @@ impl<'a> LocalBlock<'a> {
return;
}
}
+ // TODO: Optimise to implicitly ignore currying and use CALL_N instead.
+ // Also, check that we are indeed calling a function, and not anything else
+ // by checking the static yield type.
self.emit(&call_node.operands[0]);
self.emit(&*call_node.callee);
- self.instructions.push(Instr::Operator(Operators::CALL_1 as u8));
+ self.push_operator(Operators::CALL_1);
},
_ => ()
};
}
- pub fn generate(&mut self, nodes : &'a Vec<ast::Nodes>) {
+ pub fn generate(&mut self, nodes : &'a Vec<Nodes>) {
for node in nodes {
self.emit(node);
}
@@ -230,21 +293,31 @@ impl<'a> LocalBlock<'a> {
impl<'a> fmt::Display for LocalBlock<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- write!(f, "===Constants===============\n")?;
+ for c in &self.constants {
+ if let Element::ECode(local_block) = c {
+ write!(f, "{}", local_block)?;
+ }
+ }
+ write!(f, "\n{}:\n", self.name)?;
+ write!(f, " | ===Constants===============\n")?;
for (i, c) in self.constants.iter().enumerate() {
- write!(f, "{: >3} | {} |\n", i, c)?;
+ write!(f, " | {: >3} | {} |\n", i, c)?;
}
- write!(f, "===Locals==================\n")?;
+ write!(f, " | ===Locals==================\n")?;
for key in self.locals_map.keys() {
- write!(f, "{: >3} | {}\n", self.locals_map[key], key)?;
+ write!(f, " | {: >3} | {}\n", self.locals_map[key], key)?;
}
- write!(f, "===Globals=================\n")?;
+ write!(f, " | ===Globals=================\n")?;
for (i, c) in self.globals.iter().enumerate() {
- write!(f, "{: >3} | {}\n", i, c)?;
+ write!(f, " | {: >3} | {}\n", i, c)?;
}
- write!(f, "===Bytecodes===============\n")?;
+ write!(f, " | ===Bytecodes===============\n")?;
for inst in &self.instructions {
- write!(f, "{}", inst)?;
+ if let Instr::Operand(_) = inst {
+ write!(f, "{}", inst)?;
+ } else {
+ write!(f, " | {}", inst)?;
+ }
}
write!(f, "")
}
diff --git a/src/compiler/element.rs b/src/compiler/element.rs
@@ -7,10 +7,10 @@ use snailquote::escape;
use super::block;
use super::types;
-#[derive(Clone, Copy)]
-pub struct Symbol<'a> {
+#[derive(Clone)]
+pub struct Symbol {
hash : u64,
- string : &'a str
+ string : String
}
fn hash_symbol(string : &str) -> u64 {
@@ -19,22 +19,22 @@ fn hash_symbol(string : &str) -> u64 {
s.finish()
}
-impl<'a> Symbol<'a> {
- pub fn new(string : &'a str) -> Self {
+impl Symbol {
+ pub fn new(s : &str) -> Self {
Symbol {
- hash: hash_symbol(string),
- string
+ hash: hash_symbol(s),
+ string: s.to_owned()
}
}
}
-impl<'a> fmt::Display for Symbol<'a> {
+impl fmt::Display for Symbol {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, ":{}", self.string)
}
}
-impl<'a> PartialEq for Symbol<'a> {
+impl PartialEq for Symbol {
fn eq(&self, other : &Self) -> bool {
self.hash == other.hash
}
@@ -46,7 +46,7 @@ pub enum Element<'a> {
EInteger(isize),
EReal(f64),
EString(&'a str),
- ESymbol(Symbol<'a>),
+ ESymbol(Symbol),
ECode(block::LocalBlock<'a>),
ESet(types::Set<'a>),
ENil
diff --git a/src/compiler/instructions.rs b/src/compiler/instructions.rs
@@ -42,6 +42,7 @@ pub enum Operators {
CALL_1 = 9,
CHECK_TYPE = 10,
CAST = 11,
+ MAKE_FUNC = 12,
N_ADD = 40,
I_ADD = 41,
@@ -82,6 +83,7 @@ impl fmt::Display for Operators {
Operators::CALL_1 => "CALL_1\n",
Operators::CHECK_TYPE => "CHECK_TYPE\n",
Operators::CAST => "CAST",
+ Operators::MAKE_FUNC => "MAKE_FUNC\n",
Operators::N_ADD => "N_ADD\n",
Operators::I_ADD => "I_ADD\n",
diff --git a/src/err.rs b/src/err.rs
@@ -35,8 +35,8 @@ pub fn tissue(class : Types, filename : &str, token : &token::Token, message :
let file = fs::File::open(filename).expect("Invalid filename for error message.");
let line = BufReader::new(file).lines().nth((token.location.line - 1) as usize).unwrap().unwrap();
- let unindented = unindent(message);
- eprintln!("{}{} {}", "issue".bold().red(), ":".white(), unindented.bold());
+ let formatted = unindent(message).split('\n').collect::<Vec<&str>>().join("\n ");
+ eprintln!("{}{} {}", "issue".bold().red(), ":".white(), formatted.bold());
eprint!("{}", "".clear());
eprintln!(" ==> {class} in (`{file}`:{line}:{col}):\n{space}|\n{line_str}| {stuff}",
class=class.to_string().bold(), file=filename, line=token.location.line,
@@ -51,8 +51,8 @@ pub fn lissue(class : Types, filename : &str, line_n : usize, message : &str) {
let file = fs::File::open(filename).expect("Invalid filename for error message.");
let line = BufReader::new(file).lines().nth((line_n - 1) as usize).unwrap().unwrap();
- let unindented = unindent(message);
- eprintln!("{}{} {}", "issue".bold().red(), ":".white(), unindented.bold());
+ let formatted = unindent(message).split("\n").collect::<Vec<&str>>().join("\n ");
+ eprintln!("{}{} {}", "issue".bold().red(), ":".white(), formatted.bold());
eprint!("{}", "".clear());
eprintln!(" ==> {class} in (`{file}`:{line}):\n{space}|\n{line_str}| {stuff}",
class=class.to_string().bold(), file=filename, line=line_n,
diff --git a/src/lib.rs b/src/lib.rs
@@ -20,6 +20,6 @@ pub fn parse(filename : &str) {
code_block.generate(&root.branches);
- println!("Code Block:\n{}", code_block)
+ println!("Code Blocks:\n{}", code_block)
}
diff --git a/src/syntax/analyser.rs b/src/syntax/analyser.rs
@@ -1,5 +1,4 @@
-use std::collections::{HashMap, VecDeque};
-use std::cell::RefCell;
+use std::collections::HashMap;
use crate::err;
@@ -163,8 +162,6 @@ fn balance_types(node : &Nodes) -> Nodes {
return node.to_owned();
}
-type VarType = (String, ast::StaticTypes);
-
#[derive(Clone)]
struct TypeChecker {
pub source_line : usize,
@@ -218,78 +215,97 @@ impl TypeChecker {
return clone;
} else {
// Error: We need the left to be an ident.
- issue!(err::Types::TypeError,
+ issue!(err::Types::ParseError,
self.source_file.as_str(),
err::NO_TOKEN, self.source_line,
"The left side of the member-of operator (`:`), must be an identifier.
- Only variable names can be declared as being members of sets.");
+ You supplied a type of `{}'.
+ Only variable names can be declared as being members of sets.",
+ callee.operands[0].node_type());
}
},
"=" => {
// This is useful for checking variables in functions.
- if let Nodes::Call(ref assignee) = callee.operands[0] {
- // Check all the types in the annotation (A -> B -> C)
- // and match them to the arguments found on the left side
- // of the assignment (=). Compile these matches into a list
- // and pass that list into a new TypeChecker object which checks
- // the right hand side of the assignment, matching up the sub-scoped
- // variables.
-
- // A -> B -> C -> D
- // f a b c = d
- // <=>
- // (A -> (B -> (C -> D)))
- // ( ((=) ( (((f a) b) c) )) d)
- fn collect_args(s : &TypeChecker, call_node : &Nodes, operands : Vec<Nodes>) -> Vec<Nodes> {
- let mut pushed = operands.clone();
-
- if let Nodes::Call(call) = call_node {
- pushed.insert(0, call.operands[0].clone());
- return collect_args(s, &*call.callee, pushed);
+ match &callee.operands[0] {
+ Nodes::Call(ref assignee) => {
+ // Check all the types in the annotation (A -> B -> C)
+ // and match them to the arguments found on the left side
+ // of the assignment (=). Compile these matches into a list
+ // and pass that list into a new TypeChecker object which checks
+ // the right hand side of the assignment, matching up the sub-scoped
+ // variables.
+
+ // A -> B -> C -> D
+ // f a b c = d
+ // <=>
+ // (A -> (B -> (C -> D)))
+ // ( ((=) ( (((f a) b) c) )) d)
+
+ let mut operands = assignee.collect();
+ let mut func_checker = self.clone();
+
+ let base_node = operands.remove(0);
+ if base_node.ident().is_none() {
+ issue!(err::Types::ParseError,
+ &self.source_file, err::NO_TOKEN, self.source_line,
+ "Function definitions must have the defining function's base caller
+ be an identifier! You're trying to define a function that has
+ `{}' as base caller...", base_node.node_type());
}
- if let Nodes::Ident(ident) = call_node {
- pushed.insert(0, call_node.clone());
- return pushed;
+ let maybe_type = self.ident_map.get(&base_node.ident().unwrap().value);
+ if maybe_type.is_none() {
+ println!("{}", base_node);
+ println!("{:?}", self.ident_map);
+ issue!(err::Types::TypeError,
+ self.source_file.as_str(),
+ err::NO_TOKEN, self.source_line,
+ "Cannot find type annotation for the
+ function definition of `{}'.",
+ base_node.ident().unwrap().value);
}
- issue!(err::Types::ParseError,
- s.source_file.as_str(),
- err::NO_TOKEN, s.source_line,
- "Function definition must have base caller be an identifier.");
- }
-
- let mut operands = collect_args(&self, &callee.operands[0], vec![]);
- let mut func_checker = self.clone();
-
- let maybe_type = self.ident_map.get(&operands.remove(0).ident().unwrap().value);
- if maybe_type.is_none() {
- issue!(err::Types::TypeError,
- self.source_file.as_str(),
- err::NO_TOKEN, self.source_line,
- "Cannot find type annotation for this function.");
- }
- let mut t = maybe_type.unwrap().clone();
-
- for operand in operands {
- if let Nodes::Ident(ident) = operand {
- if let ast::StaticTypes::TSet(f) = &t {
- if let ast::StaticTypes::TFunction(i, o) = *f.clone() {
- func_checker.ident_map.insert(ident.value, *i.clone());
- t = *o.clone();
+ let mut t = maybe_type.unwrap().clone();
+
+ for operand in operands {
+ if let Nodes::Ident(ident) = operand {
+ if let ast::StaticTypes::TSet(f) = &t {
+ if let ast::StaticTypes::TFunction(i, o) = *f.clone() {
+ func_checker.ident_map.insert(ident.value, *i.clone());
+ t = *o.clone();
+ }
}
}
}
- }
- call.operands[0] = func_checker.type_branch(&call.operands[0]);
- return clone;
+ call.operands[0] = func_checker.type_branch(&call.operands[0]);
+ return clone;
+ }
+ Nodes::Ident(_assignee) => {
+ // TODO:
+ // Here, if the ident exists in the ident_map, that means
+ // we need to check if both sides of the `=`'s types match up.
+ // If it does not exist, we need to infer its type by looking at
+ // the RHS, statically determining the RHS's type, and adding that
+ // type to the ident_map for the assignee.
+ }
+ _ => ()
}
}
_ => ()
}
}
}
-
+ // TODO HERE:
+ // We need to check to see if the function being called
+ // has a statically determined type, and if so, check that
+ // the operand to that function call has the exact same
+ // static type.
+ // If there is a type-mismatch, just throw an `issue!`.
+ // (If the function is statically typed, so
+ // must all the arguments be as well).
+ // The call must have a yield of type `function` and the
+ // input part of the function (input |-> output), must match
+ // the type of the operand. :^)
call.callee = Box::new(self.type_branch(&*call.callee));
call.operands = vec![self.type_branch(&call.operands[0])];
diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs
@@ -1,4 +1,5 @@
use std::{fmt, ops};
+use std::collections::VecDeque;
/// Identifiers, node representing a name that
/// will represent a value stored.
@@ -266,16 +267,36 @@ impl StaticTypes {
impl fmt::Display for StaticTypes {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ let ss;
let s = match self {
- StaticTypes::TNatural => "Nat".to_string(),
- StaticTypes::TInteger => "Int".to_string(),
- StaticTypes::TReal => "Real".to_string(),
- StaticTypes::TString => "Str".to_string(),
- StaticTypes::TSymbol => "Sym".to_string(),
- StaticTypes::TSet(st) => format!("Set {}", st),
- StaticTypes::TFunction(o, r) => format!("({} -> {})", o, r),
- StaticTypes::TNil => "Nil".to_string(),
- StaticTypes::TUnknown => "Universal".to_string(),
+ StaticTypes::TNatural => "natural",
+ StaticTypes::TInteger => "integer",
+ StaticTypes::TReal => "real",
+ StaticTypes::TString => "string",
+ StaticTypes::TSymbol => "symbol",
+ StaticTypes::TSet(st) => match *st.clone() {
+ StaticTypes::TNatural => "Nat",
+ StaticTypes::TInteger => "Int",
+ StaticTypes::TReal => "Real",
+ StaticTypes::TString => "Str",
+ StaticTypes::TSymbol => "Sym",
+ StaticTypes::TFunction(o, r) => {
+ ss = format!("({} \u{1f852} {})", o, r);
+ ss.as_str()
+ },
+ StaticTypes::TNil => "Nil",
+ StaticTypes::TUnknown => "Any",
+ _ => {
+ ss = format!("Set {}", st);
+ ss.as_str()
+ },
+ },
+ StaticTypes::TFunction(o, r) => {
+ ss = format!("({} \u{21a6} {})", o, r);
+ ss.as_str()
+ },
+ StaticTypes::TNil => "nil",
+ StaticTypes::TUnknown => "anything",
};
write!(f, "{}", s)
}
@@ -345,7 +366,10 @@ impl Nodes {
"Nat" => StaticTypes::TSet(Box::new(StaticTypes::TNatural)),
"Int" => StaticTypes::TSet(Box::new(StaticTypes::TInteger)),
"Real" => StaticTypes::TSet(Box::new(StaticTypes::TReal)),
- "Universal" => StaticTypes::TSet(Box::new(StaticTypes::TUnknown)),
+ "Str" => StaticTypes::TSet(Box::new(StaticTypes::TString)),
+ "Sym" => StaticTypes::TSet(Box::new(StaticTypes::TSymbol)),
+ "Nil" => StaticTypes::TSet(Box::new(StaticTypes::TNil)),
+ "Any" => StaticTypes::TSet(Box::new(StaticTypes::TUnknown)),
_ => ident.static_type.to_owned()
}
},
@@ -389,6 +413,19 @@ impl Nodes {
}
}
+ pub fn node_type(&self) -> &str {
+ match self {
+ Nodes::Ident(_) => "identifier",
+ Nodes::Num(_) => "numeric",
+ Nodes::Str(_) => "string",
+ Nodes::Sym(_) => "symbol",
+ Nodes::Empty(_) => "empty",
+ Nodes::Call(_) => "function-call",
+ Nodes::Block(_) => "code-block",
+ _ => "ungrammatical-meta-node"
+ }
+ }
+
pub fn get_name(&self) -> Option<&str> {
match self {
Nodes::Str(n) => Some(n.value.as_str()),
@@ -464,6 +501,22 @@ impl CallNode {
self.return_type = new_type;
}
+ pub fn collect(&self) -> Vec<Nodes> {
+ fn make_argument_vector(call_node : &Nodes, operands : VecDeque<Nodes>) -> VecDeque<Nodes> {
+ let mut pushable = operands.clone();
+
+ if let Nodes::Call(call) = call_node {
+ pushable.push_front(call.operands[0].clone());
+ return make_argument_vector(&*call.callee, pushable);
+ }
+
+ pushable.push_front(call_node.clone());
+ return pushable;
+ }
+ let q = make_argument_vector(&Nodes::Call(self.clone()), VecDeque::new());
+ Vec::from(q)
+ }
+
pub fn is_unary(&self) -> bool {
self.callee.ident().is_some() && !self.operands.is_empty()
}
diff --git a/src/syntax/location.rs b/src/syntax/location.rs
@@ -1,4 +1,5 @@
/// Holds line, column and span of a lexical token.
+#[derive(Clone)]
pub struct Loc {
/// Line number.
pub line : u32,
diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs
@@ -13,6 +13,7 @@ pub fn parse(stream : Vec<Token>, file : &str) -> ast::Root {
environment.start();
+
environment.root
}
@@ -24,13 +25,15 @@ struct ParseEnvironment<'a> {
ignore_newline : bool,
line_number : usize,
+ eof_token : Token
}
impl<'a> ParseEnvironment<'a> {
pub fn new(stream : Vec<Token>, file : &'a str) -> Self {
ParseEnvironment {
root: ast::Root::new(),
- stream: stream,
+ eof_token: stream.last().unwrap().to_owned(),
+ stream,
optable: operators::PrecedenceTable::new(),
file,
@@ -53,10 +56,13 @@ impl<'a> ParseEnvironment<'a> {
self.root.branches.push(e);
current = self.stream.get(0);
}
- //self.assign_types();
+ self.shift();
}
fn shift(&mut self) -> Token {
+ if self.stream.is_empty() {
+ self.stream.push(self.eof_token.clone());
+ }
let shifted = self.stream.remove(0);
if shifted.location.line as usize != self.line_number {
if self.root.branches.last().is_some()
@@ -86,11 +92,15 @@ impl<'a> ParseEnvironment<'a> {
ast::IdentNode::new(&token.string)
},
_ => {
+ // If the operator is prefix:
+ // e.g. -a <=> ((-) a)
+ // Otherwise it's a partial application:
+ // e.g. (* a) <=> ((flip (*)) a)
if prefix.is_none() {
ast::CallNode::new(
ast::CallNode::new(
- ast::IdentNode::new(&token.string),
- vec![ast::EmptyNode::new()]),
+ ast::IdentNode::new("flip"),
+ vec![ast::IdentNode::new(&token.string)]),
vec![self.expr(500)])
} else {
ast::CallNode::new(
diff --git a/src/syntax/token.rs b/src/syntax/token.rs
@@ -6,7 +6,7 @@ use unicode_width::UnicodeWidthStr;
/// Contains all possible types/classes of
/// lexiacal tokens.
-#[derive(PartialEq)]
+#[derive(PartialEq, Clone)]
pub enum TokenType {
/// Identifiers, variables, function names etc.
Ident,
@@ -67,6 +67,7 @@ impl fmt::Display for TokenType {
/// Token structure, an individual lexiacal token,
/// represented by its type/class, what it was written as
/// in the program, and its location in the code.
+#[derive(Clone)]
pub struct Token {
/// What type/class of token it is.
pub class : TokenType,
diff --git a/test.vh b/test.vh
@@ -1,4 +1,15 @@
a : Nat -> Nat -> Int
-a n m = n + 2*m
+a n m = n - 2*m
-a 1 2-
\ No newline at end of file
+a 1 2
+
+-- a = n |-> (m |-> n - 2*m)
+-- |_____________|
+-- |
+-- func: `a__1`
+-- |_____________________|
+-- |
+-- func: `a__0`
+-- |__________________________|
+-- |
+-- func: a+
\ No newline at end of file