brokkr

Bytecode virtual machine for Valhalla.
git clone git://git.knutsen.co/brokkr
Log | Files | Refs | README | LICENSE

commit a68ac7ea570579eb90b45e75fbf550878f0a882b
parent 9a50799821ec4ea2a3b7de85a21233f001694e9a
Author: Demonstrandum <moi@knutsen.co>
Date:   Sun, 23 Feb 2020 21:17:15 +0000

Basic execution of bytecode now implemented.

Diffstat:
M Cargo.toml | 5 +++--
M scripts/gen_bytecode_md_spec.rb | 4 ++--
M src/bin.rs | 10 ++++++++--
M src/vm/address.rs | 23 ++++++++++++++---------
A src/vm/call_stack.rs | 30 ++++++++++++++++++++++++++++++
A src/vm/evaluation.rs | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/vm/frame.rs | 36 +++++++++++++++++++++++++++++++++++-
M src/vm/mod.rs | 4 +++-
M src/vm/opcodes.rs | 3 ++-
M src/vm/unmarshal.rs | 17 ++++++-----------
10 files changed, 262 insertions(+), 29 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml @@ -6,17 +6,18 @@ repository = "https://github.com/Demonstrandum/brokkr" documentation = "https://github.com/Demonstrandum/brokkr" keywords = ["set-theory", "assembler", "bytecode", "interpreter", "virtual-machine"] license = "GPL-3.0" -license-file = "LICENSE.md" readme = "README.md" version = "0.1.0" authors = ["Demonstrandum <moi@knutsen.co>"] edition = "2018" +[features] +debug = [] # No deps. + [lib] name = "brokkr" path = "src/lib.rs" - [[bin]] name = "brokkr" path = "src/bin.rs" diff --git a/scripts/gen_bytecode_md_spec.rb b/scripts/gen_bytecode_md_spec.rb @@ -38,12 +38,12 @@ Sizes are as follows: | Type | Size | |----------|--------------------------| -| Operator | 1 Byte (8 Bits, `u8`) | +| Operator | 1 Byte (8 Bits, `u8`) | | Operand | 2 Bytes (16 Bits, `u16`) | EOF puts "# Brokkr Bytecode Specification" puts KEY -puts "## Bytecodes" +puts "## Bytecodes\n" puts TABLE diff --git a/src/bin.rs b/src/bin.rs @@ -1,9 +1,11 @@ use brokkr::vm::unmarshal; +use brokkr::vm::evaluation; use std::env; use std::io::prelude::*; use std::{fs::File, path::Path}; + pub fn main() -> Result<(), std::io::Error> { let mut args : Vec<String> = env::args().collect(); args.remove(0); @@ -11,7 +13,7 @@ pub fn main() -> Result<(), std::io::Error> { let files = args.iter().filter(|arg| Path::new(arg).exists()); for file in files { - #[cfg(debug_assertions)] + #[cfg(feature="debug")] println!("Reading file {}...", file); let mut f = File::open(file) @@ -20,7 +22,11 @@ pub fn main() -> Result<(), std::io::Error> { f.read_to_end(&mut buffer) .expect("Could not dump file contents to bytesteam."); - let _frame = unmarshal::parse_blob(&buffer); + let frame = unmarshal::parse_blob(&buffer); + + let mut env = evaluation::Environment::new(); + env.entry(frame); + env.execute(); } Ok(()) diff --git a/src/vm/address.rs b/src/vm/address.rs @@ -1,15 +1,10 @@ use std::fmt; -use num_traits::FromPrimitive; #[derive(Copy, Clone, PartialEq, Eq, Hash)] pub struct 
Address(pub usize); -/// # Address to Generic Types -/// The type of a new `Address` is holding must always -/// implement the `Copy` trait. -/// When making an `Address` for a struct (call it `Foo`), -/// please always make sure you're giving it a reference (`&Foo`), -/// since references implement the `Copy` trait. +/// # Address to any type +/// Pass only objects with size known at compile time to `new`. #[allow(clippy::inline_always)] impl Address { #[inline(always)] @@ -35,10 +30,20 @@ impl Address { /// Get value of self.0, if it does not represent a pointer, /// and instead is a value such as f64, stored as a usize. + /// # Safety + /// This function transmutes types. + #[inline(always)] + #[must_use] + pub unsafe fn value<T>(self) -> T { + std::mem::transmute_copy(&self.0) + } + + /// # Safety + /// This function transmutes types. #[inline(always)] #[must_use] - pub fn value<T>(self) -> T { - unsafe { std::mem::transmute_copy(&self.0) } + pub unsafe fn from_value<T>(value : &T) -> Self { + std::mem::transmute_copy(value) } /// Null-pointer, for temporary use. diff --git a/src/vm/call_stack.rs b/src/vm/call_stack.rs @@ -0,0 +1,30 @@ +use super::frame::Frame; + +const DEFAULT_STACK_DEPTH : usize = 2_500_000; + +pub struct CallStack { + pub stack : Vec<Frame>, + pub max_depth : usize +} + +impl CallStack { + #[must_use] + pub fn new() -> Self { + Self::default() + } + + #[inline] + pub fn push(&mut self, frame : Frame) { + self.stack.push(frame); + } +} + +impl Default for CallStack { + fn default() -> Self { + let max_depth = DEFAULT_STACK_DEPTH; + Self { + stack: Vec::with_capacity(max_depth), + max_depth + } + } +} diff --git a/src/vm/evaluation.rs b/src/vm/evaluation.rs @@ -0,0 +1,159 @@ +use super::address::Address; +use super::opcodes::Operators as Op; +use super::frame::{Frame, Instruction as Instr}; +use super::call_stack::CallStack; + +use num_traits::FromPrimitive; + +use std::mem; + +macro_rules! 
cast_from { + ($from:expr, $tos:expr, $to:ty) => { unsafe { + match $from { + 0x01 => Address::from_value(&($tos.0 as $to)), + 0x02 => Address::from_value(&($tos.value::<isize>() as $to)), + 0x03 => Address::from_value(&($tos.value::<f64>() as $to)), + _ => Address::null() + } + }}; +} + +/// Implement execution for frames. +impl Frame { + #[inline] + fn current_instr(&self) -> Instr { self.instructions[self.pc] } + + pub fn execute(&mut self) -> Address { + self.pc = 0; + loop { + let top = self.evaluations.len().wrapping_sub(1); + + let instr = self.current_instr(); + if instr == Instr(0x00) { return Address::null(); } + + let instr_word = usize::from(instr); + let op = Op::from_usize(instr_word) + .expect("Operand was consumed as operator, this shouldn't happen."); + + if op == Op::YIELD { + let tos = self.evaluations.get(top) + .expect("Cannot yield when stack is empty."); + return *tos; + } + + match op { + Op::SET_LINE => { + self.pc += 1; + self.line = usize::from(self.instructions[self.pc]); + }, + Op::PUSH_CONST => { + self.pc += 1; + let index = usize::from(self.current_instr()); + self.evaluations.push(self.constants[index]); + }, + Op::PUSH_LOCAL => { + self.pc += 1; + let index = usize::from(self.current_instr()); + self.evaluations.push(self.locals[index]); + }, + Op::POP => { self.evaluations.pop(); }, + Op::STORE_LOCAL => { + self.pc += 1; + let index = usize::from(self.current_instr()); + let value = self.evaluations.pop() + .expect("Stack empty, cannot store value."); + self.locals[index] = value; + }, + Op::CAST => { + self.pc += 1; + let operand = usize::from(self.current_instr()); + let to = operand & 0x00ff; + let from = operand >> 8; + + let tos = self.evaluations.get(top) + .expect("Cast, but stack is empty."); + + self.evaluations[top] = match to { + // Real + 0x03 => cast_from!(from, tos, f64), + // Integer + 0x02 => cast_from!(from, tos, isize), + // Natural + 0x01 => cast_from!(from, tos, usize), + _ => panic!("Unknown cast type 
(0x{:02X})", to) + }; + }, + Op::RAW_PRINT => { + let tos = self.evaluations[top]; + + self.pc += 1; + let operand = usize::from(self.current_instr()); + + print!("raw: "); + unsafe { + match operand { + 0x01 => println!("{}", tos.value::<usize>()), + 0x02 => println!("{}", tos.value::<isize>()), + 0x03 => println!("{}", tos.value::<f64>()), + 0x04 => println!("{}", tos.reference::<&str>()), + _ => panic!("Unhandled RAW_PRINT type.") + } + }; + }, + Op::R_ADD => { + let tos = self.evaluations.get(top) + .expect("Cannot perform addition when stack is empty."); + let sos = self.evaluations.get(top - 1) + .expect("Cannot perform addition when stack is only 1 deep."); + + self.evaluations[top] = unsafe { + Address::from_value(&( + tos.value::<f64>() + sos.value::<f64>() + )) + } + }, + _ => panic!( + "Unaccounted for opcode (0x{:04X}), cannot execute.", + instr_word) + } + + self.pc += 1; + } + } +} + +pub struct Environment { + pub calls : CallStack +} + +impl Environment { + #[must_use] + pub fn new() -> Self { + Self::default() + } + + #[inline] + pub fn entry(&mut self, frame : Frame) { + if !self.calls.stack.is_empty() { + panic!("Cannot set entry point when stack is not empty."); + } + + self.calls.push(frame); + } + + #[inline] + pub fn execute(&mut self) { + let last = self.calls.stack.len() - 1; + let top = &mut self.calls.stack[last]; + + top.execute(); + } +} + +impl Default for Environment { + fn default() -> Self { + Self { + calls: CallStack::new(), + } + } +} diff --git a/src/vm/frame.rs b/src/vm/frame.rs @@ -17,22 +17,56 @@ pub struct Frame { /// Vector of all constants used in the _function_. pub constants : Vec<Address>, /// Set of names for local variables. - pub locals : HashSet<String>, + pub local_names : HashSet<String>, + /// Vector of all local variables. + pub locals : Vec<Address>, /// Instructions for execution of the _function_. pub instructions : Vec<Instruction>, /// Maximum depth for the evaluation-stack. 
pub stack_depth : u16, + /// Program Counter. + pub pc : usize, + /// Current line. + pub line : usize, /// Evaluation-stack (since it is a stack based VM). pub evaluations : Vec<Address> } +impl Frame { + #[must_use] + pub fn new(filename : String, module : String, + constants : Vec<Address>, local_names : HashSet<String>, + instructions : Vec<Instruction>, stack_depth : u16) -> Self { + Self { + source_file: filename, + name: module, + constants, + locals: vec![Address::null(); local_names.len()], + local_names, + instructions, + + stack_depth, + pc: 0, + line: 1, + evaluations: Vec::with_capacity(stack_depth as usize) + } + } +} + impl<N> From<N> for Instruction where N: Unsigned + NumCast { fn from(other : N) -> Self { Instruction(num_traits::cast::<N, usize>(other).unwrap()) } } +impl From<Instruction> for usize { + #[inline] + fn from(instr : Instruction) -> Self { + instr.0 + } +} + impl fmt::Debug for Instruction { fn fmt(&self, f : &mut fmt::Formatter) -> fmt::Result { write!(f, "0x{:04X}", self.0) diff --git a/src/vm/mod.rs b/src/vm/mod.rs @@ -1,6 +1,8 @@ pub mod address; pub mod opcodes; + pub mod frame; pub mod unmarshal; - +pub mod call_stack; +pub mod evaluation; diff --git a/src/vm/opcodes.rs b/src/vm/opcodes.rs @@ -19,7 +19,7 @@ pub enum Operators { CAST = 11, // TAKES 2 OPERAND(s) (2 operands, 1 out of 2 bytes for each) MAKE_FUNC = 12, // TAKES 0 OPERAND(s) YIELD = 13, // TAKES 0 OPERAND(s) - RAW_PRINT = 14, // TAKES 0 OPERAND(s) + RAW_PRINT = 14, // TAKES 1 OPERAND(s) N_ADD = 40, // TAKES 0 OPERAND(s) I_ADD = 41, // TAKES 0 OPERAND(s) @@ -57,6 +57,7 @@ impl Operators { | Self::STORE_LOCAL | Self::DUP_N | Self::CAST + | Self::RAW_PRINT | Self::SET_LINE => true, _ => false } diff --git a/src/vm/unmarshal.rs b/src/vm/unmarshal.rs @@ -147,6 +147,7 @@ mod eat { i += 1; } + instrs.push(Instruction(0)); (i, instrs) } @@ -167,16 +168,10 @@ mod eat { let (i, instructions) = eat::instructions(i, bytes); // Construct call-frame. 
- let stack_frame = frame::Frame { - source_file: filename, - name: module, - constants, - locals, - instructions, - - stack_depth, - evaluations: Vec::with_capacity(stack_depth as usize) - }; + let stack_frame = frame::Frame::new( + filename, module, constants, + locals, instructions, stack_depth); + return (i, stack_frame); } } @@ -191,7 +186,7 @@ pub fn parse_blob(bytes : &ByteSlice) -> frame::Frame { // Parse primary/root code block. let (_, stack_frame) = eat::block(i, bytes); - #[cfg(debug_assertions)] + #[cfg(feature="debug")] println!("{:#?}", stack_frame); // If `stack_frame.constants[2]` is a pointer to a string, then, to use