brokkr

Bytecode virtual machine for Valhalla.
git clone git://git.knutsen.co/brokkr
Log | Files | Refs | README | LICENSE

commit 83179d6b4daa6fbb778348c5174530c681c71073
parent 776caec0aab8a79320538a55ec3882d3b05a0bf1
Author: Demonstrandum <moi@knutsen.co>
Date:   Thu, 20 Feb 2020 16:29:19 +0000

Better way of managing addresses/raw-pointers.

Diffstat:
M src/bin.rs | 2 +-
M src/lib.rs | 13 +++++--------
A src/vm/address.rs | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/vm/frame.rs | 33 ++++++++++++++++++++++++++++-----
D src/vm/instructions.rs | 64 ----------------------------------------------------------------
M src/vm/mod.rs | 3 ++-
A src/vm/opcodes.rs | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/vm/unmarshal.rs | 45 ++++++++++++++++++++++++---------------------
8 files changed, 181 insertions(+), 100 deletions(-)

diff --git a/src/bin.rs b/src/bin.rs @@ -20,7 +20,7 @@ pub fn main() -> Result<(), std::io::Error> { f.read_to_end(&mut buffer) .expect("Could not dump file contents to bytesteam."); - let frame = unmarshal::parse_blob(&buffer); + let _frame = unmarshal::parse_blob(&buffer); } Ok(()) diff --git a/src/lib.rs b/src/lib.rs @@ -1,15 +1,12 @@ #![allow(incomplete_features)] #![feature(const_generics)] - #![warn( - clippy::all, - clippy::pedantic, - )] +#![warn( + clippy::all, + clippy::pedantic, +)] #![allow(clippy::needless_return)] -use std::ffi::c_void; -type VoidPtr = *const c_void; - pub mod assembler; pub mod vm; -pub fn main() { std::process::exit(0); } +pub fn main() { std::process::exit(1); } diff --git a/src/vm/address.rs b/src/vm/address.rs @@ -0,0 +1,57 @@ +use std::fmt; + +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +pub struct Address(pub usize); + +/// # Address to Generic Types +/// The type of a new `Address` is holding must always +/// implement the `Copy` trait. +/// When making an `Address` for a struct (call it `Foo`), +/// please always make sure you're giving it a reference (`&Foo`), +/// since references implement the `Copy` trait. +#[allow(clippy::inline_always)] +impl Address { + #[inline(always)] + pub fn new<T : Copy>(structure : T) -> Self { + Self(Box::into_raw(Box::new(structure)) as usize) + } + + /// # Safety + /// This function dereferences a raw pointer. + #[inline(always)] + #[must_use] + pub unsafe fn deref<T : Copy>(self) -> T { + *(self.0 as *mut T) + } + + // For temporary use. + #[inline(always)] + #[must_use] + pub fn null() -> Self { Address(0) } +} + +impl fmt::Debug for Address { + fn fmt(&self, f : &mut fmt::Formatter) -> fmt::Result { + write!(f, "void *0x{:016X}", self.0) + } +} + +impl fmt::Display for Address { + fn fmt(&self, f : &mut fmt::Formatter) -> fmt::Result { + write!(f, "Address(0x{:016X})", self.0) + } +} + + +// Perhaps figure out this overload in the future. 
+/* +use std::ops::Deref; + +impl<T> Deref for Address { + type Target = T; + + fn deref(&self) -> Self::Target { + *(self.0 as *const T) + } +} +*/ diff --git a/src/vm/frame.rs b/src/vm/frame.rs @@ -1,23 +1,46 @@ use std::collections::HashSet; -use crate::VoidPtr; +use std::fmt; + +use num_traits::{sign::Unsigned, cast::NumCast}; +use super::address::Address; + +#[derive(Clone, Copy, PartialEq)] +pub struct Instruction(pub usize); /// Frame on the call-stack -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct Frame { /// Path for non-compiled source file. pub source_file : String, /// Name for the frame, i.e. the module name. pub name : String, /// Vector of all constants used in the _function_. - pub constants : Vec<VoidPtr>, + pub constants : Vec<Address>, /// Set of names for local variables. pub locals : HashSet<String>, /// Instructions for execution of the _function_. - pub instructions : Vec<usize>, + pub instructions : Vec<Instruction>, /// Maximum depth for the evaluation-stack. pub stack_depth : u16, /// Evaluation-stack (since it is a stack based VM). 
- pub evaluations : Vec<VoidPtr> + pub evaluations : Vec<Address> +} + +impl<N> From<N> for Instruction where N: Unsigned + NumCast { + fn from(other : N) -> Self { + Instruction(num_traits::cast::<N, usize>(other).unwrap()) + } } +impl fmt::Debug for Instruction { + fn fmt(&self, f : &mut fmt::Formatter) -> fmt::Result { + write!(f, "0x{:04X}", self.0) + } +} + +impl fmt::Display for Instruction { + fn fmt(&self, f : &mut fmt::Formatter) -> fmt::Result { + write!(f, "0x{:04X}", self.0) + } +} diff --git a/src/vm/instructions.rs b/src/vm/instructions.rs @@ -1,64 +0,0 @@ -use num_derive::FromPrimitive; - -#[repr(usize)] -#[allow(non_camel_case_types)] -#[derive(FromPrimitive, Clone, Copy)] -pub enum Operators { - EOI = 0, // TAKES 0 OPERAND(s) (Not a proper operator) - - PUSH_CONST = 1, // TAKES 1 OPERAND(s) - PUSH_LOCAL = 2, // TAKES 1 OPERAND(s) - PUSH_SUPER = 3, // TAKES 1 OPERAND(s) - POP = 4, // TAKES 0 OPERAND(s) - STORE_LOCAL = 5, // TAKES 1 OPERAND(s) - DUP = 6, // TAKES 0 OPERAND(s) - DUP_N = 7, // TAKES 1 OPERAND(s) - SWAP = 8, // TAKES 0 OPERAND(s) - CALL_1 = 9, // TAKES 0 OPERAND(s) - CHECK_TYPE = 10, // TAKES 0 OPERAND(s) - CAST = 11, // TAKES 2 OPERAND(s) (2 operands, 1 out of 2 bytes for each) - MAKE_FUNC = 12, // TAKES 0 OPERAND(s) - YIELD = 13, // TAKES 0 OPERAND(s) - RAW_PRINT = 14, // TAKES 0 OPERAND(s) - - N_ADD = 40, // TAKES 0 OPERAND(s) - I_ADD = 41, // TAKES 0 OPERAND(s) - R_ADD = 42, // TAKES 0 OPERAND(s) - U_ADD = 43, // TAKES 0 OPERAND(s) - CONCAT = 44, // TAKES 0 OPERAND(s) - N_SUB = 45, // TAKES 0 OPERAND(s) - I_SUB = 46, // TAKES 0 OPERAND(s) - R_SUB = 47, // TAKES 0 OPERAND(s) - U_SUB = 48, // TAKES 0 OPERAND(s) - N_MUL = 49, // TAKES 0 OPERAND(s) - I_MUL = 50, // TAKES 0 OPERAND(s) - R_MUL = 51, // TAKES 0 OPERAND(s) - U_MUL = 52, // TAKES 0 OPERAND(s) - N_DIV = 53, // TAKES 0 OPERAND(s) - I_DIV = 54, // TAKES 0 OPERAND(s) - R_DIV = 55, // TAKES 0 OPERAND(s) - U_DIV = 56, // TAKES 0 OPERAND(s) - - HALT = 200, // TAKES 1 OPERAND(s) - - 
// Misc- / Meta-codes - SET_LINE = 254, // TAKES 1 OPERAND(s) - NOP = 255, // TAKES 0 OPERAND(s) -} - -impl Operators { - #[must_use] - pub fn takes_operand(self) -> bool { - match self { - Self::HALT - | Self::PUSH_CONST - | Self::PUSH_LOCAL - | Self::PUSH_SUPER - | Self::STORE_LOCAL - | Self::DUP_N - | Self::CAST - | Self::SET_LINE => true, - _ => false - } - } -} diff --git a/src/vm/mod.rs b/src/vm/mod.rs @@ -1,4 +1,5 @@ -pub mod instructions; +pub mod address; +pub mod opcodes; pub mod frame; pub mod unmarshal; diff --git a/src/vm/opcodes.rs b/src/vm/opcodes.rs @@ -0,0 +1,64 @@ +use num_derive::FromPrimitive; + +#[repr(usize)] +#[allow(non_camel_case_types)] +#[derive(FromPrimitive, Clone, Copy, PartialEq, Eq)] +pub enum Operators { + EOI = 0, // TAKES 0 OPERAND(s) (Not a proper operator) + + PUSH_CONST = 1, // TAKES 1 OPERAND(s) + PUSH_LOCAL = 2, // TAKES 1 OPERAND(s) + PUSH_SUPER = 3, // TAKES 1 OPERAND(s) + POP = 4, // TAKES 0 OPERAND(s) + STORE_LOCAL = 5, // TAKES 1 OPERAND(s) + DUP = 6, // TAKES 0 OPERAND(s) + DUP_N = 7, // TAKES 1 OPERAND(s) + SWAP = 8, // TAKES 0 OPERAND(s) + CALL_1 = 9, // TAKES 0 OPERAND(s) + CHECK_TYPE = 10, // TAKES 0 OPERAND(s) + CAST = 11, // TAKES 2 OPERAND(s) (2 operands, 1 out of 2 bytes for each) + MAKE_FUNC = 12, // TAKES 0 OPERAND(s) + YIELD = 13, // TAKES 0 OPERAND(s) + RAW_PRINT = 14, // TAKES 0 OPERAND(s) + + N_ADD = 40, // TAKES 0 OPERAND(s) + I_ADD = 41, // TAKES 0 OPERAND(s) + R_ADD = 42, // TAKES 0 OPERAND(s) + U_ADD = 43, // TAKES 0 OPERAND(s) + CONCAT = 44, // TAKES 0 OPERAND(s) + N_SUB = 45, // TAKES 0 OPERAND(s) + I_SUB = 46, // TAKES 0 OPERAND(s) + R_SUB = 47, // TAKES 0 OPERAND(s) + U_SUB = 48, // TAKES 0 OPERAND(s) + N_MUL = 49, // TAKES 0 OPERAND(s) + I_MUL = 50, // TAKES 0 OPERAND(s) + R_MUL = 51, // TAKES 0 OPERAND(s) + U_MUL = 52, // TAKES 0 OPERAND(s) + N_DIV = 53, // TAKES 0 OPERAND(s) + I_DIV = 54, // TAKES 0 OPERAND(s) + R_DIV = 55, // TAKES 0 OPERAND(s) + U_DIV = 56, // TAKES 0 OPERAND(s) + + HALT = 
200, // TAKES 1 OPERAND(s) + + // Misc- / Meta-codes + SET_LINE = 254, // TAKES 1 OPERAND(s) + NOP = 255, // TAKES 0 OPERAND(s) +} + +impl Operators { + #[must_use] + pub fn takes_operand(self) -> bool { + match self { + Self::HALT + | Self::PUSH_CONST + | Self::PUSH_LOCAL + | Self::PUSH_SUPER + | Self::STORE_LOCAL + | Self::DUP_N + | Self::CAST + | Self::SET_LINE => true, + _ => false + } + } +} diff --git a/src/vm/unmarshal.rs b/src/vm/unmarshal.rs @@ -8,11 +8,12 @@ * The immediate next step from here is to pass the frames to the VM * and have it follow the instructions byte-for-byte. * -!*/ + */ use std::collections::HashSet; -use crate::VoidPtr; -use super::instructions; +use super::address::Address; +use super::opcodes; use super::frame; +use super::frame::Instruction; use num_traits::FromPrimitive; @@ -58,7 +59,7 @@ mod eat { (i + size, padded) } - fn constant(mut i : usize, bytes : &ByteSlice) -> (usize, VoidPtr) { + fn constant(mut i : usize, bytes : &ByteSlice) -> (usize, Address) { let const_type = bytes[i]; i += 1; return match const_type { @@ -66,7 +67,7 @@ mod eat { 0x01..=0x03 => { let (i, bytes_slice) = consume_sized(i, bytes); let bytes_slice = fix_slice_size::<u8, POINTER_BYTES>(&bytes_slice[..POINTER_BYTES]); - let value = usize::from_be_bytes(*bytes_slice) as VoidPtr; + let value = Address(usize::from_be_bytes(*bytes_slice)); (i, value) }, // Parse Strings @@ -75,14 +76,11 @@ mod eat { let bytes_slice = fix_slice_size::<u8, POINTER_BYTES>(&bytes_slice[..POINTER_BYTES]); let str_len = usize::from_be_bytes(*bytes_slice); - // Don't deallocate the string by wrapping it in a `Box`, then - // casting it to a raw pointer and then to *void. - let string = Box::new(std::str::from_utf8(&bytes[i..i + str_len]) + // Store string on heap, `Address` holds a raw pointer to it. + let string = Address::new(std::str::from_utf8(&bytes[i..i + str_len]) .expect("Invalid utf8 bytes in string. Bad bytecode.")); - let string = Box::into_raw(string); // Shadowed... 
- // String is then accessed by doing: - // `unsafe { &*(frame.constants[2] as *const &str) }` - (i + str_len, string as *const _ as VoidPtr) + + (i + str_len, string) } _ => panic!(format!( "Type-specifier-prefix ({:x}) is not recognised.", @@ -90,13 +88,13 @@ mod eat { } } - pub fn constants(mut i : usize, bytes : &ByteSlice) -> (usize, Vec<VoidPtr>) { + pub fn constants(mut i : usize, bytes : &ByteSlice) -> (usize, Vec<Address>) { // Constant blocks are expected to start with `0x11`. #[cfg(debug_assertions)] assert_eq!(bytes[i], 0x11); i += 1; - let mut consts : Vec<VoidPtr> = vec![]; + let mut consts : Vec<Address> = vec![]; while bytes[i] != 0x00 { let (j, void) = constant(i, bytes); i = j; @@ -122,15 +120,15 @@ mod eat { (i + 1, set) } - pub fn instructions(mut i : usize, bytes : &ByteSlice) -> (usize, Vec<usize>) { - let mut instrs : Vec<usize> = vec![]; + pub fn instructions(mut i : usize, bytes : &ByteSlice) -> (usize, Vec<Instruction>) { + let mut instrs : Vec<Instruction> = vec![]; #[cfg(debug_assertions)] assert_eq!(bytes[i], 0x13); i += 1; while bytes[i] != 0x00 { - instrs.push(bytes[i] as usize); - let maybe_instr : Option<instructions::Operators> = + instrs.push(Instruction::from(bytes[i])); + let maybe_instr : Option<opcodes::Operators> = FromPrimitive::from_usize(bytes[i] as usize); if let Some(instr) = maybe_instr { // If the opcode takes an operand (u16), consume this too. @@ -138,7 +136,7 @@ mod eat { i += 2; let operand = (u16::from(bytes[i - 1]) << 8) + u16::from(bytes[i]); - instrs.push(operand as usize); + instrs.push(Instruction::from(operand)); } } i += 1; @@ -182,7 +180,7 @@ mod eat { pub fn parse_blob(bytes : &ByteSlice) -> frame::Frame { let mut i : usize = 0; // Parse compiler version number. - let version = bytes[0..2].as_ref(); + let _version = bytes[0..2].as_ref(); i += 3; // Parse primary/root code block. 
@@ -195,10 +193,15 @@ pub fn parse_blob(bytes : &ByteSlice) -> frame::Frame { // it in Rust, all you have to do is: // ``` // let string : &str = unsafe { - // *(stack_frame.constants[2] as *const &str) + // *(stack_frame.constants[2].0 as *const &str) // }; // println!("str: {}", string); // ``` + // Or even better: + // ``` + // let string : &str = unsafe { stack_frame.constants[2].deref() }; + // println!("str: {}", string); + // ``` return stack_frame; }