commit 83179d6b4daa6fbb778348c5174530c681c71073
parent 776caec0aab8a79320538a55ec3882d3b05a0bf1
Author: Demonstrandum <moi@knutsen.co>
Date: Thu, 20 Feb 2020 16:29:19 +0000
Better way of managing addresses/raw-pointers.
Diffstat:
8 files changed, 181 insertions(+), 100 deletions(-)
diff --git a/src/bin.rs b/src/bin.rs
@@ -20,7 +20,7 @@ pub fn main() -> Result<(), std::io::Error> {
f.read_to_end(&mut buffer)
.expect("Could not dump file contents to bytesteam.");
- let frame = unmarshal::parse_blob(&buffer);
+ let _frame = unmarshal::parse_blob(&buffer);
}
Ok(())
diff --git a/src/lib.rs b/src/lib.rs
@@ -1,15 +1,12 @@
#![allow(incomplete_features)]
#![feature(const_generics)]
- #![warn(
- clippy::all,
- clippy::pedantic,
- )]
+#![warn(
+ clippy::all,
+ clippy::pedantic,
+)]
#![allow(clippy::needless_return)]
-use std::ffi::c_void;
-type VoidPtr = *const c_void;
-
pub mod assembler;
pub mod vm;
-pub fn main() { std::process::exit(0); }
+pub fn main() { std::process::exit(1); }
diff --git a/src/vm/address.rs b/src/vm/address.rs
@@ -0,0 +1,57 @@
+use std::fmt;
+
+#[derive(Copy, Clone, PartialEq, Eq, Hash)]
+pub struct Address(pub usize);
+
+/// # Address to Generic Types
+/// The type that a new `Address` holds must always
+/// implement the `Copy` trait.
+/// When making an `Address` for a struct (call it `Foo`),
+/// please always make sure you're giving it a reference (`&Foo`),
+/// since references implement the `Copy` trait.
+#[allow(clippy::inline_always)]
+impl Address {
+ #[inline(always)]
+ pub fn new<T : Copy>(structure : T) -> Self {
+ Self(Box::into_raw(Box::new(structure)) as usize)
+ }
+
+ /// # Safety
+ /// This function dereferences a raw pointer.
+ #[inline(always)]
+ #[must_use]
+ pub unsafe fn deref<T : Copy>(self) -> T {
+ *(self.0 as *mut T)
+ }
+
+ // For temporary use.
+ #[inline(always)]
+ #[must_use]
+ pub fn null() -> Self { Address(0) }
+}
+
+impl fmt::Debug for Address {
+ fn fmt(&self, f : &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "void *0x{:016X}", self.0)
+ }
+}
+
+impl fmt::Display for Address {
+ fn fmt(&self, f : &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "Address(0x{:016X})", self.0)
+ }
+}
+
+
+// Perhaps figure out this overload in the future.
+/*
+use std::ops::Deref;
+
+impl<T> Deref for Address {
+ type Target = T;
+
+ fn deref(&self) -> Self::Target {
+ *(self.0 as *const T)
+ }
+}
+*/
diff --git a/src/vm/frame.rs b/src/vm/frame.rs
@@ -1,23 +1,46 @@
use std::collections::HashSet;
-use crate::VoidPtr;
+use std::fmt;
+
+use num_traits::{sign::Unsigned, cast::NumCast};
+use super::address::Address;
+
+#[derive(Clone, Copy, PartialEq)]
+pub struct Instruction(pub usize);
/// Frame on the call-stack
-#[derive(Debug)]
+#[derive(Debug, Clone)]
pub struct Frame {
/// Path for non-compiled source file.
pub source_file : String,
/// Name for the frame, i.e. the module name.
pub name : String,
/// Vector of all constants used in the _function_.
- pub constants : Vec<VoidPtr>,
+ pub constants : Vec<Address>,
/// Set of names for local variables.
pub locals : HashSet<String>,
/// Instructions for execution of the _function_.
- pub instructions : Vec<usize>,
+ pub instructions : Vec<Instruction>,
/// Maximum depth for the evaluation-stack.
pub stack_depth : u16,
/// Evaluation-stack (since it is a stack based VM).
- pub evaluations : Vec<VoidPtr>
+ pub evaluations : Vec<Address>
+}
+
+impl<N> From<N> for Instruction where N: Unsigned + NumCast {
+ fn from(other : N) -> Self {
+ Instruction(num_traits::cast::<N, usize>(other).unwrap())
+ }
}
+impl fmt::Debug for Instruction {
+ fn fmt(&self, f : &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "0x{:04X}", self.0)
+ }
+}
+
+impl fmt::Display for Instruction {
+ fn fmt(&self, f : &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "0x{:04X}", self.0)
+ }
+}
diff --git a/src/vm/instructions.rs b/src/vm/instructions.rs
@@ -1,64 +0,0 @@
-use num_derive::FromPrimitive;
-
-#[repr(usize)]
-#[allow(non_camel_case_types)]
-#[derive(FromPrimitive, Clone, Copy)]
-pub enum Operators {
- EOI = 0, // TAKES 0 OPERAND(s) (Not a proper operator)
-
- PUSH_CONST = 1, // TAKES 1 OPERAND(s)
- PUSH_LOCAL = 2, // TAKES 1 OPERAND(s)
- PUSH_SUPER = 3, // TAKES 1 OPERAND(s)
- POP = 4, // TAKES 0 OPERAND(s)
- STORE_LOCAL = 5, // TAKES 1 OPERAND(s)
- DUP = 6, // TAKES 0 OPERAND(s)
- DUP_N = 7, // TAKES 1 OPERAND(s)
- SWAP = 8, // TAKES 0 OPERAND(s)
- CALL_1 = 9, // TAKES 0 OPERAND(s)
- CHECK_TYPE = 10, // TAKES 0 OPERAND(s)
- CAST = 11, // TAKES 2 OPERAND(s) (2 operands, 1 out of 2 bytes for each)
- MAKE_FUNC = 12, // TAKES 0 OPERAND(s)
- YIELD = 13, // TAKES 0 OPERAND(s)
- RAW_PRINT = 14, // TAKES 0 OPERAND(s)
-
- N_ADD = 40, // TAKES 0 OPERAND(s)
- I_ADD = 41, // TAKES 0 OPERAND(s)
- R_ADD = 42, // TAKES 0 OPERAND(s)
- U_ADD = 43, // TAKES 0 OPERAND(s)
- CONCAT = 44, // TAKES 0 OPERAND(s)
- N_SUB = 45, // TAKES 0 OPERAND(s)
- I_SUB = 46, // TAKES 0 OPERAND(s)
- R_SUB = 47, // TAKES 0 OPERAND(s)
- U_SUB = 48, // TAKES 0 OPERAND(s)
- N_MUL = 49, // TAKES 0 OPERAND(s)
- I_MUL = 50, // TAKES 0 OPERAND(s)
- R_MUL = 51, // TAKES 0 OPERAND(s)
- U_MUL = 52, // TAKES 0 OPERAND(s)
- N_DIV = 53, // TAKES 0 OPERAND(s)
- I_DIV = 54, // TAKES 0 OPERAND(s)
- R_DIV = 55, // TAKES 0 OPERAND(s)
- U_DIV = 56, // TAKES 0 OPERAND(s)
-
- HALT = 200, // TAKES 1 OPERAND(s)
-
- // Misc- / Meta-codes
- SET_LINE = 254, // TAKES 1 OPERAND(s)
- NOP = 255, // TAKES 0 OPERAND(s)
-}
-
-impl Operators {
- #[must_use]
- pub fn takes_operand(self) -> bool {
- match self {
- Self::HALT
- | Self::PUSH_CONST
- | Self::PUSH_LOCAL
- | Self::PUSH_SUPER
- | Self::STORE_LOCAL
- | Self::DUP_N
- | Self::CAST
- | Self::SET_LINE => true,
- _ => false
- }
- }
-}
diff --git a/src/vm/mod.rs b/src/vm/mod.rs
@@ -1,4 +1,5 @@
-pub mod instructions;
+pub mod address;
+pub mod opcodes;
pub mod frame;
pub mod unmarshal;
diff --git a/src/vm/opcodes.rs b/src/vm/opcodes.rs
@@ -0,0 +1,64 @@
+use num_derive::FromPrimitive;
+
+#[repr(usize)]
+#[allow(non_camel_case_types)]
+#[derive(FromPrimitive, Clone, Copy, PartialEq, Eq)]
+pub enum Operators {
+ EOI = 0, // TAKES 0 OPERAND(s) (Not a proper operator)
+
+ PUSH_CONST = 1, // TAKES 1 OPERAND(s)
+ PUSH_LOCAL = 2, // TAKES 1 OPERAND(s)
+ PUSH_SUPER = 3, // TAKES 1 OPERAND(s)
+ POP = 4, // TAKES 0 OPERAND(s)
+ STORE_LOCAL = 5, // TAKES 1 OPERAND(s)
+ DUP = 6, // TAKES 0 OPERAND(s)
+ DUP_N = 7, // TAKES 1 OPERAND(s)
+ SWAP = 8, // TAKES 0 OPERAND(s)
+ CALL_1 = 9, // TAKES 0 OPERAND(s)
+ CHECK_TYPE = 10, // TAKES 0 OPERAND(s)
+ CAST = 11, // TAKES 2 OPERAND(s) (2 operands, 1 out of 2 bytes for each)
+ MAKE_FUNC = 12, // TAKES 0 OPERAND(s)
+ YIELD = 13, // TAKES 0 OPERAND(s)
+ RAW_PRINT = 14, // TAKES 0 OPERAND(s)
+
+ N_ADD = 40, // TAKES 0 OPERAND(s)
+ I_ADD = 41, // TAKES 0 OPERAND(s)
+ R_ADD = 42, // TAKES 0 OPERAND(s)
+ U_ADD = 43, // TAKES 0 OPERAND(s)
+ CONCAT = 44, // TAKES 0 OPERAND(s)
+ N_SUB = 45, // TAKES 0 OPERAND(s)
+ I_SUB = 46, // TAKES 0 OPERAND(s)
+ R_SUB = 47, // TAKES 0 OPERAND(s)
+ U_SUB = 48, // TAKES 0 OPERAND(s)
+ N_MUL = 49, // TAKES 0 OPERAND(s)
+ I_MUL = 50, // TAKES 0 OPERAND(s)
+ R_MUL = 51, // TAKES 0 OPERAND(s)
+ U_MUL = 52, // TAKES 0 OPERAND(s)
+ N_DIV = 53, // TAKES 0 OPERAND(s)
+ I_DIV = 54, // TAKES 0 OPERAND(s)
+ R_DIV = 55, // TAKES 0 OPERAND(s)
+ U_DIV = 56, // TAKES 0 OPERAND(s)
+
+ HALT = 200, // TAKES 1 OPERAND(s)
+
+ // Misc- / Meta-codes
+ SET_LINE = 254, // TAKES 1 OPERAND(s)
+ NOP = 255, // TAKES 0 OPERAND(s)
+}
+
+impl Operators {
+ #[must_use]
+ pub fn takes_operand(self) -> bool {
+ match self {
+ Self::HALT
+ | Self::PUSH_CONST
+ | Self::PUSH_LOCAL
+ | Self::PUSH_SUPER
+ | Self::STORE_LOCAL
+ | Self::DUP_N
+ | Self::CAST
+ | Self::SET_LINE => true,
+ _ => false
+ }
+ }
+}
diff --git a/src/vm/unmarshal.rs b/src/vm/unmarshal.rs
@@ -8,11 +8,12 @@
* The immediate next step from here is to pass the frames to the VM
* and have it follow the instructions byte-for-byte.
*
-!*/
+ */
use std::collections::HashSet;
-use crate::VoidPtr;
-use super::instructions;
+use super::address::Address;
+use super::opcodes;
use super::frame;
+use super::frame::Instruction;
use num_traits::FromPrimitive;
@@ -58,7 +59,7 @@ mod eat {
(i + size, padded)
}
- fn constant(mut i : usize, bytes : &ByteSlice) -> (usize, VoidPtr) {
+ fn constant(mut i : usize, bytes : &ByteSlice) -> (usize, Address) {
let const_type = bytes[i];
i += 1;
return match const_type {
@@ -66,7 +67,7 @@ mod eat {
0x01..=0x03 => {
let (i, bytes_slice) = consume_sized(i, bytes);
let bytes_slice = fix_slice_size::<u8, POINTER_BYTES>(&bytes_slice[..POINTER_BYTES]);
- let value = usize::from_be_bytes(*bytes_slice) as VoidPtr;
+ let value = Address(usize::from_be_bytes(*bytes_slice));
(i, value)
},
// Parse Strings
@@ -75,14 +76,11 @@ mod eat {
let bytes_slice = fix_slice_size::<u8, POINTER_BYTES>(&bytes_slice[..POINTER_BYTES]);
let str_len = usize::from_be_bytes(*bytes_slice);
- // Don't deallocate the string by wrapping it in a `Box`, then
- // casting it to a raw pointer and then to *void.
- let string = Box::new(std::str::from_utf8(&bytes[i..i + str_len])
+ // Box the `&str` on the heap; `Address` holds a raw pointer to that box.
+ let string = Address::new(std::str::from_utf8(&bytes[i..i + str_len])
.expect("Invalid utf8 bytes in string. Bad bytecode."));
- let string = Box::into_raw(string); // Shadowed...
- // String is then accessed by doing:
- // `unsafe { &*(frame.constants[2] as *const &str) }`
- (i + str_len, string as *const _ as VoidPtr)
+
+ (i + str_len, string)
}
_ => panic!(format!(
"Type-specifier-prefix ({:x}) is not recognised.",
@@ -90,13 +88,13 @@ mod eat {
}
}
- pub fn constants(mut i : usize, bytes : &ByteSlice) -> (usize, Vec<VoidPtr>) {
+ pub fn constants(mut i : usize, bytes : &ByteSlice) -> (usize, Vec<Address>) {
// Constant blocks are expected to start with `0x11`.
#[cfg(debug_assertions)]
assert_eq!(bytes[i], 0x11);
i += 1;
- let mut consts : Vec<VoidPtr> = vec![];
+ let mut consts : Vec<Address> = vec![];
while bytes[i] != 0x00 {
let (j, void) = constant(i, bytes);
i = j;
@@ -122,15 +120,15 @@ mod eat {
(i + 1, set)
}
- pub fn instructions(mut i : usize, bytes : &ByteSlice) -> (usize, Vec<usize>) {
- let mut instrs : Vec<usize> = vec![];
+ pub fn instructions(mut i : usize, bytes : &ByteSlice) -> (usize, Vec<Instruction>) {
+ let mut instrs : Vec<Instruction> = vec![];
#[cfg(debug_assertions)]
assert_eq!(bytes[i], 0x13);
i += 1;
while bytes[i] != 0x00 {
- instrs.push(bytes[i] as usize);
- let maybe_instr : Option<instructions::Operators> =
+ instrs.push(Instruction::from(bytes[i]));
+ let maybe_instr : Option<opcodes::Operators> =
FromPrimitive::from_usize(bytes[i] as usize);
if let Some(instr) = maybe_instr {
// If the opcode takes an operand (u16), consume this too.
@@ -138,7 +136,7 @@ mod eat {
i += 2;
let operand = (u16::from(bytes[i - 1]) << 8)
+ u16::from(bytes[i]);
- instrs.push(operand as usize);
+ instrs.push(Instruction::from(operand));
}
}
i += 1;
@@ -182,7 +180,7 @@ mod eat {
pub fn parse_blob(bytes : &ByteSlice) -> frame::Frame {
let mut i : usize = 0;
// Parse compiler version number.
- let version = bytes[0..2].as_ref();
+ let _version = bytes[0..2].as_ref();
i += 3;
// Parse primary/root code block.
@@ -195,10 +193,15 @@ pub fn parse_blob(bytes : &ByteSlice) -> frame::Frame {
// it in Rust, all you have to do is:
// ```
// let string : &str = unsafe {
- // *(stack_frame.constants[2] as *const &str)
+ // *(stack_frame.constants[2].0 as *const &str)
// };
// println!("str: {}", string);
// ```
+ // Or even better:
+ // ```
+ // let string : &str = unsafe { stack_frame.constants[2].deref() };
+ // println!("str: {}", string);
+ // ```
return stack_frame;
}