commit 83179d6b4daa6fbb778348c5174530c681c71073
parent 776caec0aab8a79320538a55ec3882d3b05a0bf1
Author: Demonstrandum <moi@knutsen.co>
Date:   Thu, 20 Feb 2020 16:29:19 +0000
Better way of managing addresses/raw-pointers.
Diffstat:
8 files changed, 181 insertions(+), 100 deletions(-)
diff --git a/src/bin.rs b/src/bin.rs
@@ -20,7 +20,7 @@ pub fn main() -> Result<(), std::io::Error> {
         f.read_to_end(&mut buffer)
             .expect("Could not dump file contents to bytesteam.");
 
-        let frame = unmarshal::parse_blob(&buffer);
+        let _frame = unmarshal::parse_blob(&buffer);
     }
 
     Ok(())
diff --git a/src/lib.rs b/src/lib.rs
@@ -1,15 +1,12 @@
 #![allow(incomplete_features)]
 #![feature(const_generics)]
- #![warn(
-     clippy::all,
-     clippy::pedantic,
- )]
+#![warn(
+    clippy::all,
+    clippy::pedantic,
+)]
 #![allow(clippy::needless_return)]
 
-use std::ffi::c_void;
-type VoidPtr = *const c_void;
-
 pub mod assembler;
 pub mod vm;
 
-pub fn main() { std::process::exit(0); }
+pub fn main() { std::process::exit(1); }
diff --git a/src/vm/address.rs b/src/vm/address.rs
@@ -0,0 +1,57 @@
+use std::fmt;
+
+#[derive(Copy, Clone, PartialEq, Eq, Hash)]
+pub struct Address(pub usize);
+
+/// # Address to Generic Types
+///   The type that a new `Address` holds must always
+/// implement the `Copy` trait.
+///   When making an `Address` for a struct (call it `Foo`),
+/// please always make sure you're giving it a reference (`&Foo`),
+/// since references implement the `Copy` trait.
+#[allow(clippy::inline_always)]
+impl Address {
+    #[inline(always)]
+    pub fn new<T : Copy>(structure : T) -> Self {
+        Self(Box::into_raw(Box::new(structure)) as usize)
+    }
+
+    /// # Safety
+    /// This function dereferences a raw pointer; `self` must hold the address of a valid, live `T`.
+    #[inline(always)]
+    #[must_use]
+    pub unsafe fn deref<T : Copy>(self) -> T {
+        *(self.0 as *mut T)
+    }
+
+    // For temporary use.
+    #[inline(always)]
+    #[must_use]
+    pub fn null() -> Self { Address(0) }
+}
+
+impl fmt::Debug for Address {
+    fn fmt(&self, f : &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "void *0x{:016X}", self.0)
+    }
+}
+
+impl fmt::Display for Address {
+    fn fmt(&self, f : &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "Address(0x{:016X})", self.0)
+    }
+}
+
+
+// Perhaps figure out this overload in the future.
+/*
+use std::ops::Deref;
+
+impl<T> Deref for Address {
+    type Target = T;
+
+    fn deref(&self) -> Self::Target {
+        *(self.0 as *const T)
+    }
+}
+*/
diff --git a/src/vm/frame.rs b/src/vm/frame.rs
@@ -1,23 +1,46 @@
 use std::collections::HashSet;
-use crate::VoidPtr;
+use std::fmt;
+
+use num_traits::{sign::Unsigned, cast::NumCast};
+use super::address::Address;
+
+#[derive(Clone, Copy, PartialEq)]
+pub struct Instruction(pub usize);
 
 /// Frame on the call-stack
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct Frame {
     /// Path for non-compiled source file.
     pub source_file : String,
     /// Name for the frame, i.e. the module name.
     pub name : String,
     /// Vector of all constants used in the _function_.
-    pub constants : Vec<VoidPtr>,
+    pub constants : Vec<Address>,
     /// Set of names for local variables.
     pub locals : HashSet<String>,
     /// Instructions for execution of the _function_.
-    pub instructions : Vec<usize>,
+    pub instructions : Vec<Instruction>,
 
     /// Maximum depth for the evaluation-stack.
     pub stack_depth : u16,
     /// Evaluation-stack (since it is a stack based VM).
-    pub evaluations : Vec<VoidPtr>
+    pub evaluations : Vec<Address>
+}
+
+impl<N> From<N> for Instruction where N: Unsigned + NumCast {
+    fn from(other : N) -> Self {
+        Instruction(num_traits::cast::<N, usize>(other).unwrap())
+    }
 }
 
+impl fmt::Debug for Instruction {
+    fn fmt(&self, f : &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "0x{:04X}", self.0)
+    }
+}
+
+impl fmt::Display for Instruction {
+    fn fmt(&self, f : &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "0x{:04X}", self.0)
+    }
+}
diff --git a/src/vm/instructions.rs b/src/vm/instructions.rs
@@ -1,64 +0,0 @@
-use num_derive::FromPrimitive;
-
-#[repr(usize)]
-#[allow(non_camel_case_types)]
-#[derive(FromPrimitive, Clone, Copy)]
-pub enum Operators {
-    EOI         = 0,   // TAKES 0 OPERAND(s) (Not a proper operator)
-
-    PUSH_CONST  = 1,   // TAKES 1 OPERAND(s)
-    PUSH_LOCAL  = 2,   // TAKES 1 OPERAND(s)
-    PUSH_SUPER  = 3,   // TAKES 1 OPERAND(s)
-    POP         = 4,   // TAKES 0 OPERAND(s)
-    STORE_LOCAL = 5,   // TAKES 1 OPERAND(s)
-    DUP         = 6,   // TAKES 0 OPERAND(s)
-    DUP_N       = 7,   // TAKES 1 OPERAND(s)
-    SWAP        = 8,   // TAKES 0 OPERAND(s)
-    CALL_1      = 9,   // TAKES 0 OPERAND(s)
-    CHECK_TYPE  = 10,  // TAKES 0 OPERAND(s)
-    CAST        = 11,  // TAKES 2 OPERAND(s) (2 operands, 1 out of 2 bytes for each)
-    MAKE_FUNC   = 12,  // TAKES 0 OPERAND(s)
-    YIELD       = 13,  // TAKES 0 OPERAND(s)
-    RAW_PRINT   = 14,  // TAKES 0 OPERAND(s)
-
-    N_ADD       = 40,  // TAKES 0 OPERAND(s)
-    I_ADD       = 41,  // TAKES 0 OPERAND(s)
-    R_ADD       = 42,  // TAKES 0 OPERAND(s)
-    U_ADD       = 43,  // TAKES 0 OPERAND(s)
-    CONCAT      = 44,  // TAKES 0 OPERAND(s)
-    N_SUB       = 45,  // TAKES 0 OPERAND(s)
-    I_SUB       = 46,  // TAKES 0 OPERAND(s)
-    R_SUB       = 47,  // TAKES 0 OPERAND(s)
-    U_SUB       = 48,  // TAKES 0 OPERAND(s)
-    N_MUL       = 49,  // TAKES 0 OPERAND(s)
-    I_MUL       = 50,  // TAKES 0 OPERAND(s)
-    R_MUL       = 51,  // TAKES 0 OPERAND(s)
-    U_MUL       = 52,  // TAKES 0 OPERAND(s)
-    N_DIV       = 53,  // TAKES 0 OPERAND(s)
-    I_DIV       = 54,  // TAKES 0 OPERAND(s)
-    R_DIV       = 55,  // TAKES 0 OPERAND(s)
-    U_DIV       = 56,  // TAKES 0 OPERAND(s)
-
-    HALT        = 200, // TAKES 1 OPERAND(s)
-
-    // Misc- / Meta-codes
-    SET_LINE = 254,  // TAKES 1 OPERAND(s)
-    NOP = 255,       // TAKES 0 OPERAND(s)
-}
-
-impl Operators {
-    #[must_use]
-    pub fn takes_operand(self) -> bool {
-        match self {
-            Self::HALT
-            | Self::PUSH_CONST
-            | Self::PUSH_LOCAL
-            | Self::PUSH_SUPER
-            | Self::STORE_LOCAL
-            | Self::DUP_N
-            | Self::CAST
-            | Self::SET_LINE => true,
-            _ => false
-        }
-    }
-}
diff --git a/src/vm/mod.rs b/src/vm/mod.rs
@@ -1,4 +1,5 @@
-pub mod instructions;
+pub mod address;
+pub mod opcodes;
 pub mod frame;
 pub mod unmarshal;
 
diff --git a/src/vm/opcodes.rs b/src/vm/opcodes.rs
@@ -0,0 +1,64 @@
+use num_derive::FromPrimitive;
+
+#[repr(usize)]
+#[allow(non_camel_case_types)]
+#[derive(FromPrimitive, Clone, Copy, PartialEq, Eq)]
+pub enum Operators {
+    EOI         = 0,   // TAKES 0 OPERAND(s) (Not a proper operator)
+
+    PUSH_CONST  = 1,   // TAKES 1 OPERAND(s)
+    PUSH_LOCAL  = 2,   // TAKES 1 OPERAND(s)
+    PUSH_SUPER  = 3,   // TAKES 1 OPERAND(s)
+    POP         = 4,   // TAKES 0 OPERAND(s)
+    STORE_LOCAL = 5,   // TAKES 1 OPERAND(s)
+    DUP         = 6,   // TAKES 0 OPERAND(s)
+    DUP_N       = 7,   // TAKES 1 OPERAND(s)
+    SWAP        = 8,   // TAKES 0 OPERAND(s)
+    CALL_1      = 9,   // TAKES 0 OPERAND(s)
+    CHECK_TYPE  = 10,  // TAKES 0 OPERAND(s)
+    CAST        = 11,  // TAKES 2 OPERAND(s) (2 operands, 1 out of 2 bytes for each)
+    MAKE_FUNC   = 12,  // TAKES 0 OPERAND(s)
+    YIELD       = 13,  // TAKES 0 OPERAND(s)
+    RAW_PRINT   = 14,  // TAKES 0 OPERAND(s)
+
+    N_ADD       = 40,  // TAKES 0 OPERAND(s)
+    I_ADD       = 41,  // TAKES 0 OPERAND(s)
+    R_ADD       = 42,  // TAKES 0 OPERAND(s)
+    U_ADD       = 43,  // TAKES 0 OPERAND(s)
+    CONCAT      = 44,  // TAKES 0 OPERAND(s)
+    N_SUB       = 45,  // TAKES 0 OPERAND(s)
+    I_SUB       = 46,  // TAKES 0 OPERAND(s)
+    R_SUB       = 47,  // TAKES 0 OPERAND(s)
+    U_SUB       = 48,  // TAKES 0 OPERAND(s)
+    N_MUL       = 49,  // TAKES 0 OPERAND(s)
+    I_MUL       = 50,  // TAKES 0 OPERAND(s)
+    R_MUL       = 51,  // TAKES 0 OPERAND(s)
+    U_MUL       = 52,  // TAKES 0 OPERAND(s)
+    N_DIV       = 53,  // TAKES 0 OPERAND(s)
+    I_DIV       = 54,  // TAKES 0 OPERAND(s)
+    R_DIV       = 55,  // TAKES 0 OPERAND(s)
+    U_DIV       = 56,  // TAKES 0 OPERAND(s)
+
+    HALT        = 200, // TAKES 1 OPERAND(s)
+
+    // Misc- / Meta-codes
+    SET_LINE = 254,  // TAKES 1 OPERAND(s)
+    NOP = 255,       // TAKES 0 OPERAND(s)
+}
+
+impl Operators {
+    #[must_use]
+    pub fn takes_operand(self) -> bool {
+        match self {
+            Self::HALT
+            | Self::PUSH_CONST
+            | Self::PUSH_LOCAL
+            | Self::PUSH_SUPER
+            | Self::STORE_LOCAL
+            | Self::DUP_N
+            | Self::CAST
+            | Self::SET_LINE => true,
+            _ => false
+        }
+    }
+}
diff --git a/src/vm/unmarshal.rs b/src/vm/unmarshal.rs
@@ -8,11 +8,12 @@
  * The immediate next step from here is to pass the frames to the VM
  * and have it follow the instructions byte-for-byte.
  *
-!*/
+ */
 use std::collections::HashSet;
-use crate::VoidPtr;
-use super::instructions;
+use super::address::Address;
+use super::opcodes;
 use super::frame;
+use super::frame::Instruction;
 
 use num_traits::FromPrimitive;
 
@@ -58,7 +59,7 @@ mod eat {
         (i + size, padded)
     }
 
-    fn constant(mut i : usize, bytes : &ByteSlice) -> (usize, VoidPtr) {
+    fn constant(mut i : usize, bytes : &ByteSlice) -> (usize, Address) {
         let const_type = bytes[i];
         i += 1;
         return match const_type {
@@ -66,7 +67,7 @@ mod eat {
             0x01..=0x03 => {
                 let (i, bytes_slice) = consume_sized(i, bytes);
                 let bytes_slice = fix_slice_size::<u8, POINTER_BYTES>(&bytes_slice[..POINTER_BYTES]);
-                let value = usize::from_be_bytes(*bytes_slice) as VoidPtr;
+                let value = Address(usize::from_be_bytes(*bytes_slice));
                 (i, value)
             },
             // Parse Strings
@@ -75,14 +76,11 @@ mod eat {
                 let bytes_slice = fix_slice_size::<u8, POINTER_BYTES>(&bytes_slice[..POINTER_BYTES]);
                 let str_len = usize::from_be_bytes(*bytes_slice);
 
-                // Don't deallocate the string by wrapping it in a `Box`, then
-                //   casting it to a raw pointer and then to *void.
-                let string = Box::new(std::str::from_utf8(&bytes[i..i + str_len])
+                // Box the `&str` slice on the heap; `Address` holds a raw pointer to that box.
+                let string = Address::new(std::str::from_utf8(&bytes[i..i + str_len])
                     .expect("Invalid utf8 bytes in string. Bad bytecode."));
-                let string = Box::into_raw(string);  // Shadowed...
-                // String is then accessed by doing:
-                //   `unsafe { &*(frame.constants[2] as *const &str) }`
-                (i + str_len, string as *const _ as VoidPtr)
+
+                (i + str_len, string)
             }
             _ => panic!(format!(
                 "Type-specifier-prefix ({:x}) is not recognised.",
@@ -90,13 +88,13 @@ mod eat {
         }
     }
 
-    pub fn constants(mut i : usize, bytes : &ByteSlice) -> (usize, Vec<VoidPtr>) {
+    pub fn constants(mut i : usize, bytes : &ByteSlice) -> (usize, Vec<Address>) {
         // Constant blocks are expected to start with `0x11`.
         #[cfg(debug_assertions)]
         assert_eq!(bytes[i], 0x11);
         i += 1;
 
-        let mut consts : Vec<VoidPtr> = vec![];
+        let mut consts : Vec<Address> = vec![];
         while bytes[i] != 0x00 {
             let (j, void) = constant(i, bytes);
             i = j;
@@ -122,15 +120,15 @@ mod eat {
         (i + 1, set)
     }
 
-    pub fn instructions(mut i : usize, bytes : &ByteSlice) -> (usize, Vec<usize>) {
-        let mut instrs : Vec<usize> = vec![];
+    pub fn instructions(mut i : usize, bytes : &ByteSlice) -> (usize, Vec<Instruction>) {
+        let mut instrs : Vec<Instruction> = vec![];
         #[cfg(debug_assertions)]
         assert_eq!(bytes[i], 0x13);
         i += 1;
 
         while bytes[i] != 0x00 {
-            instrs.push(bytes[i] as usize);
-            let maybe_instr : Option<instructions::Operators> =
+            instrs.push(Instruction::from(bytes[i]));
+            let maybe_instr : Option<opcodes::Operators> =
                 FromPrimitive::from_usize(bytes[i] as usize);
             if let Some(instr) = maybe_instr {
                 // If the opcode takes an operand (u16), consume this too.
@@ -138,7 +136,7 @@ mod eat {
                     i += 2;
                     let operand = (u16::from(bytes[i - 1]) << 8)
                         + u16::from(bytes[i]);
-                    instrs.push(operand as usize);
+                    instrs.push(Instruction::from(operand));
                 }
             }
             i += 1;
@@ -182,7 +180,7 @@ mod eat {
 pub fn parse_blob(bytes : &ByteSlice) -> frame::Frame {
     let mut i : usize = 0;
     // Parse compiler version number.
-    let version = bytes[0..2].as_ref();
+    let _version = bytes[0..2].as_ref();
     i += 3;
 
     // Parse primary/root code block.
@@ -195,10 +193,15 @@ pub fn parse_blob(bytes : &ByteSlice) -> frame::Frame {
     // it in Rust, all you have to do is:
     // ```
     //   let string : &str = unsafe {
-    //       *(stack_frame.constants[2] as *const &str)
+    //       *(stack_frame.constants[2].0 as *const &str)
     //   };
     //   println!("str: {}", string);
     // ```
+    // Or even better:
+    // ```
+    //   let string : &str = unsafe { stack_frame.constants[2].deref() };
+    //   println!("str: {}", string);
+    // ```
 
     return stack_frame;
 }