commit 07de6e47c5e479c5deadf225c4b8d62d0dc076ae
parent 3566ca3f0bdfc2c0684253c013b5d0953c898a36
Author: Demonstrandum <moi@knutsen.co>
Date:   Mon, 22 Jun 2020 04:37:41 +0100
Added support for triple-quote expressions.
Diffstat:
5 files changed, 96 insertions(+), 9 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
@@ -6,6 +6,8 @@ version = "0.1.0"
 authors = ["Demonstrandum <moi@knutsen.co>"]
 edition = "2018"
 
+[features]
+debug = []
 
 [lib]
 name = "seam"
diff --git a/samples/xml-example-1.sex b/samples/xml-example-1.sex
@@ -1,7 +1,7 @@
 (message :status urgernt
 	(to Tove)
 	(from Jani)
-	(heading A reminder \(Again!\) for you)
+	(heading A \"reminder\" \(Again!\) for you)
 	(body Don't forget me this weekend!))
 
 
diff --git a/samples/xml-example-2.sex b/samples/xml-example-2.sex
@@ -10,7 +10,7 @@
 		(name "French(-style) Toast")
 		(price $4.50)
 		(description """
-			Thick slices made from our homemade
+			Thick slices made from our "homemade"
 			sourdough bread.
 		""")
 		(calories 600))
@@ -21,6 +21,6 @@
 			Two eggs, bacon or sausage, toast,
 			and our ever-popular hash browns.)
 		(calories "950")))
-			
+
 
 
diff --git a/src/parse/lexer.rs b/src/parse/lexer.rs
@@ -4,7 +4,7 @@ use std::path::Path;
 use std::{fmt, error::Error};
 
 #[derive(Debug, Clone)]
-pub struct LexError(Token, String);
+pub struct LexError(tokens::Site, String);
 
 impl fmt::Display for LexError {
     fn fmt(&self, f : &mut fmt::Formatter<'_>) -> fmt::Result {
@@ -42,7 +42,7 @@ fn character_kind(character : char, prev : Option<tokens::Kind>)
     }
 }
 
-pub fn lex<P: AsRef<Path>>(string : String, _source : Option<P>)
+pub fn lex<P: AsRef<Path>>(string : String, source : Option<P>)
     -> Result<TokenStream, LexError> {
 
     let eof = string.len();
@@ -56,6 +56,7 @@ pub fn lex<P: AsRef<Path>>(string : String, _source : Option<P>)
     let mut token_start : usize = 0;
     let mut current_kind = None;
     let mut old_kind = None;
+    let mut escaped = false;
 
     while bytes < eof {
         let current_byte = string.as_bytes()[bytes];
@@ -69,9 +70,71 @@ pub fn lex<P: AsRef<Path>>(string : String, _source : Option<P>)
 
         let character = current_byte as char;
 
-        if character == ';' {  // EON Comment
+        // Tripple quoted string:
+        if character == '"' && &string[bytes..bytes + 3] == "\"\"\"" {
+            token_start = line_bytes;
+            let start_line = lines;
+            bytes += 3;
+            line_bytes += 3;
+            while &string[bytes..bytes + 3] != "\"\"\"" {
+                if string[bytes..].is_empty() {
+                    let mut site = tokens::Site::from_line(
+                        lines, line_bytes, 1);
+                    site.source = source
+                        .map(|e| e.as_ref().display().to_string());
+                    return Err(LexError(site,
+                        String::from("Unclosed tripple-quoted string.")));
+                }
+                let c = string.as_bytes()[bytes];
+                if c == '\n' as u8 {
+                    lines += 1;
+                    line_bytes = 0;
+                }
+                accumulator.push(c);
+                bytes += 1;
+                line_bytes += 1;
+            }
+            bytes += 3;
+            line_bytes += 3;
+            current_kind = None;
+
+            let span = accumulator.len() + 3 + 3;
+            tokens.push(Token::new(tokens::Kind::String,
+                String::from_utf8(accumulator).unwrap(),
+                tokens::Site::from_line(start_line,
+                    token_start, span)));
+            accumulator = Vec::new();
+            continue;
+        }
+
+        if character == '\\' {  // Escapes
+            if current_kind == Some(tokens::Kind::String) {
+                // How escapes work in strings:
+            } else {
+                // How they work outside strings:
+                if bytes + 1 == eof {
+                    continue;
+                }
+                match string.as_bytes()[bytes + 1] as char {
+                    '\n' | '\r' | ' ' | '\t' => {
+                        current_kind = None;
+                        bytes += 1;
+                        line_bytes += 1;
+                    },
+                    _ => ()
+                }
+                escaped = true;
+                bytes += 1;
+                line_bytes += 1;
+                continue;
+            }
+        }
+
+        // EON Comments:
+        if character == ';' && current_kind != Some(tokens::Kind::String) {
             let mut i = 0;
-            while string.as_bytes()[bytes + i] != '\n' as u8 {
+            while bytes < eof
+            && string.as_bytes()[bytes + i] != '\n' as u8 {
                 i += 1;
             }
             bytes += i;
@@ -80,9 +143,13 @@ pub fn lex<P: AsRef<Path>>(string : String, _source : Option<P>)
 
         let mut prev_kind = current_kind;
         current_kind = character_kind(character, current_kind);
+        if escaped {
+            current_kind = Some(tokens::Kind::Symbol);
+        }
 
         let string_start = character == '"'
-            && prev_kind != Some(tokens::Kind::String);
+            && prev_kind != Some(tokens::Kind::String)
+            && !escaped;
         if string_start {
             current_kind = None;
         }
@@ -185,6 +252,7 @@ pub fn lex<P: AsRef<Path>>(string : String, _source : Option<P>)
             old_kind = current_kind;
             token_start = line_bytes - 1;
         }
+        escaped = false;
     }
 
     Ok(tokens)
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
@@ -83,9 +83,21 @@ pub fn parse(tokens : &[Token])
         Kind::LParen => {
             // Parse list.
             let mut slice = &tokens[1..];
+            if slice.is_empty() {
+                return Err(ParseError(
+                    "Expected `)' (closing parenthesis), got EOF."
+                    .to_owned(), token.site.clone()));
+            }
+            // Ignore leading white space in head of list.
+            if slice[0].kind == Kind::Whitespace {
+                slice = &slice[1..];
+            }
             let mut elements = Vec::new();
             let mut token = &slice[0];
+
+            let mut i = 0;
             loop {
+                i += 1;
                 if slice.is_empty() {
                     return Err(ParseError(
                         "Expected `)' (closing parenthesis), got EOF."
@@ -93,7 +105,12 @@ pub fn parse(tokens : &[Token])
                 }
                 token = &slice[0];
                 if token.kind == Kind::RParen
-                    { break; }
+                    { break; }  // End of list.
+                if token.kind == Kind::Whitespace && i == 2 {
+                    // Skip whitespace immediately after head.
+                    slice = &slice[1..];
+                    continue;
+                }
 
                 let (element, left) = parse(&slice)?;
                 elements.push(element);