seam

Symbolic-Expressions As Markup.
git clone git://git.knutsen.co/seam
Log | Files | Refs | README | LICENSE

commit 07de6e47c5e479c5deadf225c4b8d62d0dc076ae
parent 3566ca3f0bdfc2c0684253c013b5d0953c898a36
Author: Demonstrandum <moi@knutsen.co>
Date:   Mon, 22 Jun 2020 04:37:41 +0100

Added support for triple quote expressions.

Diffstat:
MCargo.toml | 2++
Msamples/xml-example-1.sex | 2+-
Msamples/xml-example-2.sex | 4++--
Msrc/parse/lexer.rs | 78+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
Msrc/parse/parser.rs | 19++++++++++++++++++-
5 files changed, 96 insertions(+), 9 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml @@ -6,6 +6,8 @@ version = "0.1.0" authors = ["Demonstrandum <moi@knutsen.co>"] edition = "2018" +[features] +debug = [] [lib] name = "seam" diff --git a/samples/xml-example-1.sex b/samples/xml-example-1.sex @@ -1,7 +1,7 @@ (message :status urgernt (to Tove) (from Jani) - (heading A reminder \(Again!\) for you) + (heading A \"reminder\" \(Again!\) for you) (body Don't forget me this weekend!)) diff --git a/samples/xml-example-2.sex b/samples/xml-example-2.sex @@ -10,7 +10,7 @@ (name "French(-style) Toast") (price $4.50) (description """ - Thick slices made from our homemade + Thick slices made from our "homemade" sourdough bread. """) (calories 600)) @@ -21,6 +21,6 @@ Two eggs, bacon or sausage, toast, and our ever-popular hash browns.) (calories "950"))) - + diff --git a/src/parse/lexer.rs b/src/parse/lexer.rs @@ -4,7 +4,7 @@ use std::path::Path; use std::{fmt, error::Error}; #[derive(Debug, Clone)] -pub struct LexError(Token, String); +pub struct LexError(tokens::Site, String); impl fmt::Display for LexError { fn fmt(&self, f : &mut fmt::Formatter<'_>) -> fmt::Result { @@ -42,7 +42,7 @@ fn character_kind(character : char, prev : Option<tokens::Kind>) } } -pub fn lex<P: AsRef<Path>>(string : String, _source : Option<P>) +pub fn lex<P: AsRef<Path>>(string : String, source : Option<P>) -> Result<TokenStream, LexError> { let eof = string.len(); @@ -56,6 +56,7 @@ pub fn lex<P: AsRef<Path>>(string : String, _source : Option<P>) let mut token_start : usize = 0; let mut current_kind = None; let mut old_kind = None; + let mut escaped = false; while bytes < eof { let current_byte = string.as_bytes()[bytes]; @@ -69,9 +70,71 @@ pub fn lex<P: AsRef<Path>>(string : String, _source : Option<P>) let character = current_byte as char; - if character == ';' { // EON Comment + // Tripple quoted string: + if character == '"' && &string[bytes..bytes + 3] == "\"\"\"" { + token_start = line_bytes; + let start_line = lines; + bytes += 3; + line_bytes 
+= 3; + while &string[bytes..bytes + 3] != "\"\"\"" { + if string[bytes..].is_empty() { + let mut site = tokens::Site::from_line( + lines, line_bytes, 1); + site.source = source + .map(|e| e.as_ref().display().to_string()); + return Err(LexError(site, + String::from("Unclosed tripple-quoted string."))); + } + let c = string.as_bytes()[bytes]; + if c == '\n' as u8 { + lines += 1; + line_bytes = 0; + } + accumulator.push(c); + bytes += 1; + line_bytes += 1; + } + bytes += 3; + line_bytes += 3; + current_kind = None; + + let span = accumulator.len() + 3 + 3; + tokens.push(Token::new(tokens::Kind::String, + String::from_utf8(accumulator).unwrap(), + tokens::Site::from_line(start_line, + token_start, span))); + accumulator = Vec::new(); + continue; + } + + if character == '\\' { // Escapes + if current_kind == Some(tokens::Kind::String) { + // How escapes work in strings: + } else { + // How they work outside strings: + if bytes + 1 == eof { + continue; + } + match string.as_bytes()[bytes + 1] as char { + '\n' | '\r' | ' ' | '\t' => { + current_kind = None; + bytes += 1; + line_bytes += 1; + }, + _ => () + } + escaped = true; + bytes += 1; + line_bytes += 1; + continue; + } + } + + // EON Comments: + if character == ';' && current_kind != Some(tokens::Kind::String) { let mut i = 0; - while string.as_bytes()[bytes + i] != '\n' as u8 { + while bytes < eof + && string.as_bytes()[bytes + i] != '\n' as u8 { i += 1; } bytes += i; @@ -80,9 +143,13 @@ pub fn lex<P: AsRef<Path>>(string : String, _source : Option<P>) let mut prev_kind = current_kind; current_kind = character_kind(character, current_kind); + if escaped { + current_kind = Some(tokens::Kind::Symbol); + } let string_start = character == '"' - && prev_kind != Some(tokens::Kind::String); + && prev_kind != Some(tokens::Kind::String) + && !escaped; if string_start { current_kind = None; } @@ -185,6 +252,7 @@ pub fn lex<P: AsRef<Path>>(string : String, _source : Option<P>) old_kind = current_kind; token_start = line_bytes 
- 1; } + escaped = false; } Ok(tokens) diff --git a/src/parse/parser.rs b/src/parse/parser.rs @@ -83,9 +83,21 @@ pub fn parse(tokens : &[Token]) Kind::LParen => { // Parse list. let mut slice = &tokens[1..]; + if slice.is_empty() { + return Err(ParseError( + "Expected `)' (closing parenthesis), got EOF." + .to_owned(), token.site.clone())); + } + // Ignore leading white space in head of list. + if slice[0].kind == Kind::Whitespace { + slice = &slice[1..]; + } let mut elements = Vec::new(); let mut token = &slice[0]; + + let mut i = 0; loop { + i += 1; if slice.is_empty() { return Err(ParseError( "Expected `)' (closing parenthesis), got EOF." @@ -93,7 +105,12 @@ pub fn parse(tokens : &[Token]) } token = &slice[0]; if token.kind == Kind::RParen - { break; } + { break; } // End of list. + if token.kind == Kind::Whitespace && i == 2 { + // Skip whitespace immediately after head. + slice = &slice[1..]; + continue; + } let (element, left) = parse(&slice)?; elements.push(element);