commit 3566ca3f0bdfc2c0684253c013b5d0953c898a36
parent 6782df906b1cb2c6dbc29fba56e5e2b84ae65145
Author: Demonstrandum <moi@knutsen.co>
Date: Mon, 22 Jun 2020 01:55:18 +0100
Make lexer and parser more whitespace aware.
Diffstat:
8 files changed, 117 insertions(+), 45 deletions(-)
diff --git a/src/assemble/html.rs b/src/assemble/html.rs
@@ -16,7 +16,7 @@ impl HTMLFormatter {
}
pub const DEFAULT : &str =
- "<!DOCTYPE>\n\
+ "<!DOCTYPE html>\n\
<html>\n\
<head></head>\n\
<body></body>\n\
@@ -43,7 +43,7 @@ impl Documentise for HTMLFormatter {
if has_declaration {
current_node = &self.tree[1];
} else {
- doc += "<!DOCTYPE html>"
+ doc += "<!DOCTYPE html>\n"
}
// Check if <html></html> root object exists.
// Check if head exists; if not, make an empty one.
@@ -69,11 +69,21 @@ impl Documentise for HTMLFormatter {
/// Converting the tree to an HTML string.
impl Display for HTMLFormatter {
fn fmt(&self, f : &mut fmt::Formatter<'_>) -> fmt::Result {
- for node in &self.tree {
+ let mut tree_iter = self.tree.iter().peekable();
+ while let Some(node) = tree_iter.next() {
match node {
- ParseNode::Symbol(node) => write!(f, " {}", node.value)?,
- ParseNode::Number(node) => write!(f, " {}", node.value)?,
- ParseNode::String(node) => write!(f, " {}", node.value)?,
+ ParseNode::Symbol(node)
+ | ParseNode::Number(node) => {
+ // If symbol ahead is so-called "symbolic", we can
+ // infer there was a space between them.
+ write!(f, "{}", node.value)?;
+ if let Some(peek) = tree_iter.peek() {
+ if peek.symbolic().is_some() {
+ write!(f, " ")?
+ }
+ }
+ },
+ ParseNode::String(node) => write!(f, "{}", node.value)?,
ParseNode::List(list) => {
let head = list.first();
let mut tag = "";
@@ -92,9 +102,12 @@ impl Display for HTMLFormatter {
// Declarations behave differently.
if tag.as_bytes()[0] == '!' as u8 {
- // TODO: Following can only be symbols.
while !rest.is_empty() {
- write!(f, " {}", rest[0])?;
+ if let Some(node) = rest[0].symbolic() {
+ write!(f, " {}", node.value)?;
+ } else {
+ // TODO: Make and send error (can only be symbolic).
+ }
rest = &rest[1..];
}
write!(f, ">")?;
@@ -109,13 +122,13 @@ impl Display for HTMLFormatter {
// Error! Cannot be non atomic.
}
}
- writeln!(f, ">")?;
+ write!(f, ">")?;
let html_fmt = HTMLFormatter::new(rest.to_owned());
- writeln!(f, "{}", html_fmt)?;
+ write!(f, "{}", html_fmt)?;
write!(f, "</{}>", tag)?;
},
- _ => write!(f, "hi")?,
+ _ => panic!("Uh {:?}", node),
}
}
write!(f, "")
diff --git a/src/bin.rs b/src/bin.rs
@@ -50,9 +50,10 @@ fn main() -> Result<(), Box<dyn Error>> {
for file in files {
let tree = seam::parse_file(&file)?;
- /*eprintln!("{}", &tree
+ #[cfg(feature="debug")]
+ eprintln!("{}", &tree
.iter().fold(String::new(),
- |acc, s| acc + "\n" + &s.to_string()));*/
+ |acc, s| acc + "\n" + &s.to_string()));
if target == "html" {
let fmt = seam::assemble::html::HTMLFormatter::new(tree);
let result = fmt.document();
diff --git a/src/lib.rs b/src/lib.rs
@@ -11,6 +11,8 @@ pub const VERSION : (u8, u8, u8) = (0, 1, 0);
pub fn parse<P: AsRef<Path>>(string : String, source : Option<P>)
-> Result<parser::ParseTree, Box<dyn Error>> {
let tokens = lexer::lex(string, source)?;
+ #[cfg(feature="debug")]
+ eprintln!("{:#?}", &tokens);
let tree = parser::parse_stream(tokens)?;
Ok(tree)
}
diff --git a/src/parse/lexer.rs b/src/parse/lexer.rs
@@ -15,6 +15,10 @@ impl fmt::Display for LexError {
impl Error for LexError { }
+fn is_whitespace(character : char) -> bool { // True only for newline, carriage return, tab or space.
+ ['\n', '\r', '\t', ' '].contains(&character)
+}
+
fn character_kind(character : char, prev : Option<tokens::Kind>)
-> Option<tokens::Kind> {
let kind = match character {
@@ -47,7 +51,7 @@ pub fn lex<P: AsRef<Path>>(string : String, _source : Option<P>)
let mut line_bytes : usize = 0;
let mut accumulator : Vec<u8> = Vec::new();
- let mut tokens = Vec::new();
+ let mut tokens : TokenStream = Vec::new();
let mut token_start : usize = 0;
let mut current_kind = None;
@@ -65,33 +69,74 @@ pub fn lex<P: AsRef<Path>>(string : String, _source : Option<P>)
let character = current_byte as char;
+ if character == ';' { // End-of-line comment
+ let mut i = 0;
+ while string.as_bytes()[bytes + i] != '\n' as u8 {
+ i += 1;
+ }
+ bytes += i;
+ continue;
+ }
+
let mut prev_kind = current_kind;
current_kind = character_kind(character, current_kind);
let string_start = character == '"'
- && prev_kind != Some(tokens::Kind::String);
+ && prev_kind != Some(tokens::Kind::String);
if string_start {
current_kind = None;
}
- let mut peek_kind = if bytes == eof - 1 {
+ let peek_char = if bytes == eof - 1 {
None
} else {
let peek_char = string.as_bytes()[bytes + 1] as char;
- character_kind(peek_char, current_kind)
+ Some(peek_char)
};
+ let mut peek_kind = if let Some(peeked) = peek_char {
+ character_kind(peeked, current_kind)
+ } else { None };
+
+ let some_lparen = Some(tokens::Kind::LParen);
+ let some_rparen = Some(tokens::Kind::RParen);
+
+ let was_lparen = current_kind == some_lparen;
+ let was_rparen = current_kind == some_rparen;
+
+ let peek_string = peek_char == Some('"');
+ let peek_lparen = peek_kind == some_lparen;
+ let peek_rparen = peek_kind == some_rparen;
- let was_lparen = current_kind == Some(tokens::Kind::LParen);
- let was_rparen = current_kind == Some(tokens::Kind::RParen);
- let peek_rparen = peek_kind == Some(tokens::Kind::RParen);
if was_lparen || was_rparen {
peek_kind = None;
prev_kind = None;
- }
- if peek_rparen {
+ } else if peek_rparen || peek_lparen {
+ peek_kind = None;
+ } else if peek_string {
peek_kind = None;
}
+ // If we're on a whitespace, and there's a bracket (or quote) ahead,
+ // we need to explicitly say there's whitespace between the
+ // last token and the next bracket/quotation.
+ // (Ignore the whitespace, if it is consecutive to another whitespace)
+ match tokens.last() {
+ Some(token) if token.kind != tokens::Kind::Whitespace
+ && token.kind != tokens::Kind::Keyword
+ && is_whitespace(character)
+ && (peek_rparen
+ || peek_lparen
+ || peek_char == Some('"')
+ || token.kind == tokens::Kind::String
+ || token.kind == tokens::Kind::RParen) => {
+ let kind = tokens::Kind::Whitespace;
+ let site = tokens::Site::from_line(lines, line_bytes, 1);
+ let value = character.to_string();
+ tokens.push(Token::new(kind, value, site));
+ },
+ Some(_) | None => (),
+ }
+
if let Some(kind_current) = current_kind {
if prev_kind.is_none() {
old_kind = current_kind;
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
@@ -32,6 +32,13 @@ pub enum ParseNode {
}
impl ParseNode {
+ pub fn symbolic(&self) -> Option<Node> { // Owned copy of the inner node for symbols and numbers, otherwise `None`.
+ match self {
+ Self::Symbol(node)
+ | Self::Number(node) => Some(node.to_owned()), // `to_owned` so the caller does not borrow from `self`.
+ _ => None // Every other variant is not "symbolic".
+ }
+ }
pub fn atomic(&self) -> Option<Node> {
match self {
Self::Symbol(node)
@@ -62,6 +69,7 @@ fn parse_atomic(token : &Token) -> Result<ParseNode, ParseError> {
Kind::Symbol => Ok(ParseNode::Symbol(node)),
Kind::String => Ok(ParseNode::String(node)),
Kind::Number => Ok(ParseNode::Number(node)),
+ Kind::Whitespace => Ok(ParseNode::String(node)),
_ => Err(ParseError(
String::from("Atomic token not found here."),
token.site.clone()))
@@ -96,11 +104,17 @@ pub fn parse(tokens : &[Token])
},
Kind::Keyword => {
// Parse second token, make attribute.
- let (node, slice) = parse(&tokens[1..])?;
+ let (node, mut slice) = parse(&tokens[1..])?;
let attribute = AttributeNode {
keyword: token.value[1..].to_owned(),
node: Box::new(node)
};
+ // Whitespace after an attribute doesn't count.
+ if let Some(next) = slice.first() {
+ if next.kind == Kind::Whitespace {
+ slice = &slice[1..];
+ }
+ }
Ok((ParseNode::Attribute(attribute), slice))
},
Kind::RParen => {
@@ -127,12 +141,19 @@ pub fn parse_stream(tokens: tokens::TokenStream)
}
/// Pretty printing for parse nodes.
+#[cfg(feature="debug")]
impl fmt::Display for ParseNode {
fn fmt(&self, f : &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
ParseNode::Symbol(node)
| ParseNode::Number(node) => write!(f, "{}", &node.value),
- ParseNode::String(node) => write!(f, "\"{}\"", &node.value),
+ ParseNode::String(node) => {
+ if node.value.trim().is_empty() {
+ write!(f, "")
+ } else {
+ write!(f, "\"{}\"", &node.value)
+ }
+ },
ParseNode::Attribute(attr) => write!(f, ":{} {}",
&attr.keyword, &*attr.node),
ParseNode::List(list) => write!(f, "({}{})", &list[0],
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
@@ -36,6 +36,7 @@ pub enum Kind {
String,
Number,
Keyword,
+ Whitespace,
}
#[derive(Debug, Clone)]
diff --git a/test.html b/test.html
@@ -1,21 +1,9 @@
-<!DOCTYPE html><html>
-<head>
-<title>
- Example HTML Document
-</title>
-</head><body>
-<p id="hello">
- Hello, World!
-</p><p>
- something something text...
-</p><h1>
- A (big) Header!
-</h1><p>
- Yet some more<span style="color: red">
- text
-</span> <3
-</p><img alt="Cute Cat" src="https://static.insider.com/image/5d24d6b921a861093e71fef3.jpg" width="300">
+<!DOCTYPE html>
+<html> <head> <title>Example HTML Document</title></head>
+<body> <p id="hello">Hello, World!</p>
+<p>something something text...</p>
+<h1> A (big) Header!</h1>
+<p>Yet some more <span style="color: red">text</span> <3</p>
+<p>Hello<span style="color: green">World</span>!</p>
+<img alt="Cat" src="https://static.insider.com/image/5d24d6b921a861093e71fef3.jpg" width="300"></img></body></html>
-</img>
-</body>
-</html>
diff --git a/test.sex b/test.sex
@@ -8,8 +8,9 @@
(h1 "A (big) Header!")
(p Yet some more
(span :style "color: red" text) <3)
+ (p Hello(span :style "color: green" World)!)
(img
- :alt "Cute Cat"
+ :alt Cat
:src "https://static.insider.com/image/5d24d6b921a861093e71fef3.jpg"
:width 300)))