seam

Symbolic-Expressions As Markup.
git clone git://git.knutsen.co/seam
Log | Files | Refs | README | LICENSE

commit 18a44356eeb5c5c560a05678e038d4ad44ef46b5
parent e91b29cfa7dad116e264a04e1d9c4a16308cb9ef
Author: Demonstrandum <samuel@knutsen.co>
Date:   Fri, 28 Jun 2024 18:41:39 +0100

Major revamp to how unicode and whitespace is handled.

Previously, whitespace was implied based on the semantics of different
tokens/nodes. This was majorly stupid, ended up creating the need for
special whitespace tokens and nodes, as well as fake empty strings
and all sorts of shenanigans. Instead, each token (and node) now just
stores a slice of all the whitespace that preceded it from the
source-code itself.

We also actually read unicode (utf8) correctly, instead of having the
lexer basically just break on non-ASCII text.

Diffstat:
MCargo.lock | 360+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
MCargo.toml | 12+++++++-----
MREADME.md | 48+++++++++++++++++++++++++++++++++---------------
MUSAGE.md | 2+-
Msamples/js-concept.sex | 2+-
Rtest.html -> samples/tests/html-test-1-2020-12-01.html | 0
Asamples/tests/html-test-1-2024-06-28.html | 39+++++++++++++++++++++++++++++++++++++++
Asamples/tests/html-test-1.sex | 16++++++++++++++++
Asamples/tests/html-test-2.sex | 23+++++++++++++++++++++++
Rtest-css.sex -> samples/tests/test-css.sex | 0
Msrc/assemble/css.rs | 89++++++++++++++++++++++++++++++++++++-------------------------------------------
Msrc/assemble/html.rs | 232+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------
Msrc/assemble/mod.rs | 94+++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
Asrc/assemble/sexp.rs | 66++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/assemble/xml.rs | 117++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Msrc/bin.rs | 124+++++++++++++++++++++++++++++++++++++++----------------------------------------
Msrc/lib.rs | 32+++++++++++++++-----------------
Msrc/parse/expander.rs | 625++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
Msrc/parse/lexer.rs | 551+++++++++++++++++++++++++++++++++++++++++++------------------------------------
Msrc/parse/mod.rs | 15++++++---------
Msrc/parse/parser.rs | 520+++++++++++++++++++++++++++++++++++++++++++++----------------------------------
Msrc/parse/tokens.rs | 125+++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
Dtest.sex | 16----------------
23 files changed, 1983 insertions(+), 1125 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock @@ -3,53 +3,110 @@ version = 3 [[package]] -name = "atty" -version = "0.2.14" +name = "android-tzdata" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" dependencies = [ - "hermit-abi", "libc", - "winapi", ] [[package]] name = "autocfg" -version = "1.0.1" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "cc" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" + +[[package]] +name = "cfg-if" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.19" +version = "0.4.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" dependencies = [ - "libc", - "num-integer", + "android-tzdata", + "iana-time-zone", + "js-sys", "num-traits", - "time", - "winapi", + "wasm-bindgen", + "windows-targets 0.52.5", ] 
[[package]] name = "colored" -version = "1.9.3" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4ffc801dacf156c5854b9df4f425a626539c3a6ef7893cc0c5084a23f0b6c59" +checksum = "cbf2150cce219b664a8a70df7a1f933836724b503f8a413af9365b4dcc4d90b8" dependencies = [ - "atty", "lazy_static", - "winapi", + "windows-sys", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + +[[package]] +name = "descape" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "396a0a312bef78b5f62b0251d7162c4b8af162949b8b104d2967e41b26c1b68c" + +[[package]] +name = "iana-time-zone" +version = "0.1.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", ] [[package]] -name = "hermit-abi" -version = "0.1.18" +name = "iana-time-zone-haiku" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "322f4de77956e22ed0e5032c359a0f1273f1f7f0d79bfa3b8ffbc730d7fbcc5c" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" dependencies = [ - "libc", + "cc", +] + +[[package]] +name = "js-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +dependencies = [ + "wasm-bindgen", ] [[package]] @@ -60,72 +117,271 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.90" +version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ba4aede83fc3617411dc6993bc8c70919750c1c257c6ca6a502aed6e0e2394ae" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" [[package]] -name = "num-integer" -version = "0.1.44" +name = "log" +version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", - "num-traits", ] [[package]] -name = "num-traits" -version = "0.2.14" +name = "once_cell" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "proc-macro2" +version = "1.0.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" dependencies = [ - "autocfg", + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", ] [[package]] name = "seam" -version = "0.1.8" +version = "0.2.0" dependencies = [ "chrono", "colored", + "descape", + "unicode-width", ] [[package]] -name = "time" -version = "0.1.44" +name = "syn" +version = "2.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" +checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" dependencies = [ - "libc", - "wasi", - "winapi", + 
"proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-width" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6" + +[[package]] +name = "wasm-bindgen" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets 0.52.5", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", ] [[package]] -name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" +name = "windows-targets" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] [[package]] -name = "winapi" -version = "0.3.9" +name = "windows-targets" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", + "windows_aarch64_gnullvm 0.52.5", + "windows_aarch64_msvc 0.52.5", + "windows_i686_gnu 0.52.5", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.5", + "windows_x86_64_gnu 0.52.5", + "windows_x86_64_gnullvm 0.52.5", + "windows_x86_64_msvc 0.52.5", ] [[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" +name = "windows_x86_64_msvc" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" diff --git a/Cargo.toml b/Cargo.toml @@ -4,11 +4,12 @@ description = "Symbolic Expressions As Markup." keywords = ["markup", "lisp", "macro", "symbolic-expression", "sexp"] license-file = "LICENSE" homepage = "https://git.knutsen.co/seam" -version = "0.1.8" -authors = ["Demonstrandum <moi@knutsen.co>"] -edition = "2018" +version = "0.2.0" +authors = ["Demonstrandum <samuel@knutsen.co>"] +edition = "2021" [features] +default = ["debug"] debug = [] [lib] @@ -20,6 +21,7 @@ name = "seam" path = "src/bin.rs" [dependencies] -colored = "1.8" +colored = "2.1" chrono = "0.4" - +unicode-width = "0.1.12" +descape = "1.1.2" diff --git a/README.md b/README.md @@ -20,9 +20,10 @@ Read the [USAGE.md](USAGE.md) file for code examples and documentation. 
### Current Formats - - XML - - HTML - - CSS + - XML (`--xml`; including: SVG, MathML) + - HTML (`--html`; SGML) + - CSS (`--css`) + - SEXP (`--sexp`; S-expression, basically a macro expansion utility) ### Installation @@ -68,39 +69,56 @@ seam --html <<< "(p Hello World)" # <head></head> # <body> # <p>Hello World</p> -# <!-- Generated by SEAM. --> # </body> # </html> ``` ```sh +seam --html --nodocument <<< "(p Hello World)" +#stdout: +# <p>Hello World</p> +``` +```sh seam --xml <<< '(para Today is a day in (%date "%B, year %Y").)' #stdout: # <?xml version="1.0" encoding="UTF-8" ?> # <para>Today is a day in November, year 2020.</para> -# <!-- Generated by SEAM. --> +``` +```sh +seam --sexp <<< '(hello (%define subject world) %subject)' +#stdout: +# (hello world) ``` ## TODO - - Rewrite lexer to only insert whitespace before and after {`(`, `)`} and - next to string-literals. Whitespace should then be added between between - symbols *after* macro expansion, since a macro could expand to any literal. - Variadic macros should preserve whitespace in its arguments entirely (no stripping). + - Escape evaluating macros with `\%`. + - `(%format "{}")` macro with Rust's `format` syntax. + - Implement lexical scope by letting macros store a copy of the scope they were defined in (or a reference?). + - `(%embed "/path")` macro, like `%include`, but just returns the file contents as a string. + - Variadic arguments via `&rest` syntax. + - Delayed evaluation of macros by `%(...)` synatx. + For example `%(f x y)` is the same as `(%f x y)`, so you can have `(%define uneval f x)` and then write `%(%uneval y)`. - `%list` macro which expands from `(p (%list a b c))` to `(p a b c)`. - This is essentially an anonymous macro definition, i.e `(%define L a b c)`, - then `%L` is the same as `(%list a b c)`. + Defined as such: + ```lisp + (%define (list &rest) rest) + ``` - `%for`-loop macro, iterating over `%list`s. - `%glob` which returns a list of files/directories matching a glob. 
- - `%markdown` renders markdown given to it. + - `%markdown` renders Markdown given to it as html. - `%html`, `%xml`, `%css`, etc. macros which goes into the specific rendering mode. - Add variadic and keyword macro arguments. - Caching or checking time-stamps as to not regenerate unmodified source files. - HTML object `style="..."` object should handle s-expressions well, (e.g. `(p :style (:color red :border none) Hello World)`) - - HTML `<style>` tag should allow for *normal* CSS syntax if just given a string. - - Allow for, and handle special `@` syntax in CSS, such as `@import` and `@media`. - Add more supported formats (`JSON`, `JS`, `TOML`, &c.). + - Maybe: a whole JavaScript front-end, e.g. + ```lisp + (let x 2) + (let (y 1) (z 1)) + (const f (=> (a b) (+ a b)) + ((. console log) (== (f y z) x)) + ``` - Add more helpful/generic macros (e.g. `(%include ...)`, which already exists). - Allow for arbitrary embedding of code, that can be run by a LISP interpreter (or any other langauge), for example. (e.g. `(%chez (+ 1 2))` executes `(+ 1 2)` with Chez-Scheme LISP, and places the result in the source (i.e. `3`). - diff --git a/USAGE.md b/USAGE.md @@ -79,7 +79,7 @@ will be treated as macros. *File `index.sex`:* ```lisp (!doctype html) -(html +(html :lang en (head (%define title "Index page") (%include "head.sex")) (body diff --git a/samples/js-concept.sex b/samples/js-concept.sex @@ -42,7 +42,7 @@ (= (. sketch fill) (Color `transparent`)) (= sketch.stroke (Color `black`)) -(const gaussian (=> (= mean 0) (= σ 1) +(const gaussian (=> ((= mean 0) (= σ 1)) (const ϑ (* (. Math TAU) ((. 
Math random))) (const ρ (Math.sqrt (* -2 (Math.log (- 1 (Math.random)))))) (const radius (* σ ρ)) diff --git a/test.html b/samples/tests/html-test-1-2020-12-01.html diff --git a/samples/tests/html-test-1-2024-06-28.html b/samples/tests/html-test-1-2024-06-28.html @@ -0,0 +1,39 @@ +<!DOCTYPE html> +<html lang="en"> + <head> + <title>Example HTML Document</title> + <style> +html { + width: 100%; + height: 100%; +} + +html , body { + margin: 0; + padding: 0; +} + +body { + padding: 4em 6em; +} + +#hello { + color: rgb(24 calc((3 + (7 * 3) + 1)) 4); + font-family: sans-serif; +} + +img { + border-radius: 5px; +} + </style> + </head> + <body> + <p id="hello">Hello, World!</p> + <p>something something text...</p> + <h1>A (big) Header!</h1> + <p>Yet some more + <span style="color: red">text</span> <3</p> + <p>Hello<span style="color: green">World</span>!</p> + <img alt="Cute cat" src="https://static.insider.com/image/5d24d6b921a861093e71fef3.jpg" width="300"> + </body> +</html> diff --git a/samples/tests/html-test-1.sex b/samples/tests/html-test-1.sex @@ -0,0 +1,16 @@ +(!DOCTYPE html) +(html :lang en ; language is set to english. + (head + (title Example HTML Document) + (style (%include "./test-css.sex"))) + (body + (p :id hello Hello, World!) + (p something something text...) + (h1 "A (big) Header!") + (p Yet some more + (span :style "color: red" text) <3) + (p Hello(span :style "color: green" World)!) + (img + :alt "Cute cat" + :src "https://static.insider.com/image/5d24d6b921a861093e71fef3.jpg" + :width 300))) diff --git a/samples/tests/html-test-2.sex b/samples/tests/html-test-2.sex @@ -0,0 +1,23 @@ +(!DOCTYPE html) +; A document can contain +; any number of comments too. +(html :lang en + (head + (%define title My Page) + (title %title - (Example HTML Document)) + (style (%include test-css.sex))) + (body + (p :id hello Hello, World!) + (p something something text...) + (h1 A big header) + (p Yet some more + (span text) <3) + (p Hello(span World)!) 
+ (pre + nothing %title gets + evaluated in here) + (script :id javascript + (%define CONSTANT 3.14) + let my_num = (1 + %CONSTANT) * 2; + console.log(my_num); + ))) diff --git a/test-css.sex b/samples/tests/test-css.sex diff --git a/src/assemble/css.rs b/src/assemble/css.rs @@ -1,16 +1,16 @@ //! Assembles an expanded tree into valid CSS. use super::{GenerationError, MarkupDisplay, Formatter}; -use crate::parse::parser::{self, ParseNode, ParseTree}; +use crate::parse::parser::{ParseNode, ParseTree}; use std::slice::Iter; #[derive(Debug, Clone)] -pub struct CSSFormatter { - pub tree : ParseTree, +pub struct CSSFormatter<'a> { + pub tree : ParseTree<'a>, } -impl CSSFormatter { - pub fn new(tree : ParseTree) -> Self { +impl<'a> CSSFormatter<'a> { + pub fn new(tree: ParseTree<'a>) -> Self { Self { tree } } } @@ -57,11 +57,10 @@ const CSS_ONELINE_RULES : [&str; 3] /// or at least I think. const BINARY_OPERATORS : [&str; 4] = ["+", "-", "*", "/"]; -fn convert_value(node : &ParseNode) -> Result<String, GenerationError> { +fn convert_value<'a>(node: &'a ParseNode<'a>) -> Result<String, GenerationError<'a>> { match node { - ParseNode::List(list) => { - let list = parser::strip(list, false); - let result = match list.as_slice() { + ParseNode::List { nodes: list, .. } => { + let result = match &**list { [head, tail@..] => { let head = convert_value(head)?; @@ -100,7 +99,7 @@ fn convert_value(node : &ParseNode) -> Result<String, GenerationError> { } else { node.value.to_owned() }), - ParseNode::Attribute(_) => Err(GenerationError::new("CSS-value", + ParseNode::Attribute { .. } => Err(GenerationError::new("CSS-value", "Incompatible structure (attribute) found in CSS \ property value.", &node.site())) @@ -110,8 +109,8 @@ fn convert_value(node : &ParseNode) -> Result<String, GenerationError> { /// Function responsible for translating a CSS value (i.e. /// a value of a CSS property) from some s-expression into /// a valid CSS value. 
-pub fn css_value(_property : &str, node : &ParseNode) --> Result<String, GenerationError> { +pub fn css_value<'a>(_property : &str, node: &'a ParseNode<'a>) +-> Result<String, GenerationError<'a>> { // Naïve way (in future consider the type of property, // and take care of special cases): convert_value(node) @@ -123,18 +122,17 @@ pub fn css_value(_property : &str, node : &ParseNode) /// (@symbol :attr arg) -> @symbol (attr: arg); /// (@symbol (select :prop val)) -> @symbol { select { prop: val; } } /// (@sym x :attr arg (sel :prop val)) -> @sym x (attr: arg) { sel { prop: val; } } -fn generate_special_selector - (f: Formatter, - selector: &str, - arguments: Iter<ParseNode>) -> Result<(), GenerationError> { +fn generate_special_selector<'a>(f: Formatter, + selector: &str, + arguments: Iter<'a, ParseNode<'a>>) + -> Result<(), GenerationError<'a>> { // Deal with oneline rules quickly. if CSS_ONELINE_RULES.contains(&selector) { write!(f, "{} ", selector)?; for arg in arguments { match arg { - ParseNode::Attribute(attr) => { - let kw = &attr.keyword; - write!(f, "({}: {}) ", kw, css_value(kw, &*attr.node)?)?; + ParseNode::Attribute { ref keyword, node, .. } => { + write!(f, "({}: {}) ", keyword, css_value(keyword, &*node)?)?; }, _ => write!(f, "{} ", css_value(selector, arg)?)? } @@ -146,10 +144,11 @@ fn generate_special_selector write!(f, "{} ", selector)?; let mut parsing_rules = false; - let unexpected_node = |node: &ParseNode, rules: bool| { + let unexpected_node = |node: &'a ParseNode<'a>, rules: bool| { if rules { Err(GenerationError::new("CSS", - "Expected list (i.e. a CSS rule) here!", &node.site())) + "Expected list (i.e. a CSS rule) here!", + &node.site())) } else { Ok(()) } @@ -157,17 +156,18 @@ fn generate_special_selector for arg in arguments { match arg { - ParseNode::Attribute(attr) => { + ParseNode::Attribute { ref keyword, node, .. 
} => { unexpected_node(&arg, parsing_rules)?; - let kw = &attr.keyword; - write!(f, "({}: {}) ", kw, css_value(kw, &*attr.node)?)?; + write!(f, "({}: {}) ", keyword, css_value(keyword, &*node)?)?; }, - ParseNode::List(rule) => { // Now we parse nested rules! + ParseNode::List { nodes: ref rule, leading_whitespace, .. } => { + // Now we parse nested rules! if !parsing_rules { writeln!(f, "{{")?; } parsing_rules = true; - generate_css_rule(f, &rule)?; + write!(f, "{}", leading_whitespace)?; + generate_css_rule(f, rule)?; }, _ => { unexpected_node(&arg, parsing_rules)?; @@ -175,13 +175,12 @@ fn generate_special_selector } } } - writeln!(f, "}}")?; + write!(f, "}}")?; Ok(()) } -fn generate_css_rule(f: Formatter, list: &[ParseNode]) -> Result<(), GenerationError> { - let stripped = parser::strip(list, false); - let mut iter = stripped.iter(); +fn generate_css_rule<'a>(f: Formatter, list: &'a [ParseNode<'a>]) -> Result<(), GenerationError<'a>> { + let mut iter = list.iter(); let mut prop_i = 0; // Index of first property. // TODO: Selector functions such as nth-child(...), etc. // e.g. (ul li(:nth-child (+ 2n 1))) -> ul li:nth-child(2n + 1). @@ -217,50 +216,43 @@ fn generate_css_rule(f: Formatter, list: &[ParseNode]) -> Result<(), GenerationE let properties = iter.skip(prop_i - 1); for property in properties { - if let ParseNode::Attribute(property) = property { - let value = &property.node; - writeln!(f, " {}: {};", - &property.keyword, - css_value(&property.keyword, value)?)?; - } else { + let ParseNode::Attribute { ref node, ref keyword, .. } = property else { return Err(GenerationError::new("CSS", "CSS property-value pairs must be in the \ form of attributes, i.e. 
`:property value`.", &property.site())); - } + }; + writeln!(f, " {}: {};", keyword, css_value(keyword, node)?)?; } - writeln!(f, "}}")?; - + write!(f, "}}")?; Ok(()) } -impl MarkupDisplay for CSSFormatter { - +impl<'a> MarkupDisplay for CSSFormatter<'a> { fn document(&self) -> Result<String, GenerationError> { let mut doc = String::new(); if self.tree.is_empty() { return Ok(String::from(DEFAULT)); } doc += &self.display()?; - doc += "\n/* Generated from symbolic-expressions, with SEAM */\n"; Ok(doc) } - fn generate(&self, f : Formatter) + fn generate(&self, f: Formatter) -> Result<(), GenerationError> { let mut tree_iter = self.tree.iter().peekable(); while let Some(node) = tree_iter.next() { match node { - ParseNode::List(list) => { - generate_css_rule(f, list)?; + ParseNode::List { nodes: list, leading_whitespace, .. } => { + write!(f, "{}", leading_whitespace)?; + generate_css_rule(f, &*list)?; }, - ParseNode::Attribute(attr) => { - let site = attr.site.to_owned(); + ParseNode::Attribute { site, .. } => { return Err(GenerationError::new("CSS", "Attribute not expected here, CSS documents \ are supposed to be a series of selectors \ and property-value pairs, wrapped in parentheses.", - &site)); + &site.to_owned())); }, ParseNode::Symbol(node) | ParseNode::Number(node) @@ -280,4 +272,3 @@ impl MarkupDisplay for CSSFormatter { Ok(()) } } - diff --git a/src/assemble/html.rs b/src/assemble/html.rs @@ -1,21 +1,26 @@ //! Assembles an expanded tree into valid HTML. 
-use super::{GenerationError, MarkupDisplay, Formatter}; -use super::css::CSSFormatter; +use super::{escape_xml, GenerationError, MarkupDisplay, Formatter}; +use super::{ + sexp::SExpFormatter, + xml::XMLFormatter, + css::CSSFormatter, +}; use crate::parse::parser::{ParseNode, ParseTree, SearchTree, SearchType}; +use crate::parse::tokens; #[derive(Debug, Clone)] -pub struct HTMLFormatter { - pub tree : ParseTree, +pub struct HTMLFormatter<'a> { + pub tree: ParseTree<'a>, } -impl HTMLFormatter { - pub fn new(tree : ParseTree) -> Self { +impl<'a> HTMLFormatter<'a> { + pub fn new(tree: ParseTree<'a>) -> Self { Self { tree } } } -pub const DEFAULT : &str = +pub const DEFAULT: &str = "<!DOCTYPE html>\n\ <html>\n\ <head></head>\n\ @@ -24,7 +29,25 @@ pub const DEFAULT : &str = </body>\n\ </html>\n"; -impl MarkupDisplay for HTMLFormatter { +/// HTML void elements do not get a closing `</...>` tag. They are self-closing. +const VOID_ELEMENTS: [&str; 14] = [ + "area", + "base", + "br", + "col", + "embed", + "hr", + "img", + "input", + "link", + "meta", + "param", + "source", + "track", + "wbr", +]; + +impl<'a> MarkupDisplay for HTMLFormatter<'a> { fn document(&self) -> Result<String, GenerationError> { let mut doc = String::new(); if self.tree.is_empty() { @@ -45,7 +68,8 @@ impl MarkupDisplay for HTMLFormatter { = self.tree.search_node(SearchType::ListHead, "body", true, 2); if doctype_tag.is_none() { - eprintln!("no doctype found"); + #[cfg(feature="debug")] + eprintln!("html: no doctype found in document"); doc += "<!DOCTYPE html>\n"; if html_tag.is_none() { doc += "<html>\n"; @@ -59,7 +83,7 @@ impl MarkupDisplay for HTMLFormatter { } // Populate. doc += &self.display()?; - doc += "\n<!-- Generated by SEAM. 
-->\n"; + doc += "\n"; if doctype_tag.is_none() { if html_tag.is_none() { @@ -70,6 +94,7 @@ impl MarkupDisplay for HTMLFormatter { } } + if doc.ends_with('\n') { let _ = doc.pop(); } Ok(doc) } @@ -80,29 +105,34 @@ impl MarkupDisplay for HTMLFormatter { match node { ParseNode::Symbol(node) | ParseNode::Number(node) => { - // If symbol ahead is so-called "symbolic", we can - // infere there was a space between them. - write!(f, "{}", node.value)?; - if let Some(peek) = tree_iter.peek() { - if peek.symbolic().is_some() { - write!(f, " ")? - } - } + write!(f, "{}", node.leading_whitespace)?; + write!(f, "{}", escape_xml(&node.value))?; }, - ParseNode::String(node) => write!(f, "{}", node.value)?, - ParseNode::List(list) => { + ParseNode::String(node) => { + write!(f, "{}", node.leading_whitespace)?; + write!(f, "{}", escape_xml(&node.value))?; + }, + ParseNode::List { nodes: list, leading_whitespace, end_token, .. } => { + write!(f, "{}", leading_whitespace)?; let head = list.first(); - let mut tag = ""; + let tag: &str; // html <tag> name. if let Some(head_node) = head { if let ParseNode::Symbol(head_symbol) = head_node { tag = &head_symbol.value; write!(f, "<{}", tag)?; } else { - // TODO: Error, tags can only have symbol values. + // Error, tags can only have symbol values. + return Err(GenerationError::new("HTML", + "HTML tags can only be given as symbols.", + head_node.site())); } } else { - // TODO: Error, empty tags not supported. + // Error, empty tags not supported. + return Err(GenerationError::new("HTML", + "Empty lists cannot be converted into a valid HTML tag.", + node.site())); } + let tag = tag.to_ascii_lowercase(); let mut rest = &list[1..]; @@ -122,47 +152,155 @@ impl MarkupDisplay for HTMLFormatter { continue; } - while let Some(ParseNode::Attribute(attr)) = rest.first() { - if let Some(atom) = (*attr.node).atomic() { - write!(f, " {}=\"{}\"", attr.keyword, atom.value)?; + while let Some(ParseNode::Attribute { node, keyword, .. 
}) = rest.first() { + if let Some(atom) = (*node).atomic() { + write!(f, " {}=\"{}\"", keyword, atom.value)?; rest = &rest[1..]; } else { // Error! Cannot be non atomic. return Err(GenerationError::new("HTML", "Attribute cannot contain non-atomic data.", - &(*attr.node).site())); + &(*node).site())); } } write!(f, ">")?; - // <style /> tag needs to generate CSS. - if tag == "style" { // TODO: If just a string, don't convert. - writeln!(f, "")?; - let css_fmt = CSSFormatter::new(rest.to_owned()); - css_fmt.generate(f)?; + // Check early if this tag is a void element. + if VOID_ELEMENTS.binary_search(&tag.as_str()).is_ok() { + // Void elements cannot have children. + if let Some(child_node) = rest.first() { + return Err(GenerationError::new("HTML", + &format!("A void element such as `<{}>' cannot have children.", tag), + child_node.site())); + } + // Finished: void elements don't get a closing tag. + return Ok(()); + } + + // The first node to a tag should have its whitespace suppressed! + // e.g. `(p hello world)` -> `<p>hello world</p>`. + // But if there's a new line, it's likely it should be carried through. + // e.g. 
+ // ``` + // (div + // hello) + // ``` + // -> + // ``` + // <div> + // hello + // </div> + let rest_with_preserved_whitespace = rest; + let mut rest: Vec<ParseNode<'a>> = rest_with_preserved_whitespace.to_vec(); + let mut is_first_node_on_next_line = false; + if let Some(first_node) = rest.get_mut(0) { + is_first_node_on_next_line = first_node.leading_whitespace().contains('\n'); + if !is_first_node_on_next_line { + first_node.set_leading_whitespace("".to_owned()); + } + } + + // Handle tags which *do not* contain HTML as syntax: + // <pre>, <style>, <script>, <math>, <svg>, <textarea>, <title> + // Specifically: + // - <svg> and <math> contain XML, not HTML; + // - <pre>, <textarea> and <title> contain raw text, not parsed as HTML; + // - <pre> will display raw text found in source code; + // - <textarea> and <title> however, are escapable (evaluate macros); + // - <script> contains JavaScript, maybe we will parse this in the future!; + // - <style> contains CSS, which we have our own parser for already. + match tag.as_str() { + "pre" => { // <pre> should preserve the raw text in the *source* file. + // Find beginning and end byte offset of first and last token inside + // of `(pre ...)` and simply clone the text between those offsets. + let pre = raw_text(rest_with_preserved_whitespace.first(), end_token); + write!(f, "{}", pre)?; + }, + "textarea" | "title" => { // Not raw source-code, but plain-text. + // We have to reconstitute what the source-code would look like if all + // macros were expanded by hand, and read as raw source code. + let sexp_fmt = SExpFormatter::new(rest.into_boxed_slice()); + let sexp_fmt = Box::leak(Box::new(sexp_fmt)); // TODO: Store. + sexp_fmt.generate(f)?; + }, + "style" => { // <style> tag needs to generate CSS. + // When just a string is passed, don't convert. Assume raw CSS. 
+ if let Some(ParseNode::String(string_node)) = rest.first() { + if rest.len() != 1 { + // FIXME: Leak doesn't really matter, but should really be a better way. + let second_node = Box::leak(Box::new(rest[1].to_owned())); + return Err(GenerationError::new("HTML+CSS", + "A `style' tag can either have S-expression CSS rules, or\ + a single string containing raw CSS be passed in.\n\ + A string was passed in, but excess expressions were passed \ + in after that!", + second_node.site())); + } + // Otherwise, write that raw CSS. + write!(f, "{}", string_node.value)?; + } else { + writeln!(f, "")?; + let css_fmt = CSSFormatter::new(rest.into_boxed_slice()); + let css_fmt = Box::leak(Box::new(css_fmt)); // FIXME: store formatter. + css_fmt.generate(f)?; + } + }, + "script" => { + // TODO: Generating JavaScript from S-expressions is not implemented. + // For now, just treat it as a raw-text tag (a la <pre>). + let sexp_fmt = SExpFormatter::new(rest.into_boxed_slice()); + let sexp_fmt = Box::leak(Box::new(sexp_fmt)); // TODO: Store. + sexp_fmt.generate(f)?; + }, + "math" | "svg" => { // <math> and <svg> are subsets of XML. + let xml_fmt = XMLFormatter::new(rest.into_boxed_slice()); + let xml_fmt = Box::leak(Box::new(xml_fmt)); // FIXME: store formatter. + xml_fmt.generate(f)?; + }, + _ => { // Tag contains regular old HTML. + let html_fmt = HTMLFormatter::new(rest.into_boxed_slice()); + let html_fmt = Box::leak(Box::new(html_fmt)); // FIXME: store formatter. + html_fmt.generate(f)?; + }, + } + // Closing tag should be equally as spaced as opening tag (?) + if end_token.leading_whitespace.is_empty() { + if is_first_node_on_next_line || tag == "style" { + write!(f, "{}", leading_whitespace)?; + } } else { - let html_fmt = HTMLFormatter::new(rest.to_owned()); - html_fmt.generate(f)?; + write!(f, "{}", end_token.leading_whitespace)?; } + write!(f, "</{}>", tag)?; }, - ParseNode::Attribute(_attr) => + ParseNode::Attribute { ref site, .. 
} => return Err(GenerationError::new("HTML", - "Unexpected attribute encountered.", - &node.site())) + "Unexpected attribute encountered.", site)) } } Ok(()) } } - -// TODO: Convert special characters to HTML compatible ones. -// e.g. -// < => &lt; -// > => &gt; -// & => &amp; -// " => &quot; -// ! => &excl; -// etc. - +/// Get raw text in source-file between a `start_node` and some `end_token`. +/// Does not work well if the `start_node` is a result of a macro expansion, +/// it must be a plain node. +/// Especially, the first node cannot be the result of an `(%include)` macro, +/// i.e. from a different file (we explicitly crash in this case). +/// This is a limitation from the fact that we do not know what kind of markup +/// format we are targeting until *after* parsing and expanding. +fn raw_text<'a>(start_node: Option<&ParseNode<'a>>, end_token: &tokens::Token<'a>) -> &'a str { + let Some(start_node) = start_node else { + return end_token.leading_whitespace; + }; + if !std::ptr::eq(start_node.site().source_code, end_token.site.source_code) { + panic!("Start of preformatted text tag must belong to the same source location."); + } + let source: &'a str = end_token.site.source_code; + let first_node_offset = + start_node.site().bytes_from_start + - start_node.leading_whitespace().len() + + if start_node.leading_whitespace().starts_with(' ') { 1 } else { 0 }; + &source[first_node_offset..end_token.site.bytes_from_start] } diff --git a/src/assemble/mod.rs b/src/assemble/mod.rs @@ -2,59 +2,72 @@ use crate::parse::tokens::Site; use std::{convert, fmt, error::Error}; use colored::*; +use unicode_width::UnicodeWidthStr; /// Error type for specific errors with generating /// each type of markup. 
#[derive(Debug, Clone)] -pub struct GenerationError { - pub markup : String, - pub message : String, - pub site : Site +pub struct GenerationError<'a> { + pub markup: String, + pub message: String, + pub site: Site<'a>, } -impl GenerationError { +impl<'a> GenerationError<'a> { /// Create a new error given the ML, the message, and the site. - pub fn new(ml : &str, msg : &str, site : &Site) -> Self { + pub fn new(ml: &str, msg: &str, site: &Site<'a>) -> Self { Self { markup: ml.to_owned(), message: msg.to_owned(), - site: site.to_owned() - } - } - /// When an error cannot be given a location, - /// or exact point of failure. - pub fn unknown(ml : &str) -> Self { - Self { - markup: ml.to_owned(), - message: String::from("Unknown generation error (bug)."), - site: Site::fake() + site: site.to_owned(), } } } /// Implement fmt::Display for user-facing error output. -impl fmt::Display for GenerationError { +impl<'a> fmt::Display for GenerationError<'a> { fn fmt(&self, f : &mut fmt::Formatter<'_>) -> fmt::Result { + let line_prefix = format!(" {} |", self.site.line); + let line_view = self.site.line_slice(); + writeln!(f, "{} {}", line_prefix, line_view)?; + writeln!(f, "{:>prefix_offset$} {:~>text_offset$}{:^>length$}", "|", "", "", + prefix_offset=UnicodeWidthStr::width(line_prefix.as_str()), + text_offset=self.site.line_column() - 1, + length=self.site.width())?; write!(f, "{}: {}", - format!("[{}] Error Generating {} {}", + format!("[{}] Error Generating {} ({}:{}:{})", "**".red().bold(), - self.markup.bold(), self.site).white(), + self.markup.bold(), + self.site.source, + self.site.line, + self.site.line_column(), + ).black(), self.message) } } /// Implements std::error::Error. -impl Error for GenerationError { } +impl<'a> Error for GenerationError<'a> { } + +/// Convert from an io::Error to a generation error. +impl<'a> From<std::io::Error> for GenerationError<'a> { + fn from(e: std::io::Error) -> Self { + Self { + markup: String::from("<markup>"), // FIXME. 
+ message: format!("IO error: {}", e), + site: Site::unknown(), + } + } +} /// An fmt::Error can be cast to an equally horribly /// ambiguous GenerationError. -impl convert::From<fmt::Error> for GenerationError { - fn from(_ : fmt::Error) -> Self { +impl<'a> convert::From<fmt::Error> for GenerationError<'a> { + fn from(e: fmt::Error) -> Self { Self { - markup: String::from("Unknown"), - message: String::from( - "Unknown error while writing to format buffer"), - site: Site::fake() + markup: String::from("<markup>"), + message: format!("Format buffer error: {}", e), + site: Site::unknown(), } } } @@ -92,11 +105,36 @@ impl fmt::Display for dyn MarkupDisplay { self.generate(f).map_err(|_| fmt::Error) } } + +/// Performs the following escapes: +/// - `<` → `&lt;` +/// - `>` → `&gt;` +/// - `"` → `&quot;` +/// - `'` → `&apos;` +/// - `&` → `&amp;` +pub fn escape_xml(string: &str) -> String { + let mut bytes = string.bytes(); + let mut byte_builder: Vec<u8> = Vec::with_capacity(bytes.len()); + while let Some(byte) = bytes.next() { + match byte { + b'<' => byte_builder.extend(b"&lt;"), + b'>' => byte_builder.extend(b"&gt;"), + b'"' => byte_builder.extend(b"&quot;"), + b'\'' => byte_builder.extend(b"&apos;"), + b'&' => byte_builder.extend(b"&amp;"), + _ => byte_builder.push(byte) + } + } + unsafe { + String::from_utf8_unchecked(byte_builder) + } +} + +/// Re-constitute original S-expressions. +pub mod sexp; /// XML generation. pub mod xml; - /// HTML5 CSS generation. pub mod css; /// HTML5 HTML generation. pub mod html; - diff --git a/src/assemble/sexp.rs b/src/assemble/sexp.rs @@ -0,0 +1,66 @@ +//! Output expanded source-code as identical looking to the original +//! hand-written code as possible. 
+ +use super::{MarkupDisplay, GenerationError, Formatter}; +use crate::parse::parser::{ParseNode, ParseTree}; + +#[derive(Debug, Clone)] +pub struct SExpFormatter<'a> { + pub tree : ParseTree<'a>, +} + +impl<'a> SExpFormatter<'a> { + pub fn new(tree: ParseTree<'a>) -> Self { + Self { tree } + } +} + +impl<'a> MarkupDisplay for SExpFormatter<'a> { + fn document(&self) -> Result<String, GenerationError> { + self.display() + } + + fn generate(&self, f: Formatter) -> Result<(), GenerationError> { + let mut tree_iter = self.tree.iter().peekable(); + while let Some(node) = tree_iter.next() { + generate_node(f, node)?; + } + Ok(()) + } +} + +// TODO: Make this into a trait on `ParseNode`? +/// Write S-expression string for a single parse node into formatter. +fn generate_node<'a>(f: Formatter, node: &ParseNode<'a>) -> Result<(), GenerationError<'a>> { + match node { + ParseNode::Symbol(node) + | ParseNode::Number(node) => { + write!(f, "{}", node.leading_whitespace)?; + write!(f, "{}", node.value)?; + }, + ParseNode::String(node) => { + // We actually don't want the rendered string, + // we want the escaped string, so we retrieve + // it from source. + write!(f, "{}", node.leading_whitespace)?; + write!(f, "{}", node.site.view())?; + }, + ParseNode::List { nodes, leading_whitespace, end_token, .. } => { + write!(f, "{}", leading_whitespace)?; + write!(f, "(")?; + let tree = nodes.to_vec(); + let sexp_fmt = SExpFormatter::new(tree.into_boxed_slice()); + let sexp_fmt = Box::leak(Box::new(sexp_fmt)); // FIXME: Store. + sexp_fmt.generate(f)?; + write!(f, "{}", end_token.leading_whitespace)?; + write!(f, ")")?; + + }, + ParseNode::Attribute { keyword, node, leading_whitespace, .. } => { + write!(f, "{}", leading_whitespace)?; + write!(f, ":{}", keyword)?; + generate_node(f, node)?; + }, + } + Ok(()) +} diff --git a/src/assemble/xml.rs b/src/assemble/xml.rs @@ -1,25 +1,28 @@ //! Assembles an expanded tree into valid XML. 
-use super::{MarkupDisplay, GenerationError, Formatter}; +use super::{escape_xml, MarkupDisplay, GenerationError, Formatter}; use crate::parse::parser::{self, ParseNode, ParseTree}; #[derive(Debug, Clone)] -pub struct XMLFormatter { - pub tree : ParseTree +pub struct XMLFormatter<'a> { + pub tree : ParseTree<'a>, } -impl XMLFormatter { - pub fn new(tree : ParseTree) -> Self { +impl<'a> XMLFormatter<'a> { + pub fn new(tree: ParseTree<'a>) -> Self { Self { tree } } - fn display_attribute(&self, attr : &parser::AttributeNode) + fn display_attribute(&'a self, attr: &'a parser::ParseNode<'a>) -> Result<String, GenerationError> { - if let Some(symbol) = (*attr.node).atomic() { - Ok(format!("{}=\"{}\"", attr.keyword, symbol.value)) + let parser::ParseNode::Attribute { keyword, node, .. } = attr else { + panic!("Passed non-attribute to display_attribute.") + }; + if let Some(symbol) = (*node).atomic() { + Ok(format!("{}=\"{}\"", keyword, symbol.value)) } else { Err(GenerationError::new("XML", "Attribute can only contain symbols, numbers or strings", - &(*attr.node).site())) + &(*node).site())) } } @@ -28,18 +31,17 @@ impl XMLFormatter { pub const DEFAULT : &str = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"; -impl MarkupDisplay for XMLFormatter { +impl<'a> MarkupDisplay for XMLFormatter<'a> { fn document(&self) -> Result<String, GenerationError> { let mut doc = String::new(); if self.tree.is_empty() { return Ok(String::from(DEFAULT)); } - let stripped = parser::strip(&self.tree, true); - let current_node = stripped.get(0); + let current_node = self.tree.get(0); // Check if declaration exists. let mut has_declaration = false; - if let Some(ParseNode::List(list)) = current_node.as_ref() { + if let Some(ParseNode::List { nodes: list, .. }) = current_node.as_ref() { if let Some(ParseNode::Symbol(declaration)) = list.get(0) { if declaration.value.to_lowercase() == "?xml" { has_declaration = true; @@ -53,40 +55,41 @@ impl MarkupDisplay for XMLFormatter { // Populate. 
doc += &self.display()?; - doc += "<!-- Generated by SEAM, from symbolic-expressions \ - into XML. -->\n"; Ok(doc) } - fn generate(&self, f : Formatter) - -> Result<(), GenerationError> { + fn generate(&self, f : Formatter) -> Result<(), GenerationError> { let mut tree_iter = self.tree.iter().peekable(); while let Some(node) = tree_iter.next() { match node { ParseNode::Symbol(node) | ParseNode::Number(node) => { - // If the node ahead is so-called "symbolic", we can - // infere there was a space between them. - write!(f, "{}", node.value)?; - if let Some(peek) = tree_iter.peek() { - if peek.symbolic().is_some() { - write!(f, " ")? - } - } + write!(f, "{}", node.leading_whitespace)?; + write!(f, "{}", escape_xml(&node.value))?; }, - ParseNode::String(node) => write!(f, "{}", node.value)?, - ParseNode::List(list) => { + ParseNode::String(node) => { + write!(f, "{}", node.leading_whitespace)?; + write!(f, "{}", escape_xml(&node.value))? + }, + ParseNode::List { nodes: list, leading_whitespace, end_token, .. } => { + write!(f, "{}", leading_whitespace)?; let head = list.first(); - let mut tag = ""; + let tag: &str; // xml <tag> name. if let Some(head_node) = head { if let ParseNode::Symbol(head_symbol) = head_node { tag = &head_symbol.value; write!(f, "<{}", tag)?; } else { // Error, tags can only have symbol values. + return Err(GenerationError::new("XML", + "XML tags can only be given as symbols.", + head_node.site())); } } else { // Error, empty tags not supported. + return Err(GenerationError::new("XML", + "Empty lists cannot be converted into a valid XML tag.", + node.site())); } let mut rest = &list[1..]; @@ -95,16 +98,14 @@ impl MarkupDisplay for XMLFormatter { let front = tag.as_bytes()[0] as char; if front == '!' || front == '?' { while !rest.is_empty() { - if let ParseNode::List(_list) = &rest[0] { - // TODO: Throw error. + if let Some(node) = rest[0].symbolic() { + write!(f, "{}", node.value)?; + } else if let attr@ParseNode::Attribute { .. 
} = &rest[0] { + write!(f, " {}", self.display_attribute(attr)?)?; } else { - if let Some(node) = rest[0].symbolic() { - write!(f, "{}", node.value)?; - } else if let ParseNode::Attribute(a) = &rest[0] { - write!(f, " {}", self.display_attribute(a)?)?; - } else { - // Error. - } + return Err(GenerationError::new("XML", + "Only identifiers and attributes are allowed in declarations.", + &rest[0].site())); } rest = &rest[1..]; } @@ -116,32 +117,44 @@ impl MarkupDisplay for XMLFormatter { continue; } - while let Some(ParseNode::Attribute(attr)) = rest.first() { + while let Some(attr@ParseNode::Attribute { .. }) = rest.first() { write!(f, " {}", self.display_attribute(&attr)?)?; rest = &rest[1..]; } write!(f, ">")?; - let xml_fmt = XMLFormatter::new(rest.to_owned()); + // See similar comment for HTML generation: + // We strip leading whitespace from the first child element in a tag. + // This is more natural w.r.t. the S-exp syntax. + let mut rest = rest.to_vec(); + let mut is_first_node_on_next_line = false; + if let Some(first_node) = rest.get_mut(0) { + is_first_node_on_next_line = first_node.leading_whitespace().contains('\n'); + if !is_first_node_on_next_line { + first_node.set_leading_whitespace("".to_owned()); + } + } + + let xml_fmt = XMLFormatter::new(rest.to_owned().into_boxed_slice()); + let xml_fmt = Box::leak(Box::new(xml_fmt)); // FIXME: store formatter. xml_fmt.generate(f)?; + + // Closing tag should be equally as spaced as opening tag (?) + if end_token.leading_whitespace.is_empty() { + if is_first_node_on_next_line || tag == "style" { + write!(f, "{}", leading_whitespace)?; + } + } else { + write!(f, "{}", end_token.leading_whitespace)?; + } + write!(f, "</{}>", tag)?; }, _ => return Err(GenerationError::new("XML", - "Unknonw node encountered.", &node.site())) + &format!("Unexpected {} node when generating.", node.node_type()), + &node.site())) } } Ok(()) } } - - -// TODO: Convert special characters to XML compatible ones. -// e.g. 
-// < => &lt; -// > => &gt; -// & => &amp; -// " => &quot; -// ! => &excl; -// etc. - - diff --git a/src/bin.rs b/src/bin.rs @@ -14,43 +14,53 @@ fn argument_fatal(msg : impl std::fmt::Display) -> ! { std::process::exit(1) } -const SUPPORTED_TARGETS : [&str; 3] = ["html", "xml", "css"]; +const SUPPORTED_TARGETS : [&str; 4] = ["sexp", "html", "xml", "css"]; fn main() -> Result<(), Box<dyn Error>> { let mut args = env::args(); - args.next(); // Discard. + let _ = args.next(); // Discard. let mut files = Vec::new(); let mut target = ""; let mut from_stdin = false; + let mut is_doc = true; for arg in args { if arg.chars().nth(0) == Some('-') { - if let Some(opt) = arg.split("--").nth(1) { - if SUPPORTED_TARGETS.contains(&opt) { - target = Box::leak(opt.to_owned().into_boxed_str()); + if let Some(opt) = arg.split("--").nth(1) { + if SUPPORTED_TARGETS.contains(&opt) { + target = Box::leak(opt.to_owned().into_boxed_str()); + continue; + } + match opt { + "nodocument" | "nodoc" => is_doc = false, + _ => argument_fatal( + format!("Unknown argument: `--{}'.", opt)) + } + } else if let Some(opt) = arg.split("-").nth(1) { + match opt { + "v" => { + let (major, minor, tiny) = seam::VERSION; + eprintln!("{}", format!("SEAM v{}.{}.{}", + major, minor, tiny).bold()); + std::process::exit(0); + }, + "" => { + from_stdin = true; + }, + _ => argument_fatal( + format!("Unknown argument: `-{}'.", opt)) + } } - continue; - } else if let Some(opt) = arg.split("-").nth(1) { - match opt { - "v" => { - let (major, minor, tiny) = seam::VERSION; - eprintln!("{}", format!("SEAM v{}.{}.{}", - major, minor, tiny).bold()); - std::process::exit(0); - }, - "" => { - from_stdin = true; - }, - _ => argument_fatal( - format!("Unknown argument (`-{}').", opt)) + } else { + // Otherwise its a file path. 
+ let path = PathBuf::from(&arg); + if path.exists() { + files.push(path); + } else { + argument_fatal(format!("File not found: `{}'.", path.to_string_lossy())); } } - } - let path = PathBuf::from(&arg); - if path.exists() { - files.push(path); - } } if files.is_empty() { @@ -63,60 +73,48 @@ fn main() -> Result<(), Box<dyn Error>> { if from_stdin { let mut stdin = io::stdin(); - let tree = match seam::parse_stream(&mut stdin) { - Ok(tree) => tree, - Err(e) => { - eprintln!("{}", e); - std::process::exit(1) - } - }; - print_generated(tree, target); + let builder = seam::tree_builder_stream(&mut stdin)?; + generate_and_print(&builder, target, is_doc); } for file in files { - let tree = match seam::parse_file(&file) { - Ok(tree) => tree, - Err(e) => { - eprintln!("{}", e); - std::process::exit(1) - } - }; - #[cfg(feature="debug")] - eprintln!("{}", &tree - .iter().fold(String::new(), - |acc, s| acc + "\n" + &s.to_string())); - print_generated(tree, target); + let builder = seam::tree_builder_file(&file)?; + generate_and_print(&builder, target, is_doc); } Ok(()) } -fn print_generated(tree : seam::parse::ParseTree, target : &str) { - let result = match target { - "html" => { - let fmt = seam::assemble::html::HTMLFormatter::new(tree); - fmt.document() - }, - "xml" => { - let fmt = seam::assemble::xml::XMLFormatter::new(tree); - fmt.document() - }, - "css" => { - let fmt = seam::assemble::css::CSSFormatter::new(tree); +fn generate_and_print<'a>(expander: &'a seam::parse::expander::Expander<'a>, target: &str, is_doc: bool) { + let tree = match expander.expand() { + Ok(tree) => tree, + Err(e) => { + eprintln!("{}", e); + std::process::exit(1); + } + }; + let fmt: Box<dyn MarkupDisplay> = match target { + "sexp" => Box::new(seam::assemble::sexp::SExpFormatter::new(tree)), + "html" => Box::new(seam::assemble::html::HTMLFormatter::new(tree)), + "xml" => Box::new(seam::assemble::xml::XMLFormatter::new(tree)), + "css" => Box::new(seam::assemble::css::CSSFormatter::new(tree)), + 
_ => { + argument_fatal( + format!("Target `{}', does not exist.", target)) + } + }; + let result = if is_doc { fmt.document() - }, - _ => { - argument_fatal( - format!("Target `{}', does not exist.", target)) - }}; + } else { + fmt.display() + }; match result { - Ok(generated) => print!("{}", generated), + Ok(generated) => println!("{}", generated), Err(e) => { eprintln!("{}", e); std::process::exit(1) } } } - diff --git a/src/lib.rs b/src/lib.rs @@ -1,38 +1,36 @@ #![allow(incomplete_features)] +#![feature(pattern)] #![feature(associated_type_defaults)] -#![feature(generic_associated_types)] pub mod parse; pub mod assemble; use parse::{expander, parser, lexer}; -use std::error::Error; use std::{fs, io, path::Path}; -pub const VERSION : (u8, u8, u8) = (0, 1, 8); +pub const VERSION : (u8, u8, u8) = (0, 2, 0); -pub fn parse<P: AsRef<Path>>(string : String, source : Option<P>) - -> Result<parser::ParseTree, Box<dyn Error>> { - let tokens = lexer::lex(string, source)?; - #[cfg(feature="debug")] - eprintln!("{:#?}", &tokens); - let tree = parser::parse_stream(tokens)?; - let expanded = expander::expand(tree)?; - Ok(expanded) +pub fn tree_builder<'a, P: AsRef<Path>>(source_path: Option<P>, string: String) + -> expander::Expander<'a> { + let path = source_path.map_or("<stdin>".to_string(), + |s| s.as_ref().to_string_lossy().to_string()); + let tokenizer = lexer::Lexer::new(path, string); + let builder = parser::Parser::new(tokenizer); + expander::Expander::new(builder) } -pub fn parse_file(path : &Path) - -> Result<parser::ParseTree, Box<dyn Error>> { +pub fn tree_builder_file<'a>(path: &Path) + -> io::Result<expander::Expander<'a>> { let contents = fs::read_to_string(&path)?; - parse(contents, Some(&path)) + Ok(tree_builder(Some(path), contents)) } -pub fn parse_stream(stream : &mut impl io::Read) - -> Result<parser::ParseTree, Box<dyn Error>> { +pub fn tree_builder_stream(stream: &mut impl io::Read) + -> io::Result<expander::Expander> { let mut contents = String::new(); 
stream.read_to_string(&mut contents)?; - parse(contents, Option::<&Path>::None) + Ok(tree_builder(Option::<&Path>::None, contents)) } pub fn main() { diff --git a/src/parse/expander.rs b/src/parse/expander.rs @@ -1,286 +1,387 @@ -use super::parser::{self, ParseNode, ParseTree, Node}; +use super::parser::{Parser, ParseNode, ParseTree, Node}; use super::tokens::Site; -use std::{fmt, path::{Path, PathBuf}, ffi::OsString, error::Error}; +use std::{ + fmt, + cell::RefCell, + path::{ + Path, + PathBuf + }, + ffi::OsString, + error::Error, + rc::Rc, +}; use colored::*; +use unicode_width::UnicodeWidthStr; /// Error type for errors while expanding macros. #[derive(Debug, Clone)] -pub struct ExpansionError(pub String, pub Site); +pub struct ExpansionError<'a>(pub String, pub Site<'a>); -impl ExpansionError { +impl<'a> ExpansionError<'a> { /// Create a new error given the ML, the message, and the site. - pub fn new(msg : &str, site : &Site) -> Self { + pub fn new(msg: &str, site: &Site<'a>) -> Self { Self(msg.to_owned(), site.to_owned()) } } /// Implement fmt::Display for user-facing error output. -impl fmt::Display for ExpansionError { - fn fmt(&self, f : &mut fmt::Formatter<'_>) -> fmt::Result { +impl<'a> fmt::Display for ExpansionError<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let ExpansionError(msg, site) = self; + let line_prefix = format!(" {} |", site.line); + let line_view = site.line_slice(); + writeln!(f, "{} {}", line_prefix, line_view)?; + writeln!(f, "{:>prefix_offset$} {:~>text_offset$}{:^>length$}", "|", "", "", + prefix_offset=UnicodeWidthStr::width(line_prefix.as_str()), + text_offset=site.line_column() - 1, + length=site.width())?; write!(f, "[{}] Error Expanding Macro {}: {}", - "**".red().bold(), self.1, self.0) + "**".red().bold(), site, msg) } } -/// Implements std::error::Error. -impl Error for ExpansionError { } +/// Implements std::error::Error for macro expansion error. 
+impl<'a> Error for ExpansionError<'a> { } use std::collections::HashMap; -#[derive(Clone)] -struct Macro { - name : String, - params : Vec<String>, - body : Vec<ParseNode> +/// A macro consists of: +/// - its name; +/// - its argument list (if any); +/// - and its definition (i.e. *body*). +#[derive(Debug, Clone)] +pub struct Macro<'a> { + name: String, + params: Box<[String]>, + body: Box<[ParseNode<'a>]> } +// TODO: Macro to also store its own scope (at place of definition) +// in order to implement lexical scoping. -impl Macro { - fn new(name : &str) -> Macro { +impl<'a> Macro<'a> { + pub fn new(name : &str) -> Macro { Macro { name: name.to_string(), - params: Vec::new(), - body: Vec::new() + params: Box::new([]), + body: Box::new([]), } } } -#[derive(Clone)] -struct ExpansionContext { - definitions : HashMap<String, Macro> +/// Type of variable scope owned by an `Expander` instance. +pub type Scope<'a> = RefCell<HashMap<String, Rc<Macro<'a>>>>; // Can you believe this type? + +#[derive(Debug, Clone)] +pub struct Expander<'a> { + parser: Parser, + definitions: Scope<'a>, } -impl ExpansionContext { - pub fn new() -> Self { Self { definitions: HashMap::new() } } +impl<'a> Expander<'a> { + pub fn new(parser: Parser) -> Self { + Self { + parser, + definitions: RefCell::new(HashMap::new()) + } + } - fn expand_invocation(&mut self, name : &str, - site : &Site, - params : Vec<ParseNode>) - -> Result<ParseTree, ExpansionError> { match name { - // Some macros are lazy (e.g. `ifdef`), so each macro has to - // expand the macros in its arguments individually. - "define" => { - let (head, nodes) = if let [head, nodes@..] = params.as_slice() { - (head, nodes) - } else { - return Err(ExpansionError::new( - &format!("`define` macro takes at least \ - two (2) arguments ({} were given.", params.len()), - site)); - }; + /// Get underlying source-code of the active parser for current unit. 
+ pub fn get_source(&'a self) -> &'a str { + self.parser.get_source() + } - // If head is atomic, we assign to a 'variable'. - let def_macro = if let Some(variable) = head.atomic() { - let mut definition = Macro::new(&variable.value); - for node in nodes { - definition.body.push(node.clone()); - } - definition - } else { // Otherwise, we are assigning to a 'function'. - let (name, params) = if let ParseNode::List(call) = head { - let (name, params) = if let [name, params@..] = call.as_slice() { - (name, params) - } else { - return Err(ExpansionError::new( - "`define` function definition must at \ - least have a name.", site)); - }; - let mut arguments = Vec::with_capacity(params.len()); - for node in params { // Verify params are symbols. - if let ParseNode::Symbol(param) = node { - arguments.push(param.value.clone()); - } else { - return Err(ExpansionError::new( - "`define` function arguments must be \ - symbols/identifers.", site)); - }; - } - if let ParseNode::Symbol(name) = name { - (name.value.clone(), arguments) - } else { - return Err(ExpansionError::new( - "`define` function name must be \ - a symbol/identifier.", site)); - } - } else { - return Err(ExpansionError::new( - "First argument of `define` macro must be a list \ - or variable name/identifier.", site)); - }; + /// Update variable (macro) for this scope. + fn insert_variable(&'a self, name: String, var: Rc<Macro<'a>>) { + let mut defs = self.definitions.borrow_mut(); + defs.insert(name, var); + } - let mut definition = Macro::new(&name); - definition.params = params; - for node in nodes { - definition.body.push(node.clone()); - } - definition - }; + /// Check if macro exists in this scope. 
+ fn has_variable(&'a self, name: &str) -> bool { + let defs = self.definitions.borrow(); + defs.contains_key(name) + } - self.definitions.insert(def_macro.name.clone(), def_macro); - - Ok(vec![]) - }, - "ifdef" => { - if params.len() < 2 || params.len() > 3 { - eprintln!("{:?}", params); - return Err(ExpansionError::new(&format!("`ifdef` takes one (1) \ - condition and one (1) consequent, a third optional \ - alternative expression may also be provided, but \ - `ifdef` was given {} arguments.", params.len()), - site)); - } - let symbol = if let Some(node) = params[0].atomic() { - node.value - } else { - return Err(ExpansionError::new("The first argument to \ - `ifdef` must be a symbol/name.", &params[0].site())); - }; + fn get_variable(&'a self, name: &str) -> Option<Rc<Macro<'a>>> { + self.definitions.borrow().get(name).map(|m| m.clone()) + } - if self.definitions.contains_key(&symbol) { - Ok(self.expand_node(params[1].clone())?) - } else { - if let Some(alt) = params.get(2) { - Ok(self.expand_node(alt.clone())?) + /// Define a macro with `(%define a b)` --- `a` is a symbol or a list `(c ...)` where `c` is a symbol. + /// macro definitions will eliminate any preceding whitespace, so make sure trailing whitespace provides + /// the whitespace you need. + fn expand_define_macro(&'a self, node: &'a ParseNode<'a>, params: Box<[ParseNode<'a>]>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let [head, nodes@..] = &*params else { + return Err(ExpansionError( + format!("`%define` macro takes at least \ + two (2) arguments ({} were given.", params.len()), + node.site().to_owned())); + }; + + // If head is atomic, we assign to a 'variable'. + let def_macro = if let Some(variable) = head.atomic() { + Rc::new(Macro { + name: variable.value.clone(), + params: Box::new([]), + body: nodes.to_owned().into_boxed_slice(), + }) + } else { // Otherwise, we are assigning to a 'function'. + let ParseNode::List { nodes: defn_nodes, .. 
} = head else { + return Err(ExpansionError( + "First argument of `%define` macro must be a list \ + or variable name/identifier.".to_owned(), + node.site().to_owned())); + }; + let [name, params@..] = &**defn_nodes else { + return Err(ExpansionError( + "`%define` macro definition must at \ + least have a name.".to_owned(), + node.site().to_owned())); + }; + let mut arguments: Vec<String> = Vec::with_capacity(params.len()); + for param_node in params { // Verify arguments are symbols. + if let ParseNode::Symbol(param) = param_node { + arguments.push(param.value.clone()); } else { - Ok(vec![]) - } + return Err(ExpansionError( + "`define` function arguments must be \ + symbols/identifers.".to_owned(), + node.site().to_owned())); + }; } - }, - "include" => { - let params = self.expand_nodes(params)?; - let path_node = if let [ p ] = params.as_slice() { - p - } else { - return Err(ExpansionError::new( - &format!("Incorrect number of arguments \ - to `{}' macro. Got {}, expected {}.", - name, params.len(), 1), - site)); + let ParseNode::Symbol(name_node) = name else { + return Err(ExpansionError( + "`define` function name must be \ + a symbol/identifier.".to_owned(), + node.site().to_owned())); }; + let name = name_node.value.clone(); - let path = if let Some(node) = path_node.atomic() { - node.value - } else { - return Err(ExpansionError::new( - &format!("Bad argument to `{}' macro.\n\ - Expected a path, but did not get any value - that could be interpreted as a path.", name), - site)) - }; + Rc::new(Macro { + name, + params: arguments.into_boxed_slice(), + body: nodes.to_owned().into_boxed_slice(), + }) + }; - // Open file, and parse contents! - let path = Path::new(&path); - let tree = match super::parse_file_noexpand(&path) { - Ok(tree) => tree, - Err(error) => { - let err = ExpansionError::new( - &format!("{}", error), site); - // Try with `.sex` extensions appended. 
- let mut with_ext = PathBuf::from(path); - let filename = path.file_name().ok_or(err.clone())?; - with_ext.pop(); - - let mut new_filename = OsString::new(); - new_filename.push(filename); - new_filename.push(".sex"); - - with_ext.push(new_filename); - match super::parse_file_noexpand(&with_ext) { - Ok(tree) => tree, - Err(_) => return Err(err) - } - } - }; - - // Build new (expanded) tree, with result of previous - // parse, while recursively expanding each branch in the - // tree too, as they are added. - let mut expanded_tree = Vec::with_capacity(tree.len()); - for branch in tree { - expanded_tree.extend(self.expand_node(branch)?); - } - Ok(expanded_tree) - }, - "date" => { - let params = self.expand_nodes(params)?; - let date_format = if let [ p ] = params.as_slice() { - p - } else { - return Err(ExpansionError::new( - &format!("`{}' macro only expects one formatting argument.", - name), - site)) - }; + self.insert_variable(def_macro.name.to_owned(), def_macro); + Ok(Box::new([])) + } - let (date_format, site) = if let Some(node) = date_format.atomic() { - (node.value, node.site) + /// `(%ifdef symbol a b)` --- `b` is optional, however, if not provided *and* + /// the symbol is not defined, it will erase the whole expression, and whitespace will not + /// be preseved before it. If that's a concern, provide `b` as the empty string `""`. + fn expand_ifdef_macro(&'a self, node: &'a ParseNode<'a>, params: Box<[ParseNode<'a>]>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + if params.len() < 2 || params.len() > 3 { + return Err(ExpansionError(format!("`ifdef` takes one (1) \ + condition and one (1) consequent, a third optional \ + alternative expression may also be provided, but \ + `ifdef` was given {} arguments.", params.len()), + node.site().to_owned())); + } + let symbol = if let Some(node) = params[0].atomic() { + node.value + } else { + // FIXME: Borrow-checker won't let me use params[0].site() as site! 
+ return Err(ExpansionError( + "The first argument to `ifdef` must be a symbol/name.".to_string(), + node.site().clone())); + }; + + let mut expanded = if self.has_variable(&symbol) { + self.expand_node(params[1].clone())? + } else { + if let Some(alt) = params.get(2) { + self.expand_node(alt.clone())? } else { - return Err(ExpansionError::new( - &format!("`{}' macro needs string (or atomic) \ - formatting argument.", name), - site)) - }; + Box::new([]) + } + }; + if let Some(first_node) = expanded.get_mut(0) { + first_node.set_leading_whitespace(node.leading_whitespace().to_owned()); + } + Ok(expanded) + } - let now = chrono::Local::now(); - let formatted = now.format(&date_format).to_string(); - Ok(vec![ParseNode::String(Node::new(&formatted, &site))]) - }, - "log" => { - let mut words = Vec::with_capacity(params.len()); - for param in self.expand_nodes(params)? { - if let Some(word) = param.atomic() { - words.push(word.value.clone()); - } else { - return Err(ExpansionError::new("`log` should only take \ - arguments that are either symbols, strings or numbers.", - &param.site())); + fn expand_include_macro(&'a self, node: &'a ParseNode<'a>, params: Box<[ParseNode<'a>]>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let params: Box<[ParseNode<'a>]> = self.expand_nodes(params)?; + let [path_node] = &*params else { + return Err(ExpansionError( + format!("Incorrect number of arguments \ + to `%include' macro. Got {}, expected {}.", + params.len(), 1), + node.site().to_owned())); + }; + + let Some(Node { value: path, .. }) = path_node.atomic() else { + return Err(ExpansionError( + "Bad argument to `%include' macro.\n\ + Expected a path, but did not get any value + that could be interpreted as a path.".to_string(), + node.site().to_owned())) + }; + + // Open file, and parse contents! 
+ let path = Path::new(&path); + let parser = match super::parser_for_file(&path) { + Ok(parser) => parser, + Err(error) => { + let err = ExpansionError( + format!("{}", error), node.site().to_owned()); + // Try with `.sex` extensions appended. + let mut with_ext = PathBuf::from(path); + let filename = path.file_name().ok_or(err.clone())?; + with_ext.pop(); + + let mut new_filename = OsString::new(); + new_filename.push(filename); + new_filename.push(".sex"); + + with_ext.push(new_filename); + match super::parser_for_file(&with_ext) { + Ok(parser) => parser, + Err(_) => return Err(err) } } - - eprintln!("{} {} {}: {}", "[#]".bold(), "log".bold().yellow(), - site, words.join(" ")); - Ok(vec![]) + }; + // FIXME: Whatever! I tried to indicate with life-times that these + // live for the entier duration that the lex->parse->expand phases. + // Might as well just leak it, since it's going to live that long anyway. + let leaked_parser = Box::leak(Box::new(parser)); // Keep a Vec<Box<Parser>>? + let tree = match leaked_parser.parse() { + Ok(tree) => tree, + Err(error) => return Err(ExpansionError( + format!("{}", error), node.site().to_owned())) + }; + + // Build new (expanded) tree, with result of previous + // parse, while recursively expanding each branch in the + // tree too, as they are added. + let mut expanded_tree = Vec::with_capacity(tree.len()); + for branch in tree { + expanded_tree.extend(self.expand_node(branch)?); + } + // First node should inherit leading whitespace from (%include ...) list. 
+ if expanded_tree.len() != 0 { + expanded_tree[0].set_leading_whitespace(node.leading_whitespace().to_owned()); } - name => { - let params = self.expand_nodes(params)?; + Ok(expanded_tree.into_boxed_slice()) + } + + fn expand_date_macro(&'a self, node: &'a ParseNode<'a>, params: Box<[ParseNode<'a>]>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let params = self.expand_nodes(params)?; + let [date_format] = &*params else { + return Err(ExpansionError::new( + "`%date' macro only expects one formatting argument.", + node.site())) + }; + + let Some(Node { value: date_format, .. }) = date_format.atomic() else { + return Err(ExpansionError::new( + "`%date' macro needs string (or atomic) \ + formatting argument.", node.site())) + }; + + let now = chrono::Local::now(); + let formatted = now.format(&date_format).to_string(); + let date_string_node = ParseNode::String(Node { + value: formatted, + site: node.site().clone(), + leading_whitespace: node.leading_whitespace().to_string(), + }); + Ok(Box::new([date_string_node])) + } - let mac = if let Some(mac) = self.definitions.get(name) { - mac + /// `(%log ...)` logs to `STDERR` when called and leaves *no* node behind. + /// This means whitespace preceeding `(%log ...)` will be removed! + fn expand_log_macro(&'a self, node: &'a ParseNode<'a>, params: Box<[ParseNode<'a>]>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let mut words = Vec::with_capacity(params.len()); + for param in self.expand_nodes(params)? 
{ + if let Some(word) = param.atomic() { + words.push(word.value.clone()); } else { - return Err(ExpansionError::new( - &format!("Macro not found (`{}').", name), site)) + return Err(ExpansionError::new("`log` should only take \ + arguments that are either symbols, strings or numbers.", + node.site())); + } + } + + eprintln!("{} {} {}: {}", "[#]".bold(), "log".bold().yellow(), + node.site(), words.join(" ")); + Ok(Box::new([])) + } + + fn expand_macro(&'a self, name: &str, node: &'a ParseNode<'a>, params: Box<[ParseNode<'a>]>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + // Eagerly evaluate parameters passed to macro invocation. + let params = self.expand_nodes(params)?; + + let Some(mac) = self.get_variable(name) else { + return Err(ExpansionError::new( + &format!("Macro not found (`{}').", name), node.site())) + }; + + // Instance of expansion subcontext. + // FIXME: Leaking again, maybe track subcontexts in superior context? + let subcontext = Box::leak(Box::new(self.clone())); // TODO: Create a stack Vec<Rc<Context>> and clone it here. + // Check enough arguments were given. + if params.len() != mac.params.len() { + return Err(ExpansionError( + format!("`%{}` macro expects {} arguments, \ + but {} were given.", &mac.name, mac.params.len(), + params.len()), node.site().to_owned())); + } + // Define arguments for body. + for i in 0..params.len() { + let arg_macro = Macro { + name: mac.params[i].to_owned(), + params: Box::new([]), + body: Box::new([params[i].clone()]), //< Argument as evaluated at call-site. }; + subcontext.insert_variable(mac.params[i].to_string(), Rc::new(arg_macro)); + } + // Expand body. + let mut expanded = subcontext.expand_nodes(mac.body.clone())?.to_vec(); + // Inherit leading whitespace of invocation. + if let Some(first_node) = expanded.get_mut(0) { + first_node.set_leading_whitespace(node.leading_whitespace().to_owned()); + } + Ok(expanded.into_boxed_slice()) + } - // Instance of expansion subcontext. 
- let mut subcontext = self.clone(); - // Check enough arguments were given. - if params.len() != mac.params.len() { - return Err(ExpansionError::new( - &format!("`%{}` macro expects {} arguments, \ - but {} were given.", &mac.name, mac.params.len(), - params.len()), site)); - } - // Define arguments for body. - for i in 0..params.len() { - let mut arg_macro = Macro::new(&mac.params[i]); - arg_macro.body.push(params[i].clone()); - subcontext.definitions.insert(mac.params[i].clone(), arg_macro); - } - // Expand body. - subcontext.expand_nodes(mac.body.clone()) + fn expand_invocation(&'a self, + name: &str, //< Name of macro (e.g. %define). + node: &'a ParseNode<'a>, //< Node for `%'-macro invocation. + params: Box<[ParseNode<'a>]> //< Passed in arguments. + ) -> Result<ParseTree<'a>, ExpansionError<'a>> { + // Some macros are lazy (e.g. `ifdef`), so each macro has to + // expand the macros in its arguments individually. + match name { + "define" => self.expand_define_macro(node, params), + "ifdef" => self.expand_ifdef_macro(node, params), + "include" => self.expand_include_macro(node, params), + "date" => self.expand_date_macro(node, params), + "log" => self.expand_log_macro(node, params), + _ => self.expand_macro(name, node, params), } - }} + } - pub fn expand_node(&mut self, node : ParseNode) - -> Result<ParseTree, ExpansionError> { + pub fn expand_node(&'a self, node: ParseNode<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { match node { ParseNode::Symbol(ref sym) => { // Check if symbol starts with %... and replace it // with it's defined value. if sym.value.starts_with("%") { let name = &sym.value[1..]; - if let Some(def) = self.definitions.get(name) { + if let Some(def) = self.get_variable(name) { if !def.params.is_empty() { // Should not be a function. return Err(ExpansionError::new( &format!("`{}` is a macro that takes arguments, \ @@ -289,65 +390,75 @@ impl ExpansionContext { } Ok(def.body.clone()) } else { // Not found. 
- Err(ExpansionError::new( - &format!("No such macro, `{}`.", name), - &sym.site)) + Err(ExpansionError( + format!("No such macro, `{}`.", name), + sym.site.to_owned())) } } else { - Ok(vec![node]) + Ok(Box::new([node])) } }, - ParseNode::List(list) => { + ParseNode::List { ref nodes, ref site, ref end_token, ref leading_whitespace } => { // Check for macro invocation (%_ _ _ _). // Recurse over every element. - let len = list.len(); - let mut call = list.into_iter(); + let len = nodes.len(); + let mut call = nodes.to_vec().into_iter(); let head = call.next(); - if let Some(ParseNode::Symbol(ref sym)) = head { - if sym.value.starts_with("%") { + // Pathway: (%_ _ _) macro invocation. + if let Some(ref symbol@ParseNode::Symbol(..)) = head { + // FIXME: This is just really bad... + let list_node = Box::leak(Box::new(node.clone())); + let name = symbol.atomic().unwrap().value; + if name.starts_with("%") { // Rebuild node... - let name = &sym.value[1..]; - let Node { site, .. } = sym; + let name = &name[1..]; // Clean macro arguments from whitespace tokens. - let call_vec: ParseTree = call.collect(); - let params = parser::strip(&call_vec, false); - return self.expand_invocation(name, site, params); + let params: Vec<ParseNode> = call.collect(); + return self.expand_invocation(name, list_node, params.into_boxed_slice()); } } - - // Rebuild node... + // Otherwise, if not a macro, just expand child nodes incase they are macros. 
let mut expanded_list = Vec::with_capacity(len); - expanded_list.extend(self.expand_node(head.unwrap())?); + expanded_list.extend(self.expand_node(head.clone().unwrap())?); for elem in call { expanded_list.extend(self.expand_node(elem)?); } - Ok(vec![ParseNode::List(expanded_list)]) + Ok(Box::new([ParseNode::List { + nodes: expanded_list.into_boxed_slice(), + site: site.clone(), + end_token: end_token.clone(), + leading_whitespace: leading_whitespace.clone(), + }])) }, - ParseNode::Attribute(mut attr) => { - let mut expanded_nodes = self.expand_node(*attr.node)?; - attr.node = Box::new(expanded_nodes[0].clone()); - expanded_nodes[0] = ParseNode::Attribute(attr); + ParseNode::Attribute { keyword, node, site, leading_whitespace } => { + let mut expanded_nodes = self.expand_node(*node)?; + let new_node = Box::new(expanded_nodes[0].clone()); + expanded_nodes[0] = ParseNode::Attribute { + keyword, + node: new_node, + site, + leading_whitespace, + }; Ok(expanded_nodes) }, - _ => Ok(vec![node]) + _ => Ok(Box::new([node])) } } - fn expand_nodes(&mut self, tree : ParseTree) -> Result<ParseTree, ExpansionError> { + pub fn expand_nodes(&'a self, tree: Box<[ParseNode<'a>]>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { let mut expanded = Vec::with_capacity(tree.len()); - for branch in tree { + for branch in tree.into_vec() { expanded.extend(self.expand_node(branch)?); } - Ok(expanded) + Ok(expanded.into_boxed_slice()) } -} - -/// Macro-expansion phase. -/// Macros start with `%...'. 
-pub fn expand(tree : ParseTree) -> Result<ParseTree, ExpansionError> { - let mut context = ExpansionContext::new(); - context.expand_nodes(tree) + pub fn expand(&'a self) -> Result<ParseTree<'a>, Box<dyn 'a + std::error::Error>> { + let tree = self.parser.parse()?; + let expanded = self.expand_nodes(tree)?; + Ok(expanded) + } } diff --git a/src/parse/lexer.rs b/src/parse/lexer.rs @@ -1,301 +1,356 @@ -use super::tokens::{self, Site, Token, TokenStream}; +use super::tokens::{self, Site, Token}; -use std::rc::Rc; -use std::path::Path; +use std::str::pattern::Pattern; use std::{fmt, error::Error}; +use std::cell::Cell; + +use unicode_width::UnicodeWidthStr; #[derive(Debug, Clone)] -pub struct LexError(Site, String); +pub struct LexError<'a>(pub String, pub Site<'a>); -impl fmt::Display for LexError { +impl<'a> fmt::Display for LexError<'a> { fn fmt(&self, f : &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "[**] Lexical Error {}: {}", - self.0, self.1) + let LexError(msg, site) = self; + let line_prefix = format!(" {} |", site.line); + let line_view = site.line_slice(); + writeln!(f, "{} {}", line_prefix, line_view)?; + writeln!(f, "{:>prefix_offset$} {:~>text_offset$}{:^>length$}", "|", "", "", + prefix_offset=UnicodeWidthStr::width(line_prefix.as_str()), + text_offset=site.line_column() - 1, + length=site.width())?; + write!(f, "[**] Lexical Error {}: {}", site, msg) } } -impl Error for LexError { } +impl<'a> Error for LexError<'a> { } -fn is_whitespace(character : char) -> bool { +fn is_whitespace(character: char) -> bool { ['\n', '\r', '\t', ' '].contains(&character) } -fn character_kind(character : char, prev : Option<tokens::Kind>) +fn character_kind(character: char) -> Option<tokens::Kind> { - let kind = match character { + match character { '\n' | '\r' | ' ' | '\t' => None, - '(' => Some(tokens::Kind::LParen), - ')' => Some(tokens::Kind::RParen), + '(' => Some(tokens::Kind::LParen), + ')' => Some(tokens::Kind::RParen), '0'..='9' => 
Some(tokens::Kind::Number), - ':' => Some(tokens::Kind::Keyword), - '"' => Some(tokens::Kind::String), - _ => Some(tokens::Kind::Symbol) - }; - - if prev == Some(tokens::Kind::String) { - if character == '"' { - None - } else { - prev - } - } else { - kind + '-' => Some(tokens::Kind::Number), + ':' => Some(tokens::Kind::Keyword), + '"' => Some(tokens::Kind::String), + _ => Some(tokens::Kind::Symbol) } } -// TODO: Post-tokeniser parenthesis balancer, give -// nice and exact error messages. - -pub fn lex<P: AsRef<Path>>(string : String, source : Option<P>) - -> Result<TokenStream, LexError> { - - let eof = string.len(); - let mut lines : usize = 1; - let mut bytes : usize = 0; - let mut line_bytes : usize = 0; - - let source_str = source.map( - |e| Rc::from(e.as_ref().display().to_string())); - - let mut accumulator : Vec<u8> = Vec::new(); - let mut tokens : TokenStream = Vec::new(); - - let mut token_start : usize = 0; - let mut current_kind = None; - let mut old_kind = None; - let mut string_open = false; - let mut escaped = false; +/// Lexer moves source-code string into itself, +/// and references it when generating tokens. 
+#[derive(Debug, Clone)] +pub struct Lexer { + pub source_path: String, + pub source: String, + line: Cell<usize>, + byte_offset: Cell<usize>, + byte_offset_line: Cell<usize>, +} - while bytes < eof { - let current_byte = string.as_bytes()[bytes]; - if !string.is_char_boundary(bytes) { - accumulator.push(current_byte); - bytes += 1; - line_bytes += 1; - continue; +impl<'a> Lexer { + pub fn new(source_path: String, source: String) -> Self { + Self { + source_path, + source, + line: Cell::new(1), + byte_offset: Cell::new(0), + byte_offset_line: Cell::new(0), } + } - let character = current_byte as char; - - // Tripple quoted string: - if character == '"' - && string.get(bytes..bytes + 3) == Some("\"\"\"") { - token_start = line_bytes; - let start_line = lines; - bytes += 3; - line_bytes += 3; + pub fn get_source(&'a self) -> &'a str { + &self.source + } - let mut found_end_quote = false; + fn increment_byte_offsets(&'a self, offset: usize) { + let i = self.byte_offset.get(); + let j = self.byte_offset_line.get(); + self.byte_offset.set(i + offset); + self.byte_offset_line.set(j + offset); + } - while let Some(quote) = string.get(bytes..bytes + 3) { - if quote == "\"\"\"" { - found_end_quote = true; - break; - } + fn next_line(&'a self) { + let l = self.line.get(); + self.line.set(l + 1); + self.byte_offset_line.set(0); + } - let c = string.as_bytes()[bytes]; - if c == '\n' as u8 { - lines += 1; - line_bytes = 0; - } - accumulator.push(c); - bytes += 1; - line_bytes += 1; + /// Advance the lexer past any whitespace characters, + /// and ignore any comments. 
+ fn consume_whitespace(&'a self) -> &'a str { + let bytes = self.source.as_bytes(); + let mut start = self.byte_offset.get(); + if start >= bytes.len() { + return ""; + } + let mut inside_eon_comment: bool = false; + loop { + let index = self.byte_offset.get(); + let byte: u8 = bytes[index]; + if byte as char == ';' { + inside_eon_comment = true; } - - if !found_end_quote { - let mut site = Site::from_line( - lines, line_bytes, 1); - site.source = source_str.clone(); - return Err(LexError(site, - String::from("Unclosed tripple-quoted string."))); + if !is_whitespace(byte as char) && !inside_eon_comment { + break; } - - - bytes += 3; - line_bytes += 3; - current_kind = None; - - let span = accumulator.len() + 3 + 3; - let mut site = Site::from_line(start_line, - token_start, span); - site.source = source_str.clone(); - tokens.push(Token::new(tokens::Kind::String, - String::from_utf8(accumulator).unwrap(), - site)); - accumulator = Vec::new(); - continue; - } - - if character == '\\' { // Escapes - if current_kind == Some(tokens::Kind::String) { - // How escapes work in strings (TODO) - let new_char = match string.as_bytes()[bytes + 1] as char { - 'n' => '\n', - 't' => '\t', - 'r' => '\r', - '0' => '\0', - c => c, - }; - accumulator.push(new_char as u8); - bytes += 2; - line_bytes += 2; - continue; - } else { - // How they work outside strings: - // TODO: add more escapes. 
- if bytes + 1 == eof { - continue; - } - match string.as_bytes()[bytes + 1] as char { - '\n' | '\r' | ' ' | '\t' => { - current_kind = None; - bytes += 1; - line_bytes += 1; - }, - _ => () - } - escaped = true; - bytes += 1; - line_bytes += 1; - continue; + self.increment_byte_offsets(1); + if self.byte_offset.get() >= bytes.len() { + break; } - } - - // EON Comments: - if character == ';' && current_kind != Some(tokens::Kind::String) { - let mut i = 0; - while bytes < eof - && string.as_bytes()[bytes + i] != '\n' as u8 { - i += 1; + if byte as char == '\n' { + self.next_line(); + if inside_eon_comment { + // EON comments ends at end-of-line. + inside_eon_comment = false; + // Now, whitespace is only what comes *after* the comment. + start = index; + } } - bytes += i; - continue; } - - let mut prev_kind = current_kind; - current_kind = character_kind(character, current_kind); - if escaped { - current_kind = Some(tokens::Kind::Symbol); + unsafe { + std::str::from_utf8_unchecked(&bytes[start..self.byte_offset.get()]) } + } - let string_start = character == '"' - && prev_kind != Some(tokens::Kind::String) - && !escaped; - if string_start { - string_open = true; - current_kind = None; + /// Look at immediately following complete character. + /// Returns `None` if file is at EOF. + fn peek_char(&self) -> Option<char> { + let bytes = self.source.as_bytes(); + let slice = &bytes[self.byte_offset.get()..]; + unsafe { + let utf8 = std::str::from_utf8_unchecked(slice); + let mut chars = utf8.chars(); + chars.next() } + } - let peek_char = if bytes == eof - 1 { - None - } else { - let peek_char = string.as_bytes()[bytes + 1] as char; - Some(peek_char) - }; - let mut peek_kind = if let Some(peeked) = peek_char { - character_kind(peeked, current_kind) - } else { None }; + /// Check if source-code at current possition starts with a pattern. 
+ fn starts_with<P>(&'a self, pat: P) -> bool where P: Pattern<'a> { + self.source[self.byte_offset.get()..].starts_with(pat) + } - let some_lparen = Some(tokens::Kind::LParen); - let some_rparen = Some(tokens::Kind::RParen); + /// Advance the offset to the next unicode character. + /// Returns `None` if file is at EOF. + fn consume_char(&self) -> Option<char> { + let c = self.peek_char(); + self.increment_byte_offsets(1); + while !self.source.is_char_boundary(self.byte_offset.get()) { + self.increment_byte_offsets(1); + } + if c == Some('\n') { + self.next_line(); + } + return c; + } - let was_lparen = current_kind == some_lparen; - let was_rparen = current_kind == some_rparen; + fn consume_lparen(&'a self, whitespace: &'a str) -> Token<'a> { + let start = self.byte_offset.get(); + let line_offset = self.byte_offset_line.get(); + assert_eq!('(', self.consume_char().expect("consumed token at eof")); + let value: &str = &self.source[start..self.byte_offset.get()]; + let site: Site = self.site(start, line_offset); + Token::new(tokens::Kind::LParen, value, whitespace, site) + } - let peek_string = peek_char == Some('"'); - let peek_lparen = peek_kind == some_lparen; - let peek_rparen = peek_kind == some_rparen; + fn consume_rparen(&'a self, whitespace: &'a str) -> Token<'a> { + let start = self.byte_offset.get(); + let line_offset = self.byte_offset_line.get(); + assert_eq!(')', self.consume_char().expect("consumed token at eof")); + let value: &str = &self.source[start..self.byte_offset.get()]; + let site: Site = self.site(start, line_offset); + Token::new(tokens::Kind::RParen, value, whitespace, site) + } - if was_lparen || was_rparen { - peek_kind = None; - prev_kind = None; - } else if peek_rparen || peek_lparen { - peek_kind = None; - } else if peek_string { - peek_kind = None; - string_open = false; - } + fn consume_number(&'a self, whitespace: &'a str) -> Token<'a> { + let start = self.byte_offset.get(); + let line_offset = self.byte_offset_line.get(); + let 
value: &str = self.consume_identifier_string(); + let site: Site = self.site(start, line_offset); + Token::new(tokens::Kind::Number, value, whitespace, site) + } - // If we're on a whitespace, and there's a bracket (or quote) ahead, - // we need to explicitly say there's whitespace between the - // last token and the next bracket/quotation. - // (Ignore the whitespace, if it is consecutive to another whitespace) - match tokens.last() { - Some(token) if token.kind != tokens::Kind::Whitespace - && token.kind != tokens::Kind::Keyword - && is_whitespace(character) - && (peek_rparen - || peek_lparen - || peek_char == Some('"') - || token.kind == tokens::Kind::String - || token.kind == tokens::Kind::RParen) => { - let kind = tokens::Kind::Whitespace; - let mut site = Site::from_line(lines, line_bytes, 1); - site.source = source_str.clone(); - let value = character.to_string(); - tokens.push(Token::new(kind, value, site)); - }, - Some(_) | None => (), + /// Consume characters as long as they can be part of the identifier. + /// **Note:** backslashes are escaped and consume literally any + /// character after them, regardless of 'kind', including whitespace. + fn consume_identifier_string(&'a self) -> &'a str { + let start = self.byte_offset.get(); + while let Some(c) = self.peek_char() { + let Some(kind) = character_kind(c) else { break }; + // Symbols can contain escaped characters. + if c == '\\' { + let _ = self.consume_char(); // `\`. + let esc = self.consume_char(); // escaped char. + if esc == Some('\n') { + self.next_line(); // NOTE: Disallow this? + } + continue; + } + // Characters that fit in a symbol or number are valid idents. 
+ match kind { + tokens::Kind::Symbol | tokens::Kind::Number => {}, + _ => break + } + let _ = self.consume_char(); } + &self.source[start..self.byte_offset.get()] + } - if let Some(kind_current) = current_kind { - if prev_kind.is_none() { - old_kind = current_kind; - token_start = line_bytes; - } - accumulator.push(current_byte); - bytes += 1; - line_bytes += 1; + /// Consume a symbol/identifier token. + fn consume_symbol(&'a self, whitespace: &'a str) -> Token<'a> { + let start = self.byte_offset.get(); + let line_offset = self.byte_offset_line.get(); + let value: &str = self.consume_identifier_string(); + let site: Site = self.site(start, line_offset); + Token::new(tokens::Kind::Symbol, value, whitespace, site) + } - if peek_kind.is_none() { - let kind = if let Some(kind_old) = old_kind { - kind_old + /// A string is consumed as a token, but not parsed. + fn consume_string(&'a self, whitespace: &'a str) -> Result<Token<'a>, LexError<'a>> { + let start = self.byte_offset.get(); + let line_no = self.line.get(); + let line_offset = self.byte_offset_line.get(); + assert_eq!('"', self.peek_char().expect("consumed token at eof")); + + let token = if self.starts_with(r#"""""#) { + // Tripple-quoted string. + self.increment_byte_offsets(3); + let start_of_string = self.byte_offset.get(); + // Read until end-of-string. + let mut reading_escape = false; + loop { + let Some(next_char) = self.peek_char() else { + let site = Site::new(&self.source_path, &self.source, line_no, start, line_offset, 3); + return Err(LexError( + String::from("Unclosed tripple-quoted string."), + site)); + }; + if next_char == '\n' { self.next_line(); } + if self.starts_with(r#"""""#) && !reading_escape { + break; // End-of-string. 
+ } + if !reading_escape { + reading_escape = next_char == '\\'; } else { - kind_current + reading_escape = false; + } + self.increment_byte_offsets(1); + } + let end_of_string = self.byte_offset.get(); + self.increment_byte_offsets(3); + // String 'value' is inside quotes. + let value: &str = &self.source[start_of_string..end_of_string]; + let mut site: Site = self.site(start, line_offset); + site.line = line_no; + Token::new(tokens::Kind::String, value, whitespace, site) + } else { + // Single-quoted string. + self.increment_byte_offsets(1); + let start_of_string = self.byte_offset.get(); + // Read until end-of-string. + let mut reading_escape = false; + loop { + let Some(next_char) = self.peek_char() else { + let site = Site::new(&self.source_path, &self.source, line_no, start, line_offset, 1); + return Err(LexError( + String::from("Unclosed string quote (`\"')."), + site)); }; - - let mut span = accumulator.len(); - if kind == tokens::Kind::String { - span += 2; + if next_char == '\n' { self.next_line(); } + if next_char == '"' && !reading_escape { + break; // End-of-string. } + if !reading_escape { + reading_escape = next_char == '\\'; + } else { + reading_escape = false; + } + self.increment_byte_offsets(1); + } + let end_of_string = self.byte_offset.get(); + self.increment_byte_offsets(1); + // String 'value' is inside quotes. 
+ let value: &str = &self.source[start_of_string..end_of_string]; + let mut site: Site = self.site(start, line_offset); + site.line = line_no; + Token::new(tokens::Kind::String, value, whitespace, site) + }; - let value = String::from_utf8(accumulator).unwrap(); - let mut site = Site::from_line(lines, token_start, span); - site.source = source_str.clone(); - tokens.push(Token::new(kind, value, site)); - accumulator = Vec::new(); + Ok(token) + } - if was_lparen || peek_rparen || was_rparen { - old_kind = None; - current_kind = None; - token_start = line_bytes; - } + fn consume_keyword(&'a self, whitespace: &'a str) -> Token<'a> { + assert_eq!(':', self.consume_char().expect("consumed token at eof")); + let start = self.byte_offset.get(); // Leave colon out of token value. + let start_from_line = self.byte_offset_line.get(); + let value: &str = self.consume_identifier_string(); + let site: Site = self.site(start - 1, start_from_line - 1); + Token::new(tokens::Kind::Keyword, value, whitespace, site) + } - } - } else { - bytes += 1; - line_bytes += 1; - } + /// Generate site from start byte-index. + fn site(&self, file_offset: usize, line_offset: usize) -> Site { + let span = self.byte_offset.get() - file_offset; + Site::new(&self.source_path, &self.source, self.line.get(), file_offset, line_offset, span) + } - if character == '\n' { - line_bytes = 0; - token_start = 0; - lines += 1; - } - if string_start { - current_kind = Some(tokens::Kind::String); - old_kind = current_kind; - token_start = line_bytes - 1; - } - escaped = false; + pub fn consume(&'a self) -> Result<Token<'a>, LexError<'a>> { + // Swallow up leading whitespace. + let whitespace = self.consume_whitespace(); + // If there is any text left, continuie depending on the inital char. 
+ let character = self.peek_char().expect("tried to consume token on eof."); + let token = match character_kind(character) { + Some(tokens::Kind::LParen) => self.consume_lparen(whitespace), + Some(tokens::Kind::RParen) => self.consume_rparen(whitespace), + Some(tokens::Kind::Number) => self.consume_number(whitespace), + Some(tokens::Kind::String) => self.consume_string(whitespace)?, + Some(tokens::Kind::Symbol) => self.consume_symbol(whitespace), + Some(tokens::Kind::Keyword) => self.consume_keyword(whitespace), + None => unreachable!("incompletely consumed whitespace.") + }; + Ok(token) } - if string_open { - let mut site = Site::from_line(lines, line_bytes, 1); - site.source = source_str.clone(); - return Err(LexError(site, - "Unclosed double-quoted string.".to_string())) + + /// Perform an action that potentially advances us through + /// the source-code, but restore to the lexer-state before said + /// action (after getting its result), e.g. peeking tokens. + pub fn restore<F, T>(&'a self, action: F) -> T + where F: FnOnce(&'a Self) -> T + { + // Remember current position in source code. + let bo = self.byte_offset.get(); + let bol = self.byte_offset_line.get(); + let l = self.line.get(); + // Do some action, advancing the position. + let ret = action(self); + // Reset position to before doing the action. + self.byte_offset.set(bo); + self.byte_offset_line.set(bol); + self.line.set(l); + // Return what the action produced. + ret + } + + /// Look ahead to the next token without advancing the + /// source-code past it. + pub fn peek(&'a self) -> Result<Token<'a>, LexError<'a>> { + self.restore(|this: &'a Self| { + this.consume() + }) + } + + /// Check if file is at end-of-file. 
+ pub fn eof(&self) -> bool { + self.restore(|this: &Self| { + let _ = this.consume_whitespace(); + self.peek_char().is_none() + }) } - Ok(tokens) } diff --git a/src/parse/mod.rs b/src/parse/mod.rs @@ -1,18 +1,15 @@ pub mod tokens; - pub mod lexer; - pub mod parser; +pub mod expander; pub use parser::ParseTree; use std::{fs, path::Path, error::Error}; -/// Parse a file without expanding macros. -pub fn parse_file_noexpand(path : &Path) -> Result<ParseTree, Box<dyn Error>> { +/// Build a parser for a file without expanding macros. +pub fn parser_for_file(path: &Path) -> Result<parser::Parser, Box<dyn Error>> { let contents = fs::read_to_string(&path)?; - let tokens = lexer::lex(contents, Some(path))?; - let tree = parser::parse_stream(tokens)?; - Ok(tree) + let tokenizer = lexer::Lexer::new(path.to_string_lossy().to_string(), contents); + let builder = parser::Parser::new(tokenizer); + Ok(builder) } - -pub mod expander; diff --git a/src/parse/parser.rs b/src/parse/parser.rs @@ -1,38 +1,46 @@ use std::{fmt, error::Error}; -use super::tokens::{self, Kind, Site, Token}; +use unicode_width::UnicodeWidthStr; +use descape::UnescapeExt; + +use super::{lexer::{LexError, Lexer}, tokens::{Kind, Site, Token}}; #[derive(Debug, Clone)] -pub struct Node { - pub site : Site, - pub value : String +pub struct Node<'a> { + pub value: String, + pub site: Site<'a>, + pub leading_whitespace: String, } -impl Node { - pub fn new(value : &str, site : &Site) -> Self { +impl<'a> Node<'a> { + pub fn new(value: &str, site : &Site<'a>, leading_whitespace: &str) -> Self { Self { - site: site.to_owned(), - value: value.to_owned() + site: site.to_owned(), + value: value.to_owned(), + leading_whitespace: leading_whitespace.to_owned(), } } } #[derive(Debug, Clone)] -pub struct AttributeNode { - pub keyword : String, - pub node : Box<ParseNode>, - pub site : Site -} - -#[derive(Debug, Clone)] -pub enum ParseNode { - Symbol(Node), - Number(Node), - String(Node), - List(Vec<ParseNode>), - 
Attribute(AttributeNode) +pub enum ParseNode<'a> { + Symbol(Node<'a>), + Number(Node<'a>), + String(Node<'a>), + List { + nodes: Box<[ParseNode<'a>]>, + site: Site<'a>, + end_token: Token<'a>, + leading_whitespace: String, + }, + Attribute { + keyword: String, + node: Box<ParseNode<'a>>, + site: Site<'a>, + leading_whitespace: String, + }, } -impl ParseNode { +impl<'a> ParseNode<'a> { pub fn symbolic(&self) -> Option<Node> { match self { Self::Symbol(node) @@ -40,6 +48,7 @@ impl ParseNode { _ => None } } + pub fn atomic(&self) -> Option<Node> { match self { Self::Symbol(node) @@ -48,43 +57,235 @@ impl ParseNode { _ => None } } - pub fn site(&self) -> Site { + + pub fn site(&self) -> &Site { match self { - Self::Symbol(node) - | Self::Number(node) - | Self::String(node) => node.site.to_owned(), - Self::List(list) => { - if let Some(head) = list.first() { - head.site() - } else { - panic!("No empty lists should be allowed.") - } - }, - Self::Attribute(attr) => attr.site.to_owned(), + Self::Symbol(ref node) + | Self::Number(ref node) + | Self::String(ref node) => &node.site, + Self::List { ref site, .. } => site, + Self::Attribute { ref site, .. } => site, + } + } + + pub fn leading_whitespace(&'a self) -> &'a str { + match self { + Self::Symbol(ref node) + | Self::Number(ref node) + | Self::String(ref node) => &node.leading_whitespace, + Self::List { ref leading_whitespace, .. } => leading_whitespace, + Self::Attribute { ref leading_whitespace, .. } => leading_whitespace, + } + } + + pub fn set_leading_whitespace(&mut self, whitespace: String) { + match self { + Self::Symbol(ref mut node) + | Self::Number(ref mut node) + | Self::String(ref mut node) => node.leading_whitespace = whitespace, + Self::List { ref mut leading_whitespace, .. } => *leading_whitespace = whitespace, + Self::Attribute { ref mut leading_whitespace, .. } => *leading_whitespace = whitespace, + }; + } + + pub fn node_type(&self) -> &'static str { + match self { + Self::Symbol(..) 
=> "symbol", + Self::Number(..) => "number", + Self::String(..) => "string", + Self::List { .. } => "list", + Self::Attribute { .. } => "attribute", + } + } +} + +pub type ParseTree<'a> = Box<[ParseNode<'a>]>; + +#[derive(Debug, Clone)] +pub struct ParseError<'a>(pub String, pub Site<'a>); + +impl<'a> fmt::Display for ParseError<'a> { + fn fmt(&self, f : &mut fmt::Formatter<'_>) -> fmt::Result { + let ParseError(msg, site) = self; + let line_prefix = format!(" {} |", site.line); + let line_view = site.line_slice(); + writeln!(f, "{} {}", line_prefix, line_view)?; + writeln!(f, "{:>prefix_offset$} {:~>text_offset$}{:^>length$}", "|", "", "", + prefix_offset=UnicodeWidthStr::width(line_prefix.as_str()), + text_offset=site.line_column() - 1, + length=site.width())?; + write!(f, "[**] Parse Error ({}:{}:{}): {}", + site.source, site.line, site.line_column(), msg) + } +} + +impl<'a> Error for ParseError<'a> { } + +/// Parser structure walks through source using lexer. +#[derive(Debug, Clone)] +pub struct Parser { + lexer: Lexer, //< Parser owns a lexer. +} + +impl<'a> Parser { + pub fn new(lexer: Lexer) -> Self { + Self { lexer } + } + + pub fn get_source(&'a self) -> &'a str { + self.lexer.get_source() + } + + /// Parse whole source code, finishing off the lexer. + pub fn parse(&'a self) -> Result<ParseTree, Box<dyn Error + 'a>> { + let mut root: Vec<ParseNode> = Vec::new(); + while !self.lexer.eof() { + let expr = self.parse_expr()?; + root.push(expr); + } + return Ok(root.into_boxed_slice()); + } + + /// Produce a parse node from the current position in the lexer. 
+ pub fn parse_expr(&'a self) -> Result<ParseNode, Box<dyn Error + 'a>> { + let token = self.lexer.peek()?; + match token.kind { + Kind::LParen => self.parse_list(), + Kind::RParen => Err(ParseError( + "Unexpected `)' closing parenthesis.".to_owned(), + token.site.to_owned()))?, + Kind::Keyword => self.parse_keyword(), + Kind::Symbol => Ok(ParseNode::Symbol(self.parse_atomic()?)), + // TODO: Parse (escape) string-literals. + Kind::String => Ok(ParseNode::String(self.parse_atomic()?)), + Kind::Number => Ok(ParseNode::Number(self.parse_atomic()?)), } } + + /// Parse keyword-attribute pair. + fn parse_keyword(&'a self) -> Result<ParseNode, Box<dyn Error + 'a>> { + // Consume :keyword token. + let token = self.lexer.consume()?; + assert_eq!(token.kind, Kind::Keyword); + // Check we are able to consume next expression for keyword's value. + { + let no_expr_error = ParseError( + format!("Keyword `:{}' expects an expression follwing it.", token.value), + token.site.to_owned()); + if self.lexer.eof() { Err(no_expr_error.clone())? ;} + match self.lexer.peek()? { + Token { kind: Kind::RParen, .. } => Err(no_expr_error)?, + _ => () + } + } + // Otherwise, parse the value and combine the node. + let value = self.parse_expr()?; + Ok(ParseNode::Attribute { + keyword: token.value.to_owned(), + node: Box::new(value), + site: token.site.to_owned(), + leading_whitespace: token.leading_whitespace.to_owned(), + }) + } + + /// Parse a literal node. + /// This is where escapes in symbols and strings are handled. 
+ fn parse_atomic(&'a self) -> Result<Node<'a>, LexError<'a>> { + let token = self.lexer.consume()?; + let value = match token.kind { + Kind::Symbol | Kind::Number | Kind::Keyword => escape_sanitize(token.value), + Kind::String => escape_string(token.value, &token.site)?, + _ => unreachable!("called `parse_atomic` on non-atomic token."), + }; + Ok(Node { + value, + site: token.site.clone(), + leading_whitespace: token.leading_whitespace.to_string(), + }) + } + + /// Parse a list `( [...] )'. + fn parse_list(&'a self) -> Result<ParseNode<'a>, Box<dyn Error + 'a>> { + // Consume the `(' token. + let lparen = self.lexer.consume()?; + assert_eq!(lparen.kind, Kind::LParen); + // Collect list elements. + let mut elements = Vec::new(); + let mut rparen: Option<Token> = None; + while !self.lexer.eof() { + // Keep parsing expressions until `)' is reached. + let token = self.lexer.peek()?; + if token.kind == Kind::RParen { + rparen = Some(self.lexer.consume()?); // Swallow up `)'. + break; + } + let expr = self.parse_expr()?; + elements.push(expr); + } + // Closing parenthesis was never found. + let Some(rparen) = rparen else { + return Err(ParseError( + "Expected `)' closing parenthesis.".to_owned(), + lparen.site.to_owned()))?; + }; + Ok(ParseNode::List { + nodes: elements.into_boxed_slice(), + site: lparen.site.to_owned(), + end_token: rparen.to_owned(), + leading_whitespace: lparen.leading_whitespace.to_owned(), + }) + } } -pub type ParseTree = Vec<ParseNode>; +/// Sanitize any escaped characters by removing their leading backslash. +fn escape_sanitize(string: &str) -> String { + let mut builder = String::with_capacity(string.len()); + let mut chars = string.chars(); + while let Some(c) = chars.next() { + if c == '\\' { continue; } + builder.push(c) + } + builder +} -pub trait SearchTree { +/// Parse a string with its escapes. +/// **Note:** Uses the `descape` crate for now. 
+fn escape_string<'a>(string: &'a str, site: &Site<'a>) -> Result<String, LexError<'a>> { + string.to_unescaped() + .map(|s| s.to_string()) + .map_err(|index| { + LexError( + format!("Invalid escape `\\{}' at byte-index {}.", + string.chars().nth(index).unwrap_or('?'), index), + site.clone()) + }) +} + +pub trait SearchTree<'a> { /// Search the parse-tree for a specific node with a specific value. - fn search_node(&self, kind : SearchType, - value : &str, - case_insensitive : bool, - level : usize) -> Option<ParseNode>; + fn search_node(&'a self, kind: SearchType, + value: &str, + case_insensitive: bool, + level: usize) -> Option<&ParseNode<'a>>; } #[derive(Clone, Copy, PartialEq)] pub enum SearchType { ListHead, ListMember, Symbol, Number, String, - Attribute + Attribute, + Any, +} + +impl SearchType { + pub fn is_a(self, kind: SearchType) -> bool { + self == SearchType::Any || self == kind + } } -impl SearchTree for ParseTree { - fn search_node(&self, kind : SearchType, value : &str, - insensitive : bool, level: usize) -> Option<ParseNode> { +impl<'a> SearchTree<'a> for ParseNode<'a> { + fn search_node(&'a self, kind: SearchType, value: &str, + insensitive: bool, level: usize) -> Option<&ParseNode<'a>> { if level == 0 { return None; } @@ -95,193 +296,71 @@ impl SearchTree for ParseTree { string == value }; - for node in self { - let found = match node { - ParseNode::List(nodes) => { - if kind == SearchType::ListHead { - if let Some(Some(caller)) = nodes.get(0).map(ParseNode::atomic) { - if is_equal(&caller.value) { - return Some(node.clone()); - } - } - } - nodes.search_node(kind, value, insensitive, level - 1) - }, - ParseNode::Symbol(name) => { - if kind == SearchType::Symbol && is_equal(&name.value) { - Some(node.clone()) - } else { - None - } - }, - ParseNode::String(name) => { - if kind == SearchType::String && is_equal(&name.value) { - Some(node.clone()) - } else { - None - } - }, - ParseNode::Number(name) => { - if kind == SearchType::Number && 
is_equal(&name.value) { - Some(node.clone()) - } else { - None - } - }, - ParseNode::Attribute(attr) => { - if kind == SearchType::Attribute { - if is_equal(&attr.keyword) { - return Some(node.clone()); + match self { + ParseNode::List { nodes, .. } => { + if kind.is_a(SearchType::ListHead) { + if let Some(Some(caller)) = nodes.get(0).map(ParseNode::atomic) { + if is_equal(&caller.value) { + return Some(self); } } - let singleton : ParseTree = vec![*attr.node.clone()]; - singleton.search_node(kind, value, insensitive, level - 1) } - }; - - if found.is_some() { - return found; - } - } - - None - } -} - -#[derive(Debug, Clone)] -pub struct ParseError(pub String, pub Site); - -impl fmt::Display for ParseError { - fn fmt(&self, f : &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "[**] Parse Error {}: {}", - self.1, self.0) - } -} - -impl Error for ParseError { } - -fn parse_atomic(token : &Token) -> Result<ParseNode, ParseError> { - let node = Node::new(&token.value, &token.site); - match token.kind { - Kind::Symbol => Ok(ParseNode::Symbol(node)), - Kind::String => Ok(ParseNode::String(node)), - Kind::Number => Ok(ParseNode::Number(node)), - Kind::Whitespace => Ok(ParseNode::String(node)), - _ => Err(ParseError( - String::from("Atomic token not found here."), - token.site.clone())) - } -} - -pub fn parse(tokens : &[Token]) - -> Result<(ParseNode, &[Token]), ParseError> { - let token = &tokens[0]; - match token.kind { - Kind::LParen => { - // Parse list. - let open_paren = token.site.clone(); - let mut slice = &tokens[1..]; - if slice.is_empty() { - return Err(ParseError( - "Expected `)' (closing parenthesis), got EOF." - .to_owned(), token.site.clone())); - } - // Ignore leading white space in head of list. 
- if slice[0].kind == Kind::Whitespace { - slice = &slice[1..]; - } - let mut elements = Vec::new(); - let mut token = &slice[0]; - - let mut i = 0; - loop { - i += 1; - if slice.is_empty() { - return Err(ParseError( - "Expected `)' (closing parenthesis), got EOF." - .to_owned(), token.site.clone())); + nodes.search_node(kind, value, insensitive, level - 1) + }, + ParseNode::Symbol(name) => { + if kind.is_a(SearchType::Symbol) && is_equal(&name.value) { + Some(self) + } else { + None } - token = &slice[0]; - if token.kind == Kind::RParen - { break; } // End of list. - if token.kind == Kind::Whitespace && i == 2 { - // Skip whitespace immediately after head. - slice = &slice[1..]; - continue; + }, + ParseNode::String(name) => { + if kind.is_a(SearchType::String) && is_equal(&name.value) { + Some(self) + } else { + None } - - let (element, left) = parse(&slice)?; - elements.push(element); - slice = left; - } - slice = &slice[1..]; // Ignore last r-paren. - if elements.is_empty() { - // Empty lists have an invisible empty symbol in them. - let node = Node::new("", &open_paren); - elements.push(ParseNode::Symbol(node)); - } - Ok((ParseNode::List(elements), slice)) - }, - Kind::Keyword => { - // Parse second token, make attribute. - let (node, mut slice) = parse(&tokens[1..])?; - let attribute = AttributeNode { - keyword: token.value[1..].to_owned(), - node: Box::new(node), - site: token.site.to_owned() - }; - // White space after attributes don't count. - if let Some(next) = slice.first() { - if next.kind == Kind::Whitespace { - slice = &slice[1..]; + }, + ParseNode::Number(name) => { + if kind.is_a(SearchType::Number) && is_equal(&name.value) { + Some(self) + } else { + None } - } - Ok((ParseNode::Attribute(attribute), slice)) - }, - Kind::RParen => { - Err(ParseError("Unexpected `)' (closing parenthesis). \ - Perhaps you forgot an opening parenthesis?".to_owned(), - token.site.clone())) - }, - _ => { // Any atomic tokens. 
- Ok((parse_atomic(&token)?, &tokens[1..])) + }, + ParseNode::Attribute { node, ref keyword, .. } => { + if kind.is_a(SearchType::Attribute) { + if is_equal(keyword) { + return Some(node); + } + } + node.search_node(kind, value, insensitive, level - 1) + }, } } } -pub fn parse_stream(tokens: tokens::TokenStream) - -> Result<ParseTree, ParseError> { - let mut tree = Vec::new(); - let mut slice = &tokens[..]; - while !slice.is_empty() { - let (node, next) = parse(slice)?; - tree.push(node); - slice = next; - } - Ok(tree) -} - -/// Strip any pure whitespace (and annotation) nodes from the tree. -pub fn strip(tree : &[ParseNode], strip_attributes : bool) -> ParseTree { - let mut stripped = tree.to_owned(); - stripped.retain(|branch| { - match branch { - ParseNode::String(node) => !node.value.trim().is_empty(), - ParseNode::Attribute(_) => !strip_attributes, - _ => true +impl<'a> SearchTree<'a> for ParseTree<'a> { + fn search_node(&'a self, kind: SearchType, value: &str, + insensitive: bool, level: usize) -> Option<&ParseNode<'a>> { + if level == 0 { + return None; } - }); - for branch in stripped.iter_mut() { - if let ParseNode::List(ref mut list) = branch { - *list = strip(list, strip_attributes); + + for node in self { + let found = node.search_node(kind, value, insensitive, level); + if found.is_some() { + return found; + } } + + None } - stripped } /// Pretty printing for parse nodes. #[cfg(feature="debug")] -impl fmt::Display for ParseNode { +impl<'a> fmt::Display for ParseNode<'a> { fn fmt(&self, f : &mut fmt::Formatter<'_>) -> fmt::Result { match self { ParseNode::Symbol(node) @@ -293,15 +372,15 @@ impl fmt::Display for ParseNode { write!(f, "\"{}\"", &node.value) } }, - ParseNode::Attribute(attr) => write!(f, ":{} {}", - &attr.keyword, &*attr.node), - ParseNode::List(list) => if list.len() == 0 { + ParseNode::Attribute { keyword, node, .. } => write!(f, ":{} {}", + &keyword, &*node), + ParseNode::List { nodes, .. 
} => if nodes.len() == 0 { write!(f, "()") - } else if let [ single ] = list.as_slice() { + } else if let [single] = &**nodes { write!(f, "({})", single) } else { - write!(f, "({}{})", &list[0], - list[1..].iter().fold(String::new(), |acc, elem| { + write!(f, "({}{})", nodes[0], + nodes[1..].iter().fold(String::new(), |acc, elem| { let nested = elem.to_string().split('\n') .fold(String::new(), |acc, e| acc + "\n " + &e); @@ -311,4 +390,3 @@ impl fmt::Display for ParseNode { } } } - diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs @@ -1,54 +1,93 @@ -use std::rc::Rc; use std::fmt::{self, Display}; +use unicode_width::UnicodeWidthStr; #[derive(Debug, Clone)] -pub struct Site { - pub source : Option<Rc<str>>, - pub line : usize, - pub bytes_from_start : usize, - pub bytes_span : usize, +pub struct Site<'a> { + pub source: &'a str, + pub source_code: &'a str, // TODO: propagate! + pub line: usize, + pub bytes_from_start: usize, + pub bytes_from_start_of_line: usize, + pub bytes_span: usize, } -impl Site { - pub fn new(source : Rc<str>, line : usize, - bytes_from_start : usize, - bytes_span : usize) -> Self { +pub const UNKNOWN_SITE: Site<'static> = Site { + source: "<unknwon>", + source_code: "", + line: 0, + bytes_from_start: 0, + bytes_from_start_of_line: 0, + bytes_span: 0, +}; + +impl<'a> Site<'a> { + pub fn new(source: &'a str, + source_code: &'a str, + line: usize, + bytes_from_start : usize, + bytes_from_start_of_line: usize, + bytes_span : usize) -> Self { Self { - source: Some(source), - line, bytes_from_start, - bytes_span + source, + source_code, + line, + bytes_from_start, + bytes_from_start_of_line, + bytes_span, } } - pub fn fake() -> Self { - Self { - source: None, - line: 0, - bytes_from_start: 0, - bytes_span: 0 - } + pub const fn unknown() -> Self { UNKNOWN_SITE } + + /// Byte-offset in source code for start-of-line where this site is. 
+ pub fn start_of_line(&self) -> usize { + self.bytes_from_start - self.bytes_from_start_of_line } - pub fn from_line(line : usize, - bytes_from_start : usize, - bytes_span : usize) -> Self { - Self { - source: None, - line, bytes_from_start, - bytes_span + /// Find byte-offset in source code of end-of-line where this site is. + pub fn end_of_line(&self) -> usize { + let mut i = self.bytes_from_start; + let bytes = self.source_code.as_bytes(); + while i < self.source_code.len() { + if bytes[i] == '\n' as u8 { + return i; + } + i += 1; } + return i; + } + + pub fn view(&'a self) -> &'a str { + let start = self.bytes_from_start; + let end = start + self.bytes_span; + &self.source_code[start..end] + } + + /// Get string view into whole line that site is referencing. + pub fn line_slice(&self) -> &'a str { + &self.source_code[self.start_of_line()..self.end_of_line()] + } + + /// Compute (monospace, terminal) column width of piece of text + /// referenced by this site in the source code. + pub fn width(&self) -> usize { + let text = &self.source_code[self.bytes_from_start..self.bytes_from_start + self.bytes_span]; + UnicodeWidthStr::width(text) + } + + /// Compute which column the site starts at on the line. 
+ pub fn line_column(&self) -> usize { + let preceeding = &self.source_code[self.start_of_line()..self.bytes_from_start]; + UnicodeWidthStr::width(preceeding) + 1 } } -impl Display for Site { +impl<'a> Display for Site<'a> { fn fmt(&self, f : &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "(")?; - if let Some(source) = &self.source { - write!(f, "{}:", source)?; - } else { - write!(f, "<stdin>:")?; - } - write!(f, "{}:{})", self.line, self.bytes_from_start + 1) + write!(f, "{}:", self.source)?; + write!(f, "{}:{}", self.line, self.line_column())?; + write!(f, ")") } } @@ -60,20 +99,18 @@ pub enum Kind { String, Number, Keyword, - Whitespace, } #[derive(Debug, Clone)] -pub struct Token { - pub kind : Kind, - pub value : String, - pub site : Site, +pub struct Token<'a> { + pub kind: Kind, + pub value: &'a str, + pub leading_whitespace: &'a str, + pub site: Site<'a>, } -impl Token { - pub fn new(kind : Kind, value : String, site : Site) -> Self { - Self { kind, value, site } +impl<'a> Token<'a> { + pub fn new(kind: Kind, value: &'a str, leading_whitespace: &'a str, site: Site<'a>) -> Self { + Self { kind, value, leading_whitespace, site } } } - -pub type TokenStream = Vec<Token>; diff --git a/test.sex b/test.sex @@ -1,16 +0,0 @@ -(!DOCTYPE html) -(html :lang en - (head - (title Example HTML Document) - (style (%include "./test-css.sex"))) - (body - (p :id hello Hello, World!) - (p something something text...) - (h1 "A (big) Header!") - (p Yet some more - (span :style "color: red" text) <3) - (p Hello(span :style "color: green" World)!) - (img - :alt "Cute cat" - :src "https://static.insider.com/image/5d24d6b921a861093e71fef3.jpg" - :width 300)))