seam

Symbolic Expressions As Markup.
git clone git://git.knutsen.co/seam

commit 53018a113f1cd202aa4a36d60bb97d3827f51934
parent 5be4f703571f29d8cdc06540dba0bfb32b3d005a
Author: Demonstrandum <samuel@knutsen.co>
Date:   Thu, 28 Nov 2024 17:27:15 +0000

Partially implemented proc macro for parsing macro arguments.

Diffstat:
MCargo.lock | 179++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
MCargo.toml | 30++----------------------------
MREADME.md | 2++
Acrates/seam/Cargo.lock | 439+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acrates/seam/Cargo.toml | 30++++++++++++++++++++++++++++++
Rsrc/assemble/css.rs -> crates/seam/src/assemble/css.rs | 0
Rsrc/assemble/html.rs -> crates/seam/src/assemble/html.rs | 0
Acrates/seam/src/assemble/mod.rs | 143+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Rsrc/assemble/sexp.rs -> crates/seam/src/assemble/sexp.rs | 0
Rsrc/assemble/text.rs -> crates/seam/src/assemble/text.rs | 0
Rsrc/assemble/xml.rs -> crates/seam/src/assemble/xml.rs | 0
Rsrc/bin.rs -> crates/seam/src/bin.rs | 0
Acrates/seam/src/lib.rs | 72++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acrates/seam/src/parse/expander.rs | 760+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Rsrc/parse/lexer.rs -> crates/seam/src/parse/lexer.rs | 0
Acrates/seam/src/parse/macros.rs | 394+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acrates/seam/src/parse/mod.rs | 17+++++++++++++++++
Acrates/seam/src/parse/parser.rs | 512+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acrates/seam/src/parse/tokens.rs | 126+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acrates/seam_argparse_proc_macro/Cargo.toml | 15+++++++++++++++
Acrates/seam_argparse_proc_macro/src/lib.rs | 415+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dsrc/assemble/mod.rs | 160-------------------------------------------------------------------------------
Dsrc/lib.rs | 39---------------------------------------
Dsrc/parse/expander.rs | 739-------------------------------------------------------------------------------
Dsrc/parse/mod.rs | 15---------------
Dsrc/parse/parser.rs | 444-------------------------------------------------------------------------------
Dsrc/parse/tokens.rs | 116-------------------------------------------------------------------------------
Asrc/seam_argparse_proc_macro/lib.rs | 6++++++
28 files changed, 3055 insertions(+), 1598 deletions(-)
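
The headline change is the split into a Cargo workspace (crates/seam and crates/seam_argparse_proc_macro) and the beginnings of an `arguments!` proc macro for declaring how the expander's builtin macros parse their arguments. For orientation before the full diff: the sketch below reproduces the `expand_join_macro` stub added in crates/seam/src/parse/expander.rs, with editorial comments; the rule keywords (`mandatory`, `optional`, `rest`, `literal`, `number`) are taken from the diff itself, and the parser is still a `todo!()` at this stage.

    fn expand_join_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>)
        -> Result<ParseTree<'a>, ExpansionError<'a>> {
        // Declare the argument rules for `%join` using the new `arguments!` DSL.
        let args: ArgRules = arguments! {
            mandatory(1): literal,                           // 1st positional: any literal.
            mandatory(2): number fn(_v: ParseNode) { true }, // 2nd positional: a number passing the predicate.
            optional("trailing"): literal["true", "false"],  // optional `:trailing` option, "true" or "false".
            rest: literal,                                   // remaining arguments: literals.
        };
        // Build a parser over the call-site parameters; actual validation and
        // extraction are not wired up yet in this commit.
        let arg_parser = args.parser(&params);
        todo!()
    }
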

diff --git a/Cargo.lock b/Cargo.lock @@ -3,6 +3,15 @@ version = 3 [[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] name = "android-tzdata" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -31,9 +40,12 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "cc" -version = "1.0.98" +version = "1.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" +checksum = "72db2f7947ecee9b03b510377e8bb9077afa27176fdbff55c51027e976fdcc48" +dependencies = [ + "shlex", +] [[package]] name = "cfg-if" @@ -52,7 +64,7 @@ dependencies = [ "js-sys", "num-traits", "wasm-bindgen", - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] @@ -67,9 +79,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "descape" @@ -108,30 +120,36 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" dependencies = [ "wasm-bindgen", ] [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.155" +version = "0.2.156" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "a5f43f184355eefb8d17fc948dbecf6c13be3c141f20d834ae842193a448c72a" [[package]] name = "log" -version = "0.4.21" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "memchr" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "num-traits" @@ -150,23 +168,52 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "proc-macro2" -version = "1.0.85" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.36" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = 
"b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] [[package]] +name = "regex" +version = "1.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + +[[package]] name = "seam" version = "0.3.0" dependencies = [ @@ -174,14 +221,31 @@ dependencies = [ "colored", "descape", "formatx", + "regex", + "seam_argparse_proc_macro", "unicode-width", ] [[package]] +name = "seam_argparse_proc_macro" +version = "0.0.0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] name = "syn" -version = "2.0.66" +version = "2.0.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" dependencies = [ "proc-macro2", "quote", @@ -196,25 +260,26 @@ checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-width" -version = "0.1.12" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6" +checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" [[package]] name = "wasm-bindgen" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" dependencies = [ "cfg-if", + "once_cell", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" dependencies = [ "bumpalo", "log", @@ -227,9 +292,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -237,9 +302,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" 
dependencies = [ "proc-macro2", "quote", @@ -250,9 +315,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" [[package]] name = "windows-core" @@ -260,7 +325,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] @@ -289,18 +354,18 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.5", - "windows_aarch64_msvc 0.52.5", - "windows_i686_gnu 0.52.5", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", "windows_i686_gnullvm", - "windows_i686_msvc 0.52.5", - "windows_x86_64_gnu 0.52.5", - "windows_x86_64_gnullvm 0.52.5", - "windows_x86_64_msvc 0.52.5", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", ] [[package]] @@ -311,9 +376,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" @@ -323,9 +388,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" @@ -335,15 +400,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" @@ -353,9 +418,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" +checksum = 
"240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" @@ -365,9 +430,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" @@ -377,9 +442,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" @@ -389,6 +454,6 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/Cargo.toml b/Cargo.toml @@ -1,28 +1,2 @@ -[package] -name = "seam" -description = "Symbolic Expressions As Markup." -keywords = ["markup", "lisp", "macro", "symbolic-expression", "sexp"] -license-file = "LICENSE" -homepage = "https://git.knutsen.co/seam" -version = "0.3.0" -authors = ["Demonstrandum <samuel@knutsen.co>"] -edition = "2021" - -[features] -# default = ["debug"] -debug = [] - -[lib] -name = "seam" -path = "src/lib.rs" - -[[bin]] -name = "seam" -path = "src/bin.rs" - -[dependencies] -colored = "2.1" -chrono = "0.4" -unicode-width = "0.1.12" -descape = "1.1.2" -formatx = "0.2.2" +[workspace] +members = ["crates/seam", "crates/seam_argparse_proc_macro"] diff --git a/README.md b/README.md @@ -91,6 +91,7 @@ seam --sexp <<< '(hello (%define subject world) %subject)' ``` ## Checklist + - [ ] A *splat* operation: `(%splat (a b c))` becomes `a b c`. - [x] `(%define x %body)` evaluates `%body` eagerly (at definition), while `(%define (y) %body)` only evaluates `%body` per call-site `(%y)`. - [x] Namespace macro `(%namespace ns (%include "file.sex"))` will prefix all definitions in its body with `ns/`, e.g. `%ns/defn`. @@ -109,6 +110,7 @@ seam --sexp <<< '(hello (%define subject world) %subject)' - [ ] Implement lexical scope by letting macros store a copy of the scope they were defined in (or a reference?). - [x] `(%embed "/path")` macro, like `%include`, but just returns the file contents as a string. - [ ] Variadic arguments via `&rest` syntax. + - [ ] Type-checking facilities for user macros (?). - [ ] Delayed evaluation of macros by `%(...)` syntax. [ ] For example `%(f x y)` is the same as `(%f x y)`, so you can have `(%define uneval f x)` and then write `%(%uneval y)`. - [ ] `%list` macro which expands from `(p (%list a b c))` to `(p a b c)`. diff --git a/crates/seam/Cargo.lock b/crates/seam/Cargo.lock @@ -0,0 +1,439 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "cc" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-targets 0.52.5", +] + +[[package]] +name = "colored" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbf2150cce219b664a8a70df7a1f933836724b503f8a413af9365b4dcc4d90b8" +dependencies = [ + "lazy_static", + "windows-sys", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + +[[package]] +name = "descape" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "396a0a312bef78b5f62b0251d7162c4b8af162949b8b104d2967e41b26c1b68c" + +[[package]] +name = "formatx" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db0f0c49aba98a3b2578315766960bd242885ff672fd62610c5557cd6c6efe03" + +[[package]] +name = "iana-time-zone" +version = "0.1.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "js-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = 
"1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "proc-macro2" +version = "1.0.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + +[[package]] +name = "seam" +version = "0.3.0" +dependencies = [ + "chrono", + "colored", + "descape", + "formatx", + "regex", + "unicode-width", +] + +[[package]] +name = "syn" +version = "2.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-width" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6" + +[[package]] +name = "wasm-bindgen" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = 
"0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets 0.52.5", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm 0.52.5", + "windows_aarch64_msvc 0.52.5", + "windows_i686_gnu 0.52.5", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.5", + "windows_x86_64_gnu 0.52.5", + "windows_x86_64_gnullvm 0.52.5", + "windows_x86_64_msvc 0.52.5", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" diff --git a/crates/seam/Cargo.toml b/crates/seam/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "seam" +description = "Symbolic Expressions As Markup." +keywords = ["markup", "lisp", "macro", "symbolic-expression", "sexp"] +license-file = "LICENSE" +homepage = "https://git.knutsen.co/seam" +version = "0.3.0" +authors = ["Demonstrandum <samuel@knutsen.co>"] +edition = "2021" + +[features] +# default = ["debug"] +debug = [] + +[lib] +name = "seam" +path = "src/lib.rs" + +[[bin]] +name = "seam" +path = "src/bin.rs" + +[dependencies] +seam_argparse_proc_macro = { path = "../seam_argparse_proc_macro" } +colored = "2.1" +chrono = "0.4" +unicode-width = "0.1.12" +descape = "1.1.2" +formatx = "0.2.2" +regex = "1.10.5" diff --git a/src/assemble/css.rs b/crates/seam/src/assemble/css.rs diff --git a/src/assemble/html.rs b/crates/seam/src/assemble/html.rs diff --git a/crates/seam/src/assemble/mod.rs b/crates/seam/src/assemble/mod.rs @@ -0,0 +1,143 @@ +use crate::{impl_clone_box, CloneBox, parse::tokens::Site}; +use std::{convert, error::Error, fmt::{self, Debug}}; + +use colored::*; +use unicode_width::UnicodeWidthStr; + +/// Error type for specific errors with generating +/// each type of markup. 
+#[derive(Debug, Clone)] +pub struct GenerationError<'a> { + pub markup: &'static str, + pub message: String, + pub site: Site<'a>, +} + +impl<'a> GenerationError<'a> { + /// Create a new error given the ML, the message, and the site. + pub fn new(ml: &'static str, msg: &str, site: &Site<'a>) -> Self { + Self { + markup: ml, + message: msg.to_owned(), + site: site.to_owned(), + } + } +} + +/// Implement fmt::Display for user-facing error output. +impl<'a> fmt::Display for GenerationError<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let line_prefix = format!(" {} |", self.site.line); + let line_view = self.site.line_slice(); + writeln!(f, "{} {}", line_prefix, line_view)?; + writeln!(f, "{:>prefix_offset$} {:~>text_offset$}{:^>length$}", "|", "", "", + prefix_offset=UnicodeWidthStr::width(line_prefix.as_str()), + text_offset=self.site.line_column() - 1, + length=self.site.width())?; + write!(f, "{}: {}", + format!("[{}] Error Generating {} ({}:{}:{})", + "**".red().bold(), + self.markup.bold(), + self.site.source, + self.site.line, + self.site.line_column(), + ).black(), + self.message) + } +} + +/// Implements std::error::Error. +impl<'a> Error for GenerationError<'a> { } + +/// Convert from an io::Error to a generation error. +impl<'a> From<std::io::Error> for GenerationError<'a> { + fn from(e: std::io::Error) -> Self { + Self { + markup: "<markup>", + message: format!("IO error: {}", e), + site: Site::unknown(), + } + } +} + +/// An fmt::Error can be cast to an equally horribly +/// ambiguous GenerationError. +impl<'a> convert::From<fmt::Error> for GenerationError<'a> { + fn from(e: fmt::Error) -> Self { + Self { + markup: "<markup>", + message: format!("Format buffer error: {}", e), + site: Site::unknown(), + } + } +} + +pub type Formatter<'a> = &'a mut dyn fmt::Write; + +/// Trait for all structs that can generate specific markup +/// for the s-expression tree. +pub trait MarkupFormatter: Debug + CloneBox { + // Required definitions: + /// Similar to fmt in Display/Debug traits, takes in a + /// mutable writable buffer, returns success or a specifc + /// error while generating the markup. + fn generate(&self, buf: Formatter) -> Result<(), GenerationError>; + /// Documentises the input, that's to say, it adds any + /// extra meta-information to the generated markup, if + /// the s-expressions your wrote ommited it. + /// e.g. All XML gets a `<?xml ... ?>` tag added to it. + fn document(&self) -> Result<String, GenerationError>; + // Default definitions: + /// Directly converts the s-expressions into a string + /// containing the markup, unless there was an error. + fn display(&self) -> Result<String, GenerationError> { + let mut buf = String::new(); + self.generate(&mut buf)?; + Ok(buf) + } +} + +impl_clone_box! { 'a; dyn MarkupFormatter + 'a} + +/// Automatically implement fmt::Display as a wrapper around +/// MarkupFormatter::generate, but throws away the useful error message. 
+impl fmt::Display for dyn MarkupFormatter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.generate(f).map_err(|_| fmt::Error) + } +} + +/// Parforms the following escapes: +/// - `<` → `&lt;` +/// - `>` → `&gt;` +/// - `"` → `&quot;` +/// - `'` → `&apos;` +/// - `&` → `&amp;` +pub fn escape_xml(string: &str) -> String { + let mut bytes = string.bytes(); + let mut byte_builder: Vec<u8> = Vec::with_capacity(bytes.len()); + while let Some(byte) = bytes.next() { + match byte { + b'<' => byte_builder.extend(b"&lt;"), + b'>' => byte_builder.extend(b"&gt;"), + b'"' => byte_builder.extend(b"&quot;"), + b'\'' => byte_builder.extend(b"&apos;"), + b'&' => byte_builder.extend(b"&amp;"), + _ => byte_builder.push(byte) + } + } + unsafe { + String::from_utf8_unchecked(byte_builder) + } +} + +/// Re-constitute original S-expressions. +pub mod sexp; +/// Converts source into expanded plain-text. +pub mod text; +/// XML generation. +pub mod xml; +/// HTML5 CSS generation. +pub mod css; +/// HTML5 HTML generation. +pub mod html; diff --git a/src/assemble/sexp.rs b/crates/seam/src/assemble/sexp.rs diff --git a/src/assemble/text.rs b/crates/seam/src/assemble/text.rs diff --git a/src/assemble/xml.rs b/crates/seam/src/assemble/xml.rs diff --git a/src/bin.rs b/crates/seam/src/bin.rs diff --git a/crates/seam/src/lib.rs b/crates/seam/src/lib.rs @@ -0,0 +1,72 @@ +#![allow(incomplete_features)] +#![feature(pattern)] +#![feature(box_patterns)] +#![feature(associated_type_defaults)] + +pub mod parse; +pub mod assemble; + +use parse::{expander, parser, lexer}; + +use std::{fs, io, path::Path}; + +pub const VERSION: (u8, u8, u8) = (0, 3, 0); + +/* Utilities. */ + +/// See: <https://stackoverflow.com/a/30353928> +pub trait CloneBox { + fn clone_box(&self) -> *mut (); +} + +impl<'a, T> CloneBox for T where T: Clone + 'a { + fn clone_box(&self) -> *mut () { + Box::<T>::into_raw(Box::new(self.clone())) as *mut () + } +} + +#[macro_export] +macro_rules! impl_clone_box { + ($($lif:tt),* ; $tra:ty) => { + impl< $($lif),* > Clone for Box< $tra > { + fn clone(&self) -> Box< $tra > { + unsafe { + *Box::from_raw(self.clone_box() as *mut Self) + } + } + } + }; + ($($lif:tt),* ; $($gen:tt),* ; $tra:ty) => { + impl< $($lif),* , $($gen),* > Clone for Box< $tra > { + fn clone(&self) -> Box< $tra > { + unsafe { + *Box::from_raw(self.clone_box() as *mut Self) + } + } + } + }; +} + +/* Library helpers. 
*/ + +pub fn tree_builder<'a, P: AsRef<Path>>(source_path: Option<P>, string: String) + -> expander::Expander<'a> { + let path = source_path.map_or("<stdin>".to_string(), + |s| s.as_ref().to_string_lossy().to_string()); + let tokenizer = lexer::Lexer::new(path, string); + let builder = parser::Parser::new(tokenizer); + expander::Expander::new(builder) +} + +pub fn tree_builder_file<'a>(path: &Path) + -> io::Result<expander::Expander<'a>> { + let contents = fs::read_to_string(&path)?; + Ok(tree_builder(Some(path), contents)) +} + +pub fn tree_builder_stream(stream: &mut impl io::Read) + -> io::Result<expander::Expander> { + let mut contents = String::new(); + stream.read_to_string(&mut contents)?; + Ok(tree_builder(Option::<&Path>::None, contents)) +} diff --git a/crates/seam/src/parse/expander.rs b/crates/seam/src/parse/expander.rs @@ -0,0 +1,760 @@ +use super::macros::*; +use super::parser::{Node, ParseNode, ParseTree, Parser}; +use super::tokens::Site; + +use std::fmt::Display; +use std::{ + fmt, + cell::RefCell, + path::PathBuf, + ffi::OsString, + error::Error, + rc::Rc, + collections::{ + HashMap, + BTreeSet, + }, +}; + +use colored::*; +use formatx; +use unicode_width::UnicodeWidthStr; + +/// Error type for errors while expanding macros. +#[derive(Debug, Clone)] +pub struct ExpansionError<'a>(pub String, pub Site<'a>); + +impl<'a> ExpansionError<'a> { + /// Create a new error given the ML, the message, and the site. + pub fn new(msg: &str, site: &Site<'a>) -> Self { + Self(msg.to_owned(), site.to_owned()) + } +} + +/// Implement fmt::Display for user-facing error output. +impl<'a> fmt::Display for ExpansionError<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let ExpansionError(msg, site) = self; + let line_prefix = format!(" {} |", site.line); + let line_view = site.line_slice(); + writeln!(f, "{} {}", line_prefix, line_view)?; + writeln!(f, "{:>prefix_offset$} {:~>text_offset$}{:^>length$}", "|", "", "", + prefix_offset=UnicodeWidthStr::width(line_prefix.as_str()), + text_offset=site.line_column() - 1, + length=site.width())?; + write!(f, "[{}] Error Expanding Macro {}: {}", + "**".red().bold(), site, msg) + } +} + +/// Implements std::error::Error for macro expansion error. +impl<'a> Error for ExpansionError<'a> { } + +/// A macro consists of: +/// - its name; +/// - its argument list (if any); +/// - and its defintion (i.e. *body*). +#[derive(Debug, Clone)] +pub struct Macro<'a> { + name: String, + params: Box<[String]>, + body: Box<[ParseNode<'a>]> +} +// TODO: Macro to also store its own scope (at place of definition) +// in order to implement lexical scoping. + +impl<'a> Macro<'a> { + pub fn new(name: &str) -> Macro { + Macro { + name: name.to_string(), + params: Box::new([]), + body: Box::new([]), + } + } +} + +/// Type of variable scope owned by an `Expander` instance. +pub type Scope<'a> = RefCell<HashMap<String, Rc<Macro<'a>>>>; // Can you believe this type? + +/// Macro expansion context, takes a parser and expands +/// any macro calls found in the generated parse-tree. +#[derive(Debug, Clone)] +pub struct Expander<'a> { + parser: Parser, + /// Include directories, in order of search. 
+ includes: BTreeSet<PathBuf>, + subparsers: RefCell<Vec<Parser>>, + subcontexts: RefCell<Vec<Self>>, + invocations: RefCell<Vec<ParseNode<'a>>>, + definitions: Scope<'a>, +} + +impl<'a> Expander<'a> { + pub fn new(parser: Parser) -> Self { + Self { + parser, + includes: BTreeSet::from([PathBuf::from(".")]), + subparsers: RefCell::new(Vec::new()), + subcontexts: RefCell::new(Vec::new()), + invocations: RefCell::new(Vec::new()), + definitions: RefCell::new(HashMap::new()), + } + } + + /// Get underlying source-code of the active parser for current unit. + pub fn get_source(&self) -> &str { + self.parser.get_source() + } + + /// Supply additonal include-directories for the macros + /// to use when searching for files to include/emebed. + /// Files are searched for in the order that of the directories. + pub fn add_includes<T: Iterator>(&mut self, dirs: T) + where T::Item: Into<PathBuf> + { + for dir in dirs { + self.includes.insert(dir.into()); + } + } + + /// Add a subparser owned by the expander context. + fn register_parser(&self, parser: Parser) -> &'a Parser { + { + let mut parsers = self.subparsers.borrow_mut(); + parsers.push(parser); + } + self.latest_parser().unwrap() + } + + /// Get the latest subparser added. + fn latest_parser(&self) -> Option<&'a Parser> { + let p = self.subparsers.as_ptr(); + unsafe { (*p).last() } + } + + /// Create and register a subcontext built from the current context. + fn create_subcontext(&self) -> &mut Self { + { + let copy = self.clone(); + let mut contexts = self.subcontexts.borrow_mut(); + contexts.push(copy); + } + self.latest_context().unwrap() + } + + /// Get the latest subparser added. + fn latest_context(&self) -> Option<&mut Self> { + let contexts = self.subcontexts.as_ptr(); + unsafe { (*contexts).last_mut() } + } + + fn register_invocation(&self, node: ParseNode<'a>) -> &ParseNode<'a> { + let invocations = self.invocations.as_ptr(); + unsafe { + (*invocations).push(node); + (*invocations).last().unwrap() + } + } + + /// Update variable (macro) for this scope. + fn insert_variable(&self, name: String, var: Rc<Macro<'a>>) { + let mut defs = self.definitions.borrow_mut(); + defs.insert(name, var); + } + + /// Check if macro exists in this scope. + fn has_variable(&self, name: &str) -> bool { + let defs = self.definitions.borrow(); + defs.contains_key(name) + } + + fn get_variable(&self, name: &str) -> Option<Rc<Macro<'a>>> { + self.definitions.borrow().get(name).map(|m| m.clone()) + } + + /// Define a macro with `(%define a b)` --- `a` is a symbol or a list `(c ...)` where `c` is a symbol. + /// macro definitions will eliminate any preceding whitespace, so make sure trailing whitespace provides + /// the whitespace you need. + fn expand_define_macro(&self, node: &ParseNode<'a>, params: Box<[ParseNode<'a>]>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let [head, nodes@..] = &*params else { + return Err(ExpansionError( + format!("`%define` macro takes at least \ + two (2) arguments ({} were given.", params.len()), + node.owned_site())); + }; + + // If head is atomic, we assign to a 'variable'. + // Aditionally, we evaluate its body *eagerly*. + let def_macro = if let Some(variable) = head.atomic() { + let nodes = nodes.to_owned().into_boxed_slice(); + let body = self.expand_nodes(nodes)?; + Rc::new(Macro { + name: variable.value.clone(), + params: Box::new([]), + body, + }) + } else { // Otherwise, we are assigning to a 'function'. + let ParseNode::List { nodes: defn_nodes, .. 
} = head else { + return Err(ExpansionError( + "First argument of `%define` macro must be a list \ + or variable name/identifier.".to_owned(), + node.site().to_owned())); + }; + let [name, params@..] = &**defn_nodes else { + return Err(ExpansionError( + "`%define` macro definition must at \ + least have a name.".to_owned(), + node.site().to_owned())); + }; + let mut arguments: Vec<String> = Vec::with_capacity(params.len()); + for param_node in params { // Verify arguments are symbols. + if let ParseNode::Symbol(param) = param_node { + arguments.push(param.value.clone()); + } else { + return Err(ExpansionError( + "`define` function arguments must be \ + symbols/identifers.".to_owned(), + node.site().to_owned())); + }; + } + let ParseNode::Symbol(name_node) = name else { + return Err(ExpansionError( + "`define` function name must be \ + a symbol/identifier.".to_owned(), + node.site().to_owned())); + }; + let name = name_node.value.clone(); + + Rc::new(Macro { + name, + params: arguments.into_boxed_slice(), + body: nodes.to_owned().into_boxed_slice(), + }) + }; + + self.insert_variable(def_macro.name.to_owned(), def_macro); + Ok(Box::new([])) + } + + /// `(%ifdef symbol a b)` --- `b` is optional, however, if not provided *and* + /// the symbol is not defined, it will erase the whole expression, and whitespace will not + /// be preseved before it. If that's a concern, provide `b` as the empty string `""`. + fn expand_ifdef_macro(&self, node: &ParseNode<'a>, params: Box<[ParseNode<'a>]>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + if params.len() < 2 || params.len() > 3 { + return Err(ExpansionError(format!("`ifdef` takes one (1) \ + condition and one (1) consequent, a third optional \ + alternative expression may also be provided, but \ + `ifdef` was given {} arguments.", params.len()), + node.site().to_owned())); + } + let symbol = if let Some(node) = params[0].atomic() { + node.value.to_owned() + } else { + // FIXME: Borrow-checker won't let me use params[0].site() as site! + return Err(ExpansionError( + "The first argument to `ifdef` must be a symbol/name.".to_string(), + node.site().clone())); + }; + + let mut expanded = if self.has_variable(&symbol) { + self.expand_node(params[1].clone())? + } else { + if let Some(alt) = params.get(2) { + self.expand_node(alt.clone())? + } else { + Box::new([]) + } + }; + if let Some(first_node) = expanded.get_mut(0) { + first_node.set_leading_whitespace(node.leading_whitespace().to_owned()); + } + Ok(expanded) + } + + fn expand_include_macro(&self, node: &ParseNode<'a>, params: Box<[ParseNode<'a>]>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let params: Box<[ParseNode<'a>]> = self.expand_nodes(params)?; + let [path_node] = &*params else { + return Err(ExpansionError( + format!("Incorrect number of arguments \ + to `%include' macro. Got {}, expected {}.", + params.len(), 1), + node.site().to_owned())); + }; + + let Some(Node { value: path, site, .. }) = path_node.atomic() else { + return Err(ExpansionError( + "Bad argument to `%include' macro.\n\ + Expected a path, but did not get any value + that could be interpreted as a path.".to_string(), + path_node.site().to_owned())) + }; + + // Open file, and parse contents! + let include_error = |error: Box<dyn Display>| ExpansionError( + format!("{}", error), site.to_owned()); + let mut parser: Result<Parser, ExpansionError> = Err( + include_error(Box::new("No path tested."))); + // Try all include directories until one is succesful. 
+ for include_dir in &self.includes { + let path = include_dir.join(path); + parser = super::parser_for_file(&path) + .or_else(|err| { + let err = Box::new(err); + // Try with `.sex` extensions appended. + let mut with_ext = PathBuf::from(&path); + let filename = path.file_name() + .ok_or(include_error(err))?; + with_ext.pop(); // Remove old filename. + // Build new filename with `.sex` appended. + let mut new_filename = OsString::new(); + new_filename.push(filename); + new_filename.push(".sex"); + with_ext.push(new_filename); // Replace with new filename. + match super::parser_for_file(&with_ext) { + Ok(parser) => Ok(parser), + Err(err) => Err(include_error(Box::new(err))) + } + }); + if parser.is_ok() { break; } + } + // Register the parser for the found file. + let parser = self.register_parser(parser?); + let tree = match parser.parse() { + Ok(tree) => tree, + Err(error) => return Err(ExpansionError( + format!("{}", error), node.site().to_owned())) + }; + + // Build new (expanded) tree, with result of previous + // parse, while recursively expanding each branch in the + // tree too, as they are added. + let mut expanded_tree = Vec::with_capacity(tree.len()); + for branch in tree { + expanded_tree.extend(self.expand_node(branch)?); + } + // First node should inherit leading whitespace from (%include ...) list. + if expanded_tree.len() != 0 { + expanded_tree[0].set_leading_whitespace(node.leading_whitespace().to_owned()); + } + Ok(expanded_tree.into_boxed_slice()) + } + + fn expand_embed_macro(&self, node: &ParseNode<'a>, params: Box<[ParseNode<'a>]>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let params: Box<[ParseNode<'a>]> = self.expand_nodes(params)?; + let [path_node] = &*params else { + return Err(ExpansionError( + format!("Incorrect number of arguments \ + to `%embed' macro. Got {}, expected {}.", + params.len(), 1), + node.site().to_owned())); + }; + + let Some(Node { value: path, site, .. }) = path_node.atomic() else { + return Err(ExpansionError( + "Bad argument to `%embed' macro.\n\ + Expected a path, but did not get any value + that could be interpreted as a path.".to_string(), + path_node.site().to_owned())) + }; + + // Open file, and read contents! + let embed_error = |error: Box<dyn Display>| ExpansionError( + format!("{}", error), site.to_owned()); + let mut value: Result<String, ExpansionError> = Err( + embed_error(Box::new("No path tested."))); + // Try all include directories until one is succesful. + for include_dir in &self.includes { + let path = include_dir.join(path); + value = std::fs::read_to_string(path) + .map_err(|err| embed_error(Box::new(err))); + if value.is_ok() { break; } + } + let value = value?; + Ok(Box::new([ + ParseNode::String(Node { + value, + site: node.owned_site(), + leading_whitespace: node.leading_whitespace().to_owned(), + }), + ])) + } + + fn expand_date_macro(&self, node: &ParseNode<'a>, params: Box<[ParseNode<'a>]>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let params = self.expand_nodes(params)?; + let [date_format] = &*params else { + return Err(ExpansionError::new( + "`%date' macro only expects one formatting argument.", + node.site())) + }; + + let Some(Node { value: date_format, .. 
}) = date_format.atomic() else { + return Err(ExpansionError::new( + "`%date' macro needs string (or atomic) \ + formatting argument.", node.site())) + }; + + let now = chrono::Local::now(); + let formatted = now.format(&date_format).to_string(); + let date_string_node = ParseNode::String(Node { + value: formatted, + site: node.site().clone(), + leading_whitespace: node.leading_whitespace().to_string(), + }); + Ok(Box::new([date_string_node])) + } + + /// `(%log ...)` logs to `STDERR` when called and leaves *no* node behind. + /// This means whitespace preceeding `(%log ...)` will be removed! + fn expand_log_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let mut words = Vec::with_capacity(params.len()); + for param in self.expand_nodes(params)? { + if let Some(word) = param.atomic() { + words.push(word.value.clone()); + } else { + return Err(ExpansionError::new("`log` should only take \ + arguments that are either symbols, strings or numbers.", + node.site())); + } + } + + eprintln!("{} {} {}: {}", "[#]".bold(), "log".bold().yellow(), + node.site(), words.join(" ")); + Ok(Box::new([])) + } + + fn expand_os_env_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let [ref var] = *params else { + return Err(ExpansionError::new( + "`%os/env' expects excatly one argument.", + node.site())); + }; + let Some(var) = var.atomic() else { + return Err(ExpansionError::new( + "`%os/env' argument must be atomic (not a list).", + var.site())); + }; + let Node { site, leading_whitespace, .. } = var.clone(); + let Ok(value) = std::env::var(&var.value) else { + return Err(ExpansionError( + format!("No such environment variable ($`{}') visible.", &var.value), + site)); + }; + Ok(Box::new([ + ParseNode::String(Node { value, site, leading_whitespace }), + ])) + } + + fn expand_format_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let [format_str, ..] = &*params else { + return Err(ExpansionError::new( + "`%format' expects at a format-string.", + node.site())); + }; + let ParseNode::String(format_str) = format_str else { + return Err(ExpansionError::new( + "First argument to `%format' must be a string.", + format_str.site())); + }; + // Iterate and collect format arguments. + let mut arguments = params.iter(); + let _ = arguments.next(); // Skip the format-string. + let Ok(mut template) = formatx::Template::new(&format_str.value) else { + return Err(ExpansionError::new( + "Invalid format string.", + &format_str.site)); + }; + for mut var in arguments { + // Check if we're replacing a named or positional placeholder. + let mut named: Option<&str> = None; + if let ParseNode::Attribute { keyword, node, .. } = var { + named = Some(keyword.as_str()); + var = node; + } + // TODO: Somehow let non-atomic values be formattable? + let Some(Node { value, .. }) = var.atomic() else { + return Err(ExpansionError( + format!("In `%format', the compound {} type is not formattable.", + var.node_type()), + var.site().clone())); + }; + // Replace the placeholder. + match named { + Some(name) => template.replace(name, value), + None => template.replace_positional(value), + } + } + // Template has been constructed, so now attempt to do subsitituions and + // render the formatted string. 
+ match template.text() { + Ok(value) => Ok(Box::new([ + ParseNode::String(Node { + value, + site: node.owned_site(), + leading_whitespace: node.leading_whitespace().to_owned(), + }) + ])), + Err(err) => Err(ExpansionError( + format!("Failed to format string: {}", err.message()), + format_str.site.clone())) + } + } + + fn expand_namespace_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + // Start evaluating all the arguments to the macro in a separate context. + let context = self.clone(); + let params = context.expand_nodes(params)?; + let mut args = params.iter().peekable(); + let Some(namespace) = args.next().and_then(ParseNode::atomic) else { + return Err(ExpansionError::new("Expected a namespace name.", node.site())); + }; + // Parse options to macro. + let mut seperator = "/"; // Default namespace seperator is `/`. + while let Some(ParseNode::Attribute { keyword, node, site, .. }) = args.peek() { + let _ = args.next(); + match keyword.as_str() { + "separator" => match node.atomic() { + Some(Node { value, .. }) => seperator = &value, + None => return Err(ExpansionError( + format!("`%namespace' separator must be a symbol, got a {}.", node.node_type()), + node.owned_site())), + }, + opt => return Err(ExpansionError( + format!("Unknown option `:{}' to `%namespace' macro.", opt), + site.clone())), + } + } + // Find all the definitions made within the context of the + // `%namespace` macro and include the defintion prefixed by + // the namespace in the *current* scope. + { + let mut self_defs = self.definitions.borrow_mut(); + let defs = context.definitions.borrow(); + for (key, value) in defs.iter() { + let new_key = format!("{}{}{}", namespace.value, seperator, key); + self_defs.insert(new_key, value.clone()); + } + } + // Return remaining body of the macro. + Ok(args.cloned().collect()) + } + + fn expand_raw_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let mut builder = String::new(); + let args = self.expand_nodes(params)?; + for arg in args { + let Some(Node { value, leading_whitespace, .. }) = arg.atomic() else { + return Err(ExpansionError( + format!("Expected a literal, found a {} node instead.", arg.node_type()), + arg.owned_site())); + }; + builder += leading_whitespace; + builder += value; + } + Ok(Box::new([ + ParseNode::Raw(Node { + value: builder, + site: node.owned_site(), + leading_whitespace: node.leading_whitespace().to_owned(), + }) + ])) + } + + fn expand_string_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let mut builder = String::new(); + let args = self.expand_nodes(params)?; + for arg in args { + let Some(Node { value, leading_whitespace, .. }) = arg.atomic() else { + return Err(ExpansionError( + format!("Expected a literal, found a {} node instead.", arg.node_type()), + arg.owned_site())); + }; + builder += leading_whitespace; + builder += value; + } + Ok(Box::new([ + ParseNode::String(Node { + value: builder, + site: node.owned_site(), + leading_whitespace: node.leading_whitespace().to_owned(), + }) + ])) + } + + fn expand_join_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + let args: ArgRules = arguments! 
{ + mandatory(1): literal, + mandatory(2): number fn(_v: ParseNode) { true }, + optional("trailing"): literal["true", "false"], + rest: literal, + }; + let arg_parser = args.parser(&params); + + todo!() + } + + fn expand_macro(&self, name: &str, node: &ParseNode<'a>, params: ParseTree<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + // Eagerly evaluate parameters passed to macro invocation. + let params = self.expand_nodes(params)?; + + let Some(mac) = self.get_variable(name) else { + return Err(ExpansionError::new( + &format!("Macro not found (`{}').", name), &node.owned_site())) + }; + + // Instance of expansion subcontext. + let subcontext = self.create_subcontext(); + // Check enough arguments were given. + if params.len() != mac.params.len() { + return Err(ExpansionError( + format!("`%{}` macro expects {} arguments, \ + but {} were given.", &mac.name, mac.params.len(), + params.len()), node.site().to_owned())); + } + // Define arguments for body. + for i in 0..params.len() { + let arg_macro = Macro { + name: mac.params[i].to_owned(), + params: Box::new([]), + body: Box::new([params[i].clone()]), //< Argument as evaluated at call-site. + }; + subcontext.insert_variable(mac.params[i].to_string(), Rc::new(arg_macro)); + } + // Expand body. + let mut expanded = subcontext.expand_nodes(mac.body.clone())?.to_vec(); + // Inherit leading whitespace of invocation. + if let Some(first_node) = expanded.get_mut(0) { + first_node.set_leading_whitespace(node.leading_whitespace().to_owned()); + } + Ok(expanded.into_boxed_slice()) + } + + fn expand_invocation(&self, + name: &str, //< Name of macro (e.g. %define). + node: &ParseNode<'a>, //< Node for `%'-macro invocation. + params: Box<[ParseNode<'a>]> //< Passed in arguments. + ) -> Result<ParseTree<'a>, ExpansionError<'a>> { + // Some macros are lazy (e.g. `ifdef`), so each macro has to + // expand the macros in its arguments individually. + match name { + "define" => self.expand_define_macro(node, params), + "ifdef" => self.expand_ifdef_macro(node, params), + "raw" => self.expand_raw_macro(node, params), + "string" => self.expand_string_macro(node, params), + "include" => self.expand_include_macro(node, params), + "embed" => self.expand_embed_macro(node, params), + "namespace" => self.expand_namespace_macro(node, params), + "date" => self.expand_date_macro(node, params), + "join" => self.expand_join_macro(node, params), + "log" => self.expand_log_macro(node, params), + "format" => self.expand_format_macro(node, params), + "os/env" => self.expand_os_env_macro(node, params), + _ => self.expand_macro(name, node, params), + } + } + + pub fn expand_node(&self, node: ParseNode<'a>) + -> Result<ParseTree<'a>, ExpansionError<'a>> { + match node { + ParseNode::Symbol(ref sym) => { + // Check if symbol starts with %... and replace it + // with it's defined value. + if sym.value.starts_with("%") { + let name = &sym.value[1..]; + if let Some(def) = self.get_variable(name) { + if !def.params.is_empty() { // Should not be a function. + return Err(ExpansionError::new( + &format!("`{}` is a macro that takes arguments, \ + and cannot be used as a variable.", name), + &sym.site)) + } + Ok(def.body.clone()) + } else { // Not found. + Err(ExpansionError( + format!("No such macro, `{}`.", name), + sym.site.to_owned())) + } + } else { + Ok(Box::new([node])) + } + }, + ParseNode::List { ref nodes, ref site, ref end_token, ref leading_whitespace } => { + // Check for macro invocation (%_ _ _ _). + // Recurse over every element. 
+                let len = nodes.len();
+                let mut call = nodes.to_vec().into_iter();
+                let head = call.next();
+
+                // Pathway: (%_ _ _) macro invocation.
+                if let Some(ref symbol@ParseNode::Symbol(..)) = head {
+                    let node = self.register_invocation(node.clone());
+                    let name = symbol.atomic().unwrap().value.clone();
+                    if name.starts_with("%") {
+                        // Rebuild node...
+                        let name = &name[1..];
+                        let mut params: Vec<ParseNode> = call.collect();
+                        // Delete leading whitespace of leading argument.
+                        if let Some(leading) = params.first_mut() {
+                            if !leading.leading_whitespace().contains('\n') {
+                                leading.set_leading_whitespace(String::from(""));
+                            }
+                        }
+                        return self.expand_invocation(name, node, params.into_boxed_slice());
+                    }
+                }
+                // Otherwise, if not a macro, just expand child nodes in case they are macros.
+                let mut expanded_list = Vec::with_capacity(len);
+                expanded_list.extend(self.expand_node(head.unwrap().clone())?);
+                for elem in call {
+                    expanded_list.extend(self.expand_node(elem)?);
+                }
+
+                Ok(Box::new([ParseNode::List {
+                    nodes: expanded_list.into_boxed_slice(),
+                    site: site.clone(),
+                    end_token: end_token.clone(),
+                    leading_whitespace: leading_whitespace.clone(),
+                }]))
+            },
+            ParseNode::Attribute { keyword, node, site, leading_whitespace } => {
+                let mut expanded_nodes = self.expand_node(*node)?;
+                let new_node = Box::new(expanded_nodes[0].clone());
+                expanded_nodes[0] = ParseNode::Attribute {
+                    keyword: keyword.clone(),
+                    node: new_node,
+                    site: site.clone(),
+                    leading_whitespace: leading_whitespace.clone(),
+                };
+                Ok(expanded_nodes)
+            },
+            _ => Ok(Box::new([node]))
+        }
+    }
+
+    pub fn expand_nodes(&self, tree: Box<[ParseNode<'a>]>)
+        -> Result<ParseTree<'a>, ExpansionError<'a>> {
+        let mut expanded = Vec::with_capacity(tree.len());
+        for branch in tree {
+            expanded.extend(self.expand_node(branch)?);
+        }
+        Ok(expanded.into_boxed_slice())
+    }
+
+    pub fn expand(&'a self) -> Result<ParseTree<'a>, Box<dyn 'a + std::error::Error>> {
+        let tree = self.parser.parse()?;
+        let expanded = self.expand_nodes(tree)?;
+        Ok(expanded)
+    }
+}
diff --git a/src/parse/lexer.rs b/crates/seam/src/parse/lexer.rs
diff --git a/crates/seam/src/parse/macros.rs b/crates/seam/src/parse/macros.rs
@@ -0,0 +1,394 @@
+//! Expander macro argument-parsing utilities.
+use std::{borrow::Borrow, collections::HashMap};
+
+use regex::Regex;
+
+use super::{
+    expander::ExpansionError,
+    parser::{Node, ParseNode, ParseTree},
+};
+
+pub enum ArgPredicate {
+    Exactly(String),
+    Matching(Regex),
+    Satisfying(Box<dyn Fn(ParseNode) -> bool>),
+}
+
+/// Type of argument, and what kind of
+/// conditions it has to satisfy.
+/// Number ⊆ Literal;
+/// String ⊆ Literal;
+/// Symbol ⊆ Symbolic;
+/// Number ⊆ Symbolic;
+/// Symbolic ⊆ Literal;
+/// * ⊆ Any.
+pub enum ArgType {
+    Literal(Vec<ArgPredicate>),
+    String(Vec<ArgPredicate>),
+    Symbol(Vec<ArgPredicate>),
+    Number(Vec<ArgPredicate>),
+    Symbolic(Vec<ArgPredicate>),
+    List(Vec<ArgType>),
+    Any(Vec<ArgType>),
+}
+
+/// Kind of argument type (optional, mandatory).
+pub enum Arg {
+    Mandatory(ArgType),
+    Optional(ArgType),
+}
+
+/// Positional or named argument position.
+enum ArgPos<'a> { Int(usize), Str(&'a str) }
+/// What kind of types can be matched against
+/// when determining an argument's positionality.
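+/// For example, a `usize` matches a positional argument by its index,
+/// while a `&str` matches a named (keyword) argument by its name.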
+pub trait ArgMatcher {
+    fn unwrap(&self) -> ArgPos;
+}
+impl ArgMatcher for usize {
+    fn unwrap(&self) -> ArgPos { ArgPos::Int(*self) }
+}
+impl ArgMatcher for &str {
+    fn unwrap(&self) -> ArgPos { ArgPos::Str(self) }
+}
+impl From<&Box<dyn ArgMatcher>> for Option<usize> {
+    fn from(value: &Box<dyn ArgMatcher>) -> Option<usize> {
+        match value.unwrap() {
+            ArgPos::Int(int) => Some(int),
+            _ => None,
+        }
+    }
+}
+impl<'a> From<&'a Box<dyn ArgMatcher + 'a>> for Option<&'a str> {
+    fn from(value: &'a Box<dyn ArgMatcher + 'a>) -> Option<&'a str> {
+        match value.unwrap() {
+            ArgPos::Str(str) => Some(str),
+            _ => None,
+        }
+    }
+}
+impl From<usize> for Box<dyn ArgMatcher> {
+    fn from(value: usize) -> Box<dyn ArgMatcher> { Box::new(value) }
+}
+impl<'a> From<&'a str> for Box<dyn ArgMatcher + 'a> {
+    fn from(value: &'a str) -> Box<dyn ArgMatcher + 'a> { Box::new(value) }
+}
+impl<'a> From<&'a String> for Box<dyn ArgMatcher + 'a> {
+    fn from(value: &'a String) -> Box<dyn ArgMatcher + 'a> { Box::new(value.as_ref()) }
+}
+
+/// Holds information as to what kind of rules
+/// must be satisfied for an argument's given
+/// position.
+/// The pattern pertains to how the argument sits
+/// in the macro-call's argument list.
+struct ArgPattern<'a> {
+    argument: Arg,
+    pattern: Box<dyn Fn(&Box<dyn ArgMatcher + 'a>) -> bool>,
+}
+
+/// A complete description of how a macro's arguments
+/// should be parsed.
+pub struct ArgRules<'a> {
+    patterns: Vec<ArgPattern<'a>>,
+    trailing: Option<ArgType>,
+}
+
+impl<'a> ArgRules<'a> {
+    pub fn new() -> Self {
+        Self { patterns: Vec::new(), trailing: None }
+    }
+    /// Register a pattern to match.
+    pub fn register<F>(&mut self, matcher: F, arg: Arg)
+        where F: 'static + Fn(&Box<dyn ArgMatcher + 'a>) -> bool
+    {
+        self.patterns.push(ArgPattern {
+            argument: arg,
+            pattern: Box::new(matcher),
+        });
+    }
+    /// Register matching on all remaining arguments.
+    pub fn register_remaining(&mut self, arg_type: ArgType) {
+        self.trailing = Some(arg_type);
+    }
+    /// Turn this structure into a parser.
+    pub fn parser<'params, 'tree>(self, params: &'params Box<[ParseNode<'tree>]>) -> ArgParser<'params, 'a, 'tree> {
+        ArgParser::new(self, params)
+    }
+}
+
+/// Turns a pattern into an argument-matching predicate.
+macro_rules! predicate {
+    // A literal which represents a potential exact match of the string value.
+    ($lit:literal) => { ArgPredicate::Exactly(String::from($lit)) };
+    // A pattern which can match against the argument.
+    ($pat:pat) => {{
+        fn matcher(arg: ParseNode) -> bool {
+            use super::parser::IntoValue;
+            match arg.into_value() {
+                Some($pat) => true,
+                _ => false,
+            }
+        }
+        ArgPredicate::Satisfying(Box::new(matcher))
+    }};
+}
+
+macro_rules! arg_type {
+    (literal) => { ArgType::Literal };
+    (string) => { ArgType::String };
+    (symbol) => { ArgType::Symbol };
+    (number) => { ArgType::Number };
+    (symbolic) => { ArgType::Symbolic };
+    (list) => { ArgType::List };
+    (any) => { ArgType::Any };
+}
+
+macro_rules! argument_type {
+    ($typ:ident) => {{ ArgType::Literal(vec![]) }};
+    ($typ:ident[ $($preds:literal),+ ]) => {{
+        arg_type!($typ)(vec![ $( predicate!($preds) ),+ ])
+    }};
+    ($typ:ident ( $($preds:pat),+ )) => {{
+        arg_type!($typ)(vec![ $( predicate!($preds) ),+ ])
+    }};
+    ($typ:ident fn($($var:tt)+) { $($body:tt)* }) => {{
+        fn predicate($($var)+) -> bool { $($body)* }
+        let arg_pred = ArgPredicate::Satisfying(Box::new(predicate));
+        arg_type!($typ)(vec![arg_pred])
+    }};
+}
+
+macro_rules! register_position_pattern {
+    ($ctx:expr, $n:pat, $arg:expr) => {
+        fn position_matcher(pattern: &Box<dyn ArgMatcher>) -> bool {
+            match pattern.into() {
+                Some($n) => true,
+                _ => false,
+            }
+        }
+        let ctx: &mut ArgRules = $ctx;
+        let arg: Arg = $arg;
+        ctx.register(position_matcher, arg);
+    };
+}
+
+macro_rules! _argument {
+    // The pattern for a mandatory argument.
+    ($ctx:expr => mandatory($n:pat): $($kind:tt)+) => {
+        {
+            let arg_type = argument_type!($($kind)+);
+            let arg = Arg::Mandatory(arg_type);
+            let ctx: &mut ArgRules = $ctx;
+            register_position_pattern!(ctx, $n, arg);
+        }
+    };
+    // The pattern for an optional argument.
+    ($ctx:expr => optional($n:pat): $($kind:tt)+) => {
+        {
+            let arg_type = argument_type!($($kind)+);
+            let arg = Arg::Optional(arg_type);
+            let ctx: &mut ArgRules = $ctx;
+            register_position_pattern!(ctx, $n, arg);
+        }
+    };
+    // The pattern for any remaining arguments.
+    ($ctx:expr => rest: $($kind:tt)+) => {
+        {
+            let arg_type = argument_type!($($kind)+);
+            let ctx: &mut ArgRules = $ctx;
+            ctx.register_remaining(arg_type);
+        }
+    };
+}
+
+/// See <https://stackoverflow.com/a/74971086/13162100>.
+#[macro_export]
+macro_rules! arguments {
+    ($ctx:expr => @accumulate [ $($accumulated:tt)* ] [ ]) => { [ $($accumulated)* ] };
+    ($ctx:expr => @accumulate [ $($accumulated:tt)* ] [ $($final_line:tt)* ]) => {
+        [ $($accumulated)* _argument!( $ctx => $($final_line)+ ) ]
+    };
+    ($ctx:expr => @accumulate [ $($accumulated:tt)* ] [ $($this_line:tt)* ] , $($rest:tt)* ) => {
+        arguments! {
+            $ctx => @accumulate
+            [ $($accumulated)* _argument!( $ctx => $($this_line)* ), ]
+            [ ] $($rest)*
+        }
+    };
+    ($ctx:expr => @accumulate [ $($accumulated:tt)* ] [ $($this_line:tt)* ] $current:tt $($rest:tt)* ) => {
+        arguments! {
+            $ctx => @accumulate
+            [ $($accumulated)* ]
+            [ $($this_line)* $current ]
+            $($rest)*
+        }
+    };
+    ( $($t:tt)* ) => {{
+        let mut ctx = ArgRules::new();
+        arguments! { &mut ctx => @accumulate [ ] [ ] $($t)* };
+        ctx
+    }}
+}
+
+
+// --- Proc Macro
+use seam_argparse_proc_macro::*;
+
+
+// ---
+
+pub struct ArgParser<'params: 'rules, 'rules, 'tree> {
+    rules: ArgRules<'rules>,
+    positional: HashMap<usize, &'params ParseNode<'tree>>,
+    named: HashMap<String, &'params ParseNode<'tree>>,
+}
+
+impl<'params, 'rules, 'tree> ArgParser<'params, 'rules, 'tree> {
+    pub fn new(rules: ArgRules<'rules>,
+               params: &'params ParseTree<'tree>)
+        -> Result<Self, ExpansionError<'tree>> {
+        let mut position = 0;
+        let mut positional = HashMap::with_capacity(params.len());
+        let mut named = HashMap::with_capacity(params.len());
+        for param in params {
+            let matcher: Box<dyn ArgMatcher>;
+            // Register each argument with the parser.
+            if let ParseNode::Attribute { keyword, node, .. } = param {
+                named.insert(keyword.to_owned(), node.borrow());
+                matcher = keyword.into();
+            } else {
+                positional.insert(position, param);
+                position += 1;
+                matcher = position.into();
+            }
+            // Check if they do actually match with any of the rules.
+            let mut arg_rule = None;
+            for rule in &rules.patterns {
+                // First check that there is a valid place for this argument.
+                let is_valid_argument = (rule.pattern)(&matcher);
+                if !is_valid_argument {
+                    arg_rule = Some(rule);
+                    break;
+                }
+            }
+            let Some(rule) = arg_rule else {
+                // Error on the fact that an erroneous positional or named argument
+                // has been given. Only error on additional erroneous named
+                // arguments if trailing argument capture is enabled.
+                todo!()
+            };
+            // Now check that the types are satisfied.
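+            // (Illustrative sketch only: this check could dispatch on the
+            //  expected `ArgType` using the `extract_*` helpers at the bottom
+            //  of this module, e.g. `extract_literal(param.clone())?` for a
+            //  `Literal` rule, then run its `ArgPredicate`s over the result.)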
+ let arg = &rule.argument; + // TODO: throw error when mismatched. + } + // After checking all the arguments are *valid*, now check + // that all mandatory arguments are given. + "todo"; + // Now check if trailing (variadic) arguments are permitted + // (otherwise error on unexpected additional arguments). + // And if so, that they all satisfy the trailing argument rule. + "todo"; + + Ok(Self { rules, positional, named, }) + } + + pub fn get<P>(&mut self, key: P) -> Result<ParseNode<'tree>, ExpansionError<'tree>> + where P: Into<Box<dyn ArgMatcher>> + { + let matcher: &Box<dyn ArgMatcher> = &key.into(); + // Go through every pattern that could match against the argument + // position given and check if they match. + for argpat in &self.rules.patterns { + let pat = &argpat.pattern; + let did_match = pat(matcher); + if did_match { + match matcher.unwrap() { + ArgPos::Int(i) => {}, + ArgPos::Str(k) => {}, + } + } + } + + todo!() + } +} + +pub enum _ArgType { + Literal(Vec<ArgPredicate>), + String(Vec<ArgPredicate>), + Symbol(Vec<ArgPredicate>), + Number(Vec<ArgPredicate>), + Symbolic(Vec<ArgPredicate>), + List(Vec<ArgType>), + Any(Vec<ArgType>), +} + +pub fn extract_literal<'a>(node: ParseNode<'a>) -> Result<Node<'a>, ExpansionError<'a>> { + match node { + ParseNode::Symbol(lit) + | ParseNode::Number(lit) + | ParseNode::String(lit) + | ParseNode::Raw(lit) => Ok(lit), + _ => Err(ExpansionError( + format!("Expected a literal, got a {} instead.", node.node_type()), + node.owned_site() + )) + } +} + +pub fn extract_string<'a>(node: ParseNode<'a>) -> Result<Node<'a>, ExpansionError<'a>> { + match node { + ParseNode::String(string) + | ParseNode::Raw(string) => Ok(string), + _ => Err(ExpansionError( + format!("Expected a string, got a {} instead.", node.node_type()), + node.owned_site() + )) + } +} + +pub fn extract_symbol<'a>(node: ParseNode<'a>) -> Result<Node<'a>, ExpansionError<'a>> { + match node { + ParseNode::Symbol(sym) => Ok(sym), + _ => Err(ExpansionError( + format!("Expected a symbol, got a {} instead.", node.node_type()), + node.owned_site() + )) + } +} + +pub fn extract_number<'a>(node: ParseNode<'a>) -> Result<Node<'a>, ExpansionError<'a>> { + match node { + ParseNode::Number(lit) => Ok(lit), + _ => Err(ExpansionError( + format!("Expected a number, got a {} instead.", node.node_type()), + node.owned_site() + )) + } +} + +pub fn extract_symbolic<'a>(node: ParseNode<'a>) -> Result<Node<'a>, ExpansionError<'a>> { + match node { + ParseNode::Symbol(sym) + | ParseNode::Number(sym) => Ok(sym), + _ => Err(ExpansionError( + format!("Expected a symbolic literal, got a {} instead.", node.node_type()), + node.owned_site() + )) + } +} + +pub fn extract_list<'a>(node: ParseNode<'a>) -> Result<Vec<ParseNode<'a>>, ExpansionError<'a>> { + match node { + ParseNode::List { nodes, .. } => Ok(nodes.to_vec()), + _ => Err(ExpansionError( + format!("Expected a list, got a {} instead.", node.node_type()), + node.owned_site() + )) + } +} + +pub fn extract_any<'a>(node: ParseNode<'a>) -> Result<ParseNode<'a>, ExpansionError<'a>> { + Ok(node) +} diff --git a/crates/seam/src/parse/mod.rs b/crates/seam/src/parse/mod.rs @@ -0,0 +1,17 @@ +pub mod tokens; +pub mod lexer; +pub mod parser; +#[macro_use] +mod macros; +pub mod expander; + +pub use parser::ParseTree; +use std::{fs, path::Path, error::Error}; + +/// Build a parser for a file without expanding macros. 
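+/// For example (illustrative; `page.sex` is a placeholder path):
+/// ```ignore
+/// let parser = parser_for_file(Path::new("page.sex"))?;
+/// let tree = parser.parse()?;
+/// ```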
+pub fn parser_for_file(path: &Path) -> Result<parser::Parser, Box<dyn Error>> { + let contents = fs::read_to_string(&path)?; + let tokenizer = lexer::Lexer::new(path.to_string_lossy().to_string(), contents); + let builder = parser::Parser::new(tokenizer); + Ok(builder) +} diff --git a/crates/seam/src/parse/parser.rs b/crates/seam/src/parse/parser.rs @@ -0,0 +1,512 @@ +use std::{error::Error, fmt, str::FromStr}; +use unicode_width::UnicodeWidthStr; +use descape::UnescapeExt; + +use super::{lexer::{LexError, Lexer}, tokens::{Kind, Site, Token}}; + +/// The [`Node`] type represents what atomic/literals are parsed +/// into; i.e. not compound types (e.g. lists, attributes). +/// These are just a common storage for the literals in [`ParseNode`]. +#[derive(Debug, Clone)] +pub struct Node<'a> { + pub value: String, + pub site: Site<'a>, + pub leading_whitespace: String, +} + +impl<'a> Node<'a> { + pub fn new(value: &str, site: &Site<'a>, leading_whitespace: &str) -> Self { + Self { + site: site.to_owned(), + value: value.to_owned(), + leading_whitespace: leading_whitespace.to_owned(), + } + } +} + +/// Parse nodes are the components of the syntax tree that +/// the source code is translated into. +/// These nodes are also produced at compile-time by the macro expander. +#[derive(Debug, Clone)] +pub enum ParseNode<'a> { + Symbol(Node<'a>), + Number(Node<'a>), + String(Node<'a>), + Raw(Node<'a>), //< Raw-content strings are not parsed, only expanded by macros. + List { + nodes: Box<[ParseNode<'a>]>, + site: Site<'a>, + end_token: Token<'a>, + leading_whitespace: String, + }, + Attribute { + keyword: String, + node: Box<ParseNode<'a>>, + site: Site<'a>, + leading_whitespace: String, + }, +} + +impl<'a> ParseNode<'a> { + /// Unwrap a literal node if it is a symbol or number. + pub fn symbolic(&self) -> Option<&Node<'a>> { + match self { + Self::Symbol(ref node) + | Self::Number(ref node) => Some(node), + _ => None, + } + } + + /// Unwrap string-like nodes. + pub fn string(&self) -> Option<&Node<'a>> { + match self { + Self::String(ref node) | Self::Raw(ref node) => Some(node), + _ => None, + } + } + + /// Unwrap literal (atomic) nodes into their underlying [`Node`]. + pub fn atomic(&self) -> Option<&Node<'a>> { + match self { + Self::Symbol(ref node) + | Self::Number(ref node) + | Self::String(ref node) + | Self::Raw(ref node) => Some(node), + _ => None, + } + } + + /// Same as [`Self::atomic`], but consumes the node, + /// returning an owned [`Node`]. + pub fn into_atomic(self) -> Option<Node<'a>> { + match self { + Self::Symbol(node) + | Self::Number(node) + | Self::String(node) => Some(node), + _ => None, + } + } + + /// Get a reference to the parse node's underlying [`Site`]. + pub fn site(&self) -> &Site<'a> { + match self { + Self::Symbol(ref node) + | Self::Number(ref node) + | Self::String(ref node) + | Self::Raw(ref node) => &node.site, + Self::List { ref site, .. } => site, + Self::Attribute { ref site, .. } => site, + } + } + + /// Clone the underlying [`Site`] of this parse node. + pub fn owned_site(&self) -> Site<'a> { + match self { + Self::Symbol(node) + | Self::Number(node) + | Self::String(node) + | Self::Raw(node) => node.site.clone(), + Self::List { site, .. } => site.clone(), + Self::Attribute { site, .. } => site.clone(), + } + } + + /// Get a reference to the underlying leading whitespace string + /// of this parse node. 
+ pub fn leading_whitespace(&self) -> &str { + match self { + Self::Symbol(ref node) + | Self::Number(ref node) + | Self::String(ref node) + | Self::Raw(ref node) => &node.leading_whitespace, + Self::List { ref leading_whitespace, .. } => leading_whitespace, + Self::Attribute { ref leading_whitespace, .. } => leading_whitespace, + } + } + + /// Modify the underlying leading whitespace stored for this parse node. + pub fn set_leading_whitespace(&mut self, whitespace: String) { + match self { + Self::Symbol(ref mut node) + | Self::Number(ref mut node) + | Self::String(ref mut node) + | Self::Raw(ref mut node) => node.leading_whitespace = whitespace, + Self::List { ref mut leading_whitespace, .. } => *leading_whitespace = whitespace, + Self::Attribute { ref mut leading_whitespace, .. } => *leading_whitespace = whitespace, + }; + } + + /// Get a `&'static str` string name of what type of parse node this is. + pub fn node_type(&self) -> &'static str { + match self { + Self::Symbol(..) => "symbol", + Self::Number(..) => "number", + Self::String(..) => "string", + Self::Raw(..) => "raw-content string", + Self::List { .. } => "list", + Self::Attribute { .. } => "attribute", + } + } +} + +/// Trait determining if a [`ParseNode`] can be converted into +/// a value of a given (usually inferred) type. +pub trait IntoValue<'a, T>: Sized { + fn into_value(&'a self) -> Option<T> { None } +} + +/// A number type. +trait Num<Rhs = Self, Output = Self>: + std::ops::Add<Rhs, Output = Output> + + std::ops::Sub<Rhs, Output = Output> + + std::ops::Mul<Rhs, Output = Output> + + std::ops::Div<Rhs, Output = Output> + + std::ops::Rem<Rhs, Output = Output> { } +impl Num for usize { } +impl Num for isize { } +impl Num for u32 { } +impl Num for i32 { } +impl Num for u64 { } +impl Num for i64 { } +impl Num for f32 { } +impl Num for f64 { } + +/// Convert parse-node into value if said value is a number type. +impl<'a, T: Num + FromStr> IntoValue<'a, T> for ParseNode<'a> { + fn into_value(&self) -> Option<T> { + match self { + ParseNode::Number(node) => node.value.parse().ok(), + _ => None, + } + } +} + +/// Convert parse-node into value if said value is a symbol/string type. +impl<'a> IntoValue<'a, &'a str> for ParseNode<'a> { + fn into_value(&'a self) -> Option<&'a str> { + match self { + ParseNode::Symbol(node) + | ParseNode::String(node) + | ParseNode::Raw(node) => Some(node.value.as_ref()), + _ => None, + } + } +} + +/// TODO: Convert parse-node into value if said value is a list type. +/* +impl<'a, V> IntoValue<'a, &'a [V]> for ParseNode<'a> { + fn into_value(&'a self) -> Option<&'a [V]> { + match self { + ParseNode::List { nodes, .. } => { + let mut values = Vec::with_capacity(nodes.len()); + for node in nodes { + let Some(value) = node.into_value() else { + return None; + }; + let value: V = value; + values.push(value) + } + // TODO: fix this. + let values: &[V] = &*Box::leak(values.into_boxed_slice()); + Some(values) + }, + _ => None, + } + } +} +*/ + +/// An array of parse nodes, like in a [`ParseNode::List`], never grows. +/// Hence we prefer the `Box<[...]>` representation over a `Vec<...>`. 
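+/// For example, the source text `(p "hello")` parses into a one-element
+/// tree holding a single `ParseNode::List`, whose own nodes are a
+/// `Symbol` and a `String`.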
+pub type ParseTree<'a> = Box<[ParseNode<'a>]>; + +#[derive(Debug, Clone)] +pub struct ParseError<'a>(pub String, pub Site<'a>); + +impl<'a> fmt::Display for ParseError<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let ParseError(msg, site) = self; + let line_prefix = format!(" {} |", site.line); + let line_view = site.line_slice(); + writeln!(f, "{} {}", line_prefix, line_view)?; + writeln!(f, "{:>prefix_offset$} {:~>text_offset$}{:^>length$}", "|", "", "", + prefix_offset=UnicodeWidthStr::width(line_prefix.as_str()), + text_offset=site.line_column() - 1, + length=site.width())?; + write!(f, "[**] Parse Error ({}:{}:{}): {}", + site.source, site.line, site.line_column(), msg) + } +} + +impl<'a> Error for ParseError<'a> { } + +/// Parser structure walks through source using lexer. +#[derive(Debug, Clone)] +pub struct Parser { + lexer: Lexer, //< Parser owns a lexer. +} + +impl<'a> Parser { + pub fn new(lexer: Lexer) -> Self { + Self { lexer } + } + + pub fn get_source(&self) -> &str { + self.lexer.get_source() + } + + /// Parse whole source code, finishing off the lexer. + pub fn parse(&'a self) -> Result<ParseTree, Box<dyn Error + 'a>> { + let mut root: Vec<ParseNode> = Vec::new(); + while !self.lexer.eof() { + let expr = self.parse_expr()?; + root.push(expr); + } + return Ok(root.into_boxed_slice()); + } + + /// Produce a parse node from the current position in the lexer. + pub fn parse_expr(&'a self) -> Result<ParseNode, Box<dyn Error + 'a>> { + let token = self.lexer.peek()?; + match token.kind { + Kind::LParen => self.parse_list(), + Kind::RParen => Err(ParseError( + "Unexpected `)' closing parenthesis.".to_owned(), + token.site.to_owned()))?, + Kind::Keyword => self.parse_keyword(), + Kind::Symbol => Ok(ParseNode::Symbol(self.parse_atomic()?)), + // TODO: Parse (escpae) string-literals. + Kind::String => Ok(ParseNode::String(self.parse_atomic()?)), + Kind::Number => Ok(ParseNode::Number(self.parse_atomic()?)), + } + } + + /// Parse keyword-attribute pair. + fn parse_keyword(&'a self) -> Result<ParseNode, Box<dyn Error + 'a>> { + // Consume :keyword token. + let token = self.lexer.consume()?; + assert_eq!(token.kind, Kind::Keyword); + // Check we are able to consume next expression for keyword's value. + { + let no_expr_error = ParseError( + format!("Keyword `:{}' expects an expression follwing it.", token.value), + token.site.to_owned()); + if self.lexer.eof() { Err(no_expr_error.clone())? ;} + match self.lexer.peek()? { + Token { kind: Kind::RParen, .. } => Err(no_expr_error)?, + _ => () + } + } + // Otherwise, parse the value and combine the node. + let value = self.parse_expr()?; + Ok(ParseNode::Attribute { + keyword: token.value.to_owned(), + node: Box::new(value), + site: token.site.to_owned(), + leading_whitespace: token.leading_whitespace.to_owned(), + }) + } + + /// Parse a literal node. + /// This is where escapes in symbols and strings are handled. + fn parse_atomic(&'a self) -> Result<Node<'a>, LexError<'a>> { + let token = self.lexer.consume()?; + let value = match token.kind { + Kind::Symbol | Kind::Number | Kind::Keyword => escape_sanitize(token.value), + Kind::String => escape_string(token.value, &token.site)?, + _ => unreachable!("called `parse_atomic` on non-atomic token."), + }; + Ok(Node { + value, + site: token.site.clone(), + leading_whitespace: token.leading_whitespace.to_string(), + }) + } + + /// Parse a list `( [...] )'. + fn parse_list(&'a self) -> Result<ParseNode<'a>, Box<dyn Error + 'a>> { + // Consumed the `(' token. 
+ let lparen = self.lexer.consume()?; + assert_eq!(lparen.kind, Kind::LParen); + // Collect list elements. + let mut elements = Vec::new(); + let mut rparen: Option<Token> = None; + while !self.lexer.eof() { + // Keep parsing expressions until `)' is reached. + let token = self.lexer.peek()?; + if token.kind == Kind::RParen { + rparen = Some(self.lexer.consume()?); // Swallow up `)'. + break; + } + let expr = self.parse_expr()?; + elements.push(expr); + } + // Closing parenthesis was never found. + let Some(rparen) = rparen else { + return Err(ParseError( + "Expected `)' closing parenthesis.".to_owned(), + lparen.site.to_owned()))?; + }; + Ok(ParseNode::List { + nodes: elements.into_boxed_slice(), + site: lparen.site.to_owned(), + end_token: rparen.to_owned(), + leading_whitespace: lparen.leading_whitespace.to_owned(), + }) + } +} + +/// Santize any escaped characters by removing their leading backslash. +fn escape_sanitize(string: &str) -> String { + let mut builder = String::with_capacity(string.len()); + let mut chars = string.chars(); + while let Some(c) = chars.next() { + if c == '\\' { continue; } + builder.push(c) + } + builder +} + +/// Parse a string with its escapes. +/// **Note:** Uses the `descape` crate for now. +fn escape_string<'a>(string: &'a str, site: &Site<'a>) -> Result<String, LexError<'a>> { + string.to_unescaped() + .map(|s| s.to_string()) + .map_err(|index| { + LexError( + format!("Invalid escape `\\{}' at byte-index {}.", + string.chars().nth(index).unwrap_or('?'), index), + site.clone()) + }) +} + +pub trait SearchTree<'a> { + /// Search the parse-tree for a specific node with a specific value. + fn search_node(&'a self, kind: SearchType, + value: &str, + case_insensitive: bool, + level: usize) -> Option<&ParseNode<'a>>; +} + +#[derive(Clone, Copy, PartialEq)] +pub enum SearchType { + ListHead, ListMember, + Symbol, Number, String, + Attribute, + Any, +} + +impl SearchType { + pub fn is_a(self, kind: SearchType) -> bool { + self == SearchType::Any || self == kind + } +} + +impl<'a> SearchTree<'a> for ParseNode<'a> { + fn search_node(&'a self, kind: SearchType, value: &str, + insensitive: bool, level: usize) -> Option<&ParseNode<'a>> { + if level == 0 { + return None; + } + + let is_equal = |string: &str| if insensitive { + string.to_lowercase() == value.to_lowercase() + } else { + string == value + }; + + match self { + ParseNode::List { nodes, .. } => { + if kind.is_a(SearchType::ListHead) { + if let Some(Some(caller)) = nodes.get(0).map(ParseNode::atomic) { + if is_equal(&caller.value) { + return Some(self); + } + } + } + nodes.search_node(kind, value, insensitive, level - 1) + }, + ParseNode::Symbol(name) => { + if kind.is_a(SearchType::Symbol) && is_equal(&name.value) { + Some(self) + } else { + None + } + }, + ParseNode::String(name) | ParseNode::Raw(name) => { + if kind.is_a(SearchType::String) && is_equal(&name.value) { + Some(self) + } else { + None + } + }, + ParseNode::Number(name) => { + if kind.is_a(SearchType::Number) && is_equal(&name.value) { + Some(self) + } else { + None + } + }, + ParseNode::Attribute { node, ref keyword, .. 
} => { + if kind.is_a(SearchType::Attribute) { + if is_equal(keyword) { + return Some(node); + } + } + node.search_node(kind, value, insensitive, level - 1) + }, + } + } +} + +impl<'a> SearchTree<'a> for ParseTree<'a> { + fn search_node(&'a self, kind: SearchType, value: &str, + insensitive: bool, level: usize) -> Option<&ParseNode<'a>> { + if level == 0 { + return None; + } + + for node in self { + let found = node.search_node(kind, value, insensitive, level); + if found.is_some() { + return found; + } + } + + None + } +} + +/// Pretty printing for parse nodes. +#[cfg(feature="debug")] +impl<'a> fmt::Display for ParseNode<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ParseNode::Symbol(node) + | ParseNode::Number(node) => write!(f, "{}", &node.value), + ParseNode::String(node) => { + if node.value.trim().is_empty() { + write!(f, "") + } else { + write!(f, "\"{}\"", &node.value) + } + }, + ParseNode::Attribute { keyword, node, .. } => write!(f, ":{} {}", + &keyword, &*node), + ParseNode::List { nodes, .. } => if nodes.len() == 0 { + write!(f, "()") + } else if let [single] = &**nodes { + write!(f, "({})", single) + } else { + write!(f, "({}{})", nodes[0], + nodes[1..].iter().fold(String::new(), |acc, elem| { + let nested = elem.to_string().split('\n') + .fold(String::new(), |acc, e| + acc + "\n " + &e); + acc + &nested + })) + } + } + } +} diff --git a/crates/seam/src/parse/tokens.rs b/crates/seam/src/parse/tokens.rs @@ -0,0 +1,126 @@ +use std::fmt::{self, Display}; +use unicode_width::UnicodeWidthStr; + +/// Precise source-code location a parsed (or lexed) node (or token). +/// Including references to the source-code and path, line number, bytes offsets +/// within the file, including from start of line, and the number of +/// bytes it occupies in the source. +#[derive(Debug, Clone)] +pub struct Site<'a> { + pub source: &'a str, + pub source_code: &'a str, + pub line: usize, + pub bytes_from_start: usize, + pub bytes_from_start_of_line: usize, + pub bytes_span: usize, +} + +/// Dummy (unknown) site. +pub const UNKNOWN_SITE: Site<'static> = Site { + source: "<unknwon>", + source_code: "", + line: 0, + bytes_from_start: 0, + bytes_from_start_of_line: 0, + bytes_span: 0, +}; + +impl<'a> Site<'a> { + pub fn new(source: &'a str, + source_code: &'a str, + line: usize, + bytes_from_start: usize, + bytes_from_start_of_line: usize, + bytes_span: usize) -> Self { + Self { + source, + source_code, + line, + bytes_from_start, + bytes_from_start_of_line, + bytes_span, + } + } + + pub const fn unknown() -> Self { UNKNOWN_SITE } + + /// Byte-offset in source code for start-of-line where this site is. + pub fn start_of_line(&self) -> usize { + self.bytes_from_start - self.bytes_from_start_of_line + } + + /// Find byte-offset in source code of end-of-line where this site is. + pub fn end_of_line(&self) -> usize { + let mut i = self.bytes_from_start; + let bytes = self.source_code.as_bytes(); + while i < self.source_code.len() { + if bytes[i] == '\n' as u8 { + return i; + } + i += 1; + } + return i; + } + + /// Get a string slice into the part of the source-code + /// which occupies the location this site references. + pub fn view(&'a self) -> &'a str { + let start = self.bytes_from_start; + let end = start + self.bytes_span; + &self.source_code[start..end] + } + + /// Get string view into whole line that this site is referencing. 
+ pub fn line_slice(&self) -> &'a str { + &self.source_code[self.start_of_line()..self.end_of_line()] + } + + /// Compute (monospace, terminal) column width of piece of text + /// referenced by this site in the source code. + pub fn width(&self) -> usize { + let text = &self.source_code[self.bytes_from_start..self.bytes_from_start + self.bytes_span]; + UnicodeWidthStr::width(text) + } + + /// Compute which column the site starts at on the line, + /// accounting for the rendered number of columns for each character + /// in a terminal, according to the same procedure as [`Self::width`]. + pub fn line_column(&self) -> usize { + let preceeding = &self.source_code[self.start_of_line()..self.bytes_from_start]; + UnicodeWidthStr::width(preceeding) + 1 + } +} + +impl<'a> Display for Site<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "(")?; + write!(f, "{}:", self.source)?; + write!(f, "{}:{}", self.line, self.line_column())?; + write!(f, ")") + } +} + +/// Kinds of possible tokens. +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum Kind { + LParen, + RParen, + Symbol, + String, + Number, + Keyword, +} + +#[derive(Debug, Clone)] +pub struct Token<'a> { + pub kind: Kind, + pub value: &'a str, + pub leading_whitespace: &'a str, + pub site: Site<'a>, +} + +impl<'a> Token<'a> { + pub fn new(kind: Kind, value: &'a str, leading_whitespace: &'a str, site: Site<'a>) -> Self { + Self { kind, value, leading_whitespace, site } + } +} diff --git a/crates/seam_argparse_proc_macro/Cargo.toml b/crates/seam_argparse_proc_macro/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "seam_argparse_proc_macro" +license-file = "../../LICENSE" +authors = ["Demonstrandum <samuel@knutsen.co>"] +edition = "2021" + +[lib] +proc-macro = true +name = "seam_argparse_proc_macro" +path = "src/lib.rs" + +[dependencies] +proc-macro2 = "1.0.86" +quote = "1.0.37" +syn = "2.0.77" diff --git a/crates/seam_argparse_proc_macro/src/lib.rs b/crates/seam_argparse_proc_macro/src/lib.rs @@ -0,0 +1,415 @@ +//! Procedural macro for the `arguments! { ... }` +//! macro-argument parser for seam macros. +//! TODO: Convert all `panic!(..)` calls to actual compiler errors. + +use std::{collections::{HashMap, HashSet}, iter::Peekable}; + +use proc_macro; +use proc_macro2::{token_stream::IntoIter, Delimiter, TokenStream, TokenTree}; +use quote::quote; +use syn::{self, + Expr, ExprRange, ExprLit, + Lit, Pat, PatOr, + RangeLimits, +}; + +#[derive(Clone, Copy)] +enum PositionTypes { Mandatroy, Optional, Rest } + +#[derive(Clone, Copy)] +enum ParseState { + ArgumentPosition, //< `mandatory: ...', `optional: ...', `rest: ...'. + PositionPattern(PositionTypes), //< pattern for position or name. +} + +#[derive(Clone)] +enum ArgumentKind { + Literal, + String, + Symbol, + Number, + Symbolic, + List, + Any, + None +} + +#[derive(Clone)] +struct ArgumentProperties { + kind: ArgumentKind, + position_type: PositionTypes, + rust_type: TokenStream, +} + +struct ArgumentStructTypes { + positional: HashMap<usize, ArgumentProperties>, + named: HashMap<String, ArgumentProperties>, + rest: ArgumentProperties, +} + +/// Macro that generates an argument parser and builds a custom struct +/// holding provided arguments, given a schema and the list of arguments. +/// Example: +/// ``` +/// let (parser, args) = arguments! 
{ [&params] +/// mandatory(1..=3): literal, +/// mandatory(4): number fn(_v: ParseNode) { true }, +/// optional("trailing"): literal["true", "false"], +/// rest: number +/// }?; +/// println!("first arg {:?}", args.number.1); // a literal (Node<'a>). +/// println!("second arg {:?}", args.number.2); // a literal (Node<'a>). +/// println!("third arg {:?}", args.number.3); // a literal (Node<'a>). +/// println!("fourth arg {:?}", args.number.4); // a number of any kind (Node<'a>). +/// if let Some(named) = args.trailing { +/// println!("named arg {:?}", named); // the literal "true" or "false". +/// } +/// for arg in args.rest { +/// println!("trailing arg: {:?}", arg); // trailing number args. +/// } +/// ``` +#[proc_macro] +pub fn arguments(stream: proc_macro::TokenStream) -> proc_macro::TokenStream { + let stream: TokenStream = stream.into(); + let stream = stream.into_iter().peekable(); + let mut stream = stream.into_iter(); + + // Parse the provided runtime argument vector. + let Some(args_vec) = stream.next().and_then(|tokens| match tokens { + TokenTree::Group(group) => match + group.stream() + .into_iter() + .collect::<Vec<TokenTree>>() + .as_slice() + { + [params,] => Some(params.clone()), + _ => None, + }, + _ => None, + }) else { + panic!("Vector of arguments not given."); + }; + + // Start building final source-code output. + let mut out: TokenStream = TokenStream::new(); + out.extend(quote! { + let mut rules = crate::parse::macros::ArgRules::new(); + let params: Vec<crate::parse::parser::ParseNode> = #args_vec; + }); + // Initialize keeping track of the custom argument struct types. + let mut arg_struct = ArgumentStructTypes { + positional: HashMap::new(), + named: HashMap::new(), + rest: ArgumentProperties { + kind: ArgumentKind::None, + position_type: PositionTypes::Rest, + rust_type: quote! { () }, + }, + }; + // Parse the argument schema. + let mut parse_state = ParseState::ArgumentPosition; + while let Some(token) = stream.next() { + match parse_state { + ParseState::ArgumentPosition => match token { + TokenTree::Ident(ident) => { + match ident.to_string().as_str() { + "mandatory" => { + parse_state = ParseState::PositionPattern(PositionTypes::Mandatroy); + }, + "optional" => { + parse_state = ParseState::PositionPattern(PositionTypes::Optional); + }, + "rest" => { + parse_state = ParseState::PositionPattern(PositionTypes::Rest); + }, + _ => panic!("Invalid token: `{}`", ident.to_string()), + } + }, + _ => panic!("Invalid token: `{}`", token), + }, + // Parse `rest: ...` + ParseState::PositionPattern(PositionTypes::Rest) => { + // Check we consumed `:` + match token { + TokenTree::Punct(punct) => assert!(punct.as_char() == ':'), + _ => panic!("Invalid token: `{}`", token), + } + let argument_type = parse_argument_type(&mut stream, PositionTypes::Rest); + let arg_type = argument_type.source_code; + let code = quote! {{ + let arg_type = #arg_type; + rules.register_remaining(arg_type); + }}; + out.extend(code); + // Register argument struct type. + let rust_type = argument_type.properties.rust_type; + arg_struct.rest.kind = argument_type.properties.kind; + arg_struct.rest.rust_type = quote! { Vec<#rust_type> }; + }, + ParseState::PositionPattern(pos@PositionTypes::Mandatroy | pos@PositionTypes::Optional) => { + // Parse the pattern for matching argument positions. 
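+                // e.g. the `1..=3` in `mandatory(1..=3): ...` or the
+                //  `"trailing"` in `optional("trailing"): ...` shown above.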
+ let position_pattern = match token { + TokenTree::Group(group) => group.stream(), + _ => panic!("Unexpected token"), + }; + // Skip `:` + let token = stream.next(); + match token { + Some(TokenTree::Punct(punct)) => assert!(punct.as_char() == ':'), + _ => panic!("Invalid token: `{:?}`", token), + } + // Register the argument-position matcher. + let argument_type = parse_argument_type(&mut stream, pos); + let arg_type = argument_type.source_code; + let arg_pos = match pos { + PositionTypes::Mandatroy => quote! { crate::parse::macros::Arg::Mandatory }, + PositionTypes::Optional => quote! { crate::parse::macros::Arg::Optional }, + _ => unreachable!(), + }; + let code = quote! {{ + let arg_type = #arg_type; + let arg_pos = #arg_pos; + fn position_matcher(pattern: &Box<dyn crate::parse::macros::ArgMatcher>) -> bool { + match pattern.into() { + Some(#position_pattern) => true, + _ => false, + } + } + rules.register(position_matcher, arg); + }}; + out.extend(code); + // Register argument struct type. + let rust_type = argument_type.properties.rust_type; + let rust_type = match pos { + PositionTypes::Mandatroy => quote! { #rust_type }, + PositionTypes::Optional => quote! { Option<#rust_type> }, + _ => unreachable!(), + }; + // Take each possible argument position and register the type. + for position in parse_finite_pattern(position_pattern) { + match position { + StringOrInt::String(name) => arg_struct.named.insert(name, ArgumentProperties { + kind: argument_type.properties.kind, + position_type: pos, + rust_type, + }), + StringOrInt::Int(offset) => arg_struct.positional.insert(offset, ArgumentProperties { + kind: argument_type.properties.kind, + position_type: pos, + rust_type, + }), + }; + } + }, + }; + } + + // Build tuple type for arguments structure. + let tuple_len = *arg_struct.positional.keys().max().unwrap_or(&0); + let mut tuple_types = vec![quote! { () }; tuple_len]; + for i in 0..tuple_len { + tuple_types.push(match arg_struct.positional.remove(&i) { + Some(props) => props.rust_type, + None => quote! { () }, + }); + } + // Build named arguments struct fields. + let mut named_arguments: Vec<TokenStream> = vec![]; + for (name, props) in arg_struct.named.iter() { + let rust_type = props.rust_type; + named_arguments.push(quote! { + #name: #rust_type + }); + } + + // TODO: need to iterate the runtime-provided params and extract + // them into the correct type depending on expected type + // (i.e. literal => Node<'a>; list => Vec<ParseNode<'a>; etc.) + // A failure of the above extraction should nicely hand back a + // custom Err() message. + // Optional nodes do not fail, they just get a `None` value. + // While doing this, each extracted argument should be checked + // that it satisfies the supplemental conditions in the schema (predicates). + // Again, if it fails the check, default on an Err() describing in the + // most informative possible way why it failed. + // Finally, no failures should mean a fully populated arguments struct + // can be constructed from the previous arguments, and can be returned. + + // TODO: write reusable `extract_literal` function + // (signature: ParseNode<'a> -> Result<Node<'a>, ExpansionError<'a>>) + // that will give a custom error message for failing to extract. + // e.g. "expected a literal, got a {insert_node_kind}". + + for i in 1..=tuple_len { + let arg_num_name: TokenStream = format!("arg_num_{}", i).parse().unwrap(); + + let code = quote! 
{ + let #arg_num_name = parser.positional.get(); + }; + } + + // Assemble code that builds argument parser context and argument struct. + let rest_rust_type = arg_struct.rest; + let out = out.into_iter(); + quote! { + { + #(#out)*; + struct MyArguments { + number: #(#tuple_types),*, + #(#named_arguments),*, + rest: #rest_rust_type, + } + let rules = rules.clone(); + match crate::parse::macros::ArgParser::new(rules, params) { + Ok(parser) => { + let args_struct = MyArguments { + ... + }; + Ok((parser, args_struct)) // Returns the parser and args from the scope. + }, + Err(e) => e, + } + } + }.into() +} + +#[derive(Clone, PartialEq, Eq, Hash)] +enum StringOrInt { String(String), Int(usize) } + +/// Parse a subset of rust "patterns" that have a finite set of +/// values which will satisfy said pattern. +/// Returns a list of all values which may satisfy it. +/// Restrictions: +/// - exact match (`a`) +/// - ranges (`a..b`, `a..=b`); +/// - or-patterns (`expr1 | expr2`); +/// - values are strings or integers (`a: &str` or `a: usize`). +/// TODO: Re-emit the pattern converted as such: +/// `2..=4 | "hello" | 5` => `Int(2..=4) | String(&'static "hello") | Int(5)` +fn parse_finite_pattern(pat: TokenStream) -> HashSet<StringOrInt> { + let mut set = HashSet::new(); + // Parse the input TokenStream into a syn::Pat + let expr = syn::parse::Parser::parse2(|input: syn::parse::ParseStream| { + Pat::parse_multi(input) + }, pat.into()).expect("failed"); + + // Recursively parse patterns. + fn parse_expr(expr: &Pat, set: &mut HashSet<StringOrInt>) { + match expr { + // Handle literals (integers or strings) + Pat::Lit(ExprLit { lit, .. }) => { + match lit { + Lit::Int(lit_int) => { + set.insert(StringOrInt::Int(lit_int.base10_parse::<usize>().unwrap())); + } + Lit::Str(lit_str) => { + set.insert(StringOrInt::String(lit_str.value())); + } + _ => {} + } + } + // Handle ranges + Pat::Range(ExprRange { + start: Some(start), + end: Some(end), + limits, + .. + }) => { + // Parse the start and end as integers + if let ( + Expr::Lit(ExprLit { lit: Lit::Int(start_lit), .. }), + Expr::Lit(ExprLit { lit: Lit::Int(end_lit), .. }), + ) = (&**start, &**end) { + let start_val = start_lit.base10_parse::<usize>().unwrap(); + let end_val = end_lit.base10_parse::<usize>().unwrap(); + // Enumerate inclusive (`..=`) or exclusive (`..`) range. + match limits { + RangeLimits::HalfOpen(_) => { + for i in start_val..end_val { + set.insert(StringOrInt::Int(i)); + } + }, + RangeLimits::Closed(_) => { + for i in start_val..=end_val { + set.insert(StringOrInt::Int(i)); + } + }, + }; + } + } + // Handle or-patterns + Pat::Or(PatOr { cases, .. }) => { + // For or-patterns, parse both the left and right expressions recursively + for case in cases { + parse_expr(case, set); + } + } + _ => panic!("Unsupported pattern.") + } + } + + parse_expr(&expr, &mut set); + set +} + + +struct ArgumentType { + source_code: TokenStream, + properties: ArgumentProperties, +} + +fn parse_argument_type(stream: &mut Peekable<IntoIter>, position_type: PositionTypes) -> ArgumentType { + use ArgumentKind as AK; + let (kind, rust_type, arg_type) = match stream.next() { + Some(TokenTree::Ident(ident)) => match ident.to_string().as_str() { + "literal" => (AK::Literal, quote! { crate::parse::parser::Node<'a> }, quote! { crate::parse::macros::ArgType::Literal }), + "string" => (AK::String, quote! { crate::parse::parser::Node<'a> }, quote! { crate::parse::macros::ArgType::String }), + "symbol" => (AK::Symbol, quote! { crate::parse::parser::Node<'a> }, quote! 
{ crate::parse::macros::ArgType::Symbol }), + "number" => (AK::Number, quote! { crate::parse::parser::Node<'a> }, quote! { crate::parse::macros::ArgType::Number }), + "symbolic" => (AK::Symbolic, quote! { crate::parse::parser::Node<'a> }, quote! { crate::parse::macros::ArgType::Symbolic }), + "list" => (AK::List, quote! { Vec<crate::parse::parser::Node<'a>> }, quote! { crate::parse::macros::ArgType::List }), + "any" => (AK::Any, quote! { crate::parse::parser::ParseNode<'a> }, quote! { crate::parse::macros::ArgType::Any }), + _ => panic!("Invalid argument type: `{}`", ident), + }, + None => panic!("Unexpected EOF"), + _ => panic!("Invalid token type"), + }; + + let token = stream.peek().map(|token| token.clone()); + let source_code = match token { + // Parse a list of potential pattern matches for argument. + Some(TokenTree::Group(group)) => match group.delimiter() { + Delimiter::Bracket | Delimiter::Parenthesis => { + stream.next(); // Consume the list. + let group = group.stream().into_iter(); + quote! { #arg_type(vec![ #(#group),* ]) } + }, + _ => panic!("Unexpected list delimiter"), + }, + // Parse a function which matches the arguemnt. + Some(TokenTree::Ident(ident)) if ident.to_string() == "fn" => { + stream.next(); // Consume the `fn` keyword. + let fn_arguments = match stream.next() { + Some(TokenTree::Group(group)) => group.stream().into_iter(), + None => panic!("Unexpected EOF"), + _ => panic!("Unexpected token"), + }; + let Some(fn_body) = stream.next() else { panic!("Unexpected EOF") }; + quote! {{ + fn predicate(#(#fn_arguments),*) -> bool { #fn_body } + let arg_pred = crate::parse::macros::ArgPredicate::Satisfying(Box::new(predicate)); + #arg_type(vec![arg_pred]) + }} + } + _ => quote! { #arg_type(vec![]) }, + }; + + ArgumentType { + source_code, + properties: ArgumentProperties { + kind, + position_type, + rust_type, + }, + } +} diff --git a/src/assemble/mod.rs b/src/assemble/mod.rs @@ -1,160 +0,0 @@ -use crate::parse::tokens::Site; -use std::{convert, error::Error, fmt::{self, Debug}}; - -use colored::*; -use unicode_width::UnicodeWidthStr; - -/// Error type for specific errors with generating -/// each type of markup. -#[derive(Debug, Clone)] -pub struct GenerationError<'a> { - pub markup: &'static str, - pub message: String, - pub site: Site<'a>, -} - -impl<'a> GenerationError<'a> { - /// Create a new error given the ML, the message, and the site. - pub fn new(ml: &'static str, msg: &str, site: &Site<'a>) -> Self { - Self { - markup: ml, - message: msg.to_owned(), - site: site.to_owned(), - } - } -} - -/// Implement fmt::Display for user-facing error output. -impl<'a> fmt::Display for GenerationError<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let line_prefix = format!(" {} |", self.site.line); - let line_view = self.site.line_slice(); - writeln!(f, "{} {}", line_prefix, line_view)?; - writeln!(f, "{:>prefix_offset$} {:~>text_offset$}{:^>length$}", "|", "", "", - prefix_offset=UnicodeWidthStr::width(line_prefix.as_str()), - text_offset=self.site.line_column() - 1, - length=self.site.width())?; - write!(f, "{}: {}", - format!("[{}] Error Generating {} ({}:{}:{})", - "**".red().bold(), - self.markup.bold(), - self.site.source, - self.site.line, - self.site.line_column(), - ).black(), - self.message) - } -} - -/// Implements std::error::Error. -impl<'a> Error for GenerationError<'a> { } - -/// Convert from an io::Error to a generation error. 
-impl<'a> From<std::io::Error> for GenerationError<'a> { - fn from(e: std::io::Error) -> Self { - Self { - markup: "<markup>", - message: format!("IO error: {}", e), - site: Site::unknown(), - } - } -} - -/// An fmt::Error can be cast to an equally horribly -/// ambiguous GenerationError. -impl<'a> convert::From<fmt::Error> for GenerationError<'a> { - fn from(e: fmt::Error) -> Self { - Self { - markup: "<markup>", - message: format!("Format buffer error: {}", e), - site: Site::unknown(), - } - } -} - -pub type Formatter<'a> = &'a mut dyn fmt::Write; - -/// Trait for all structs that can generate specific markup -/// for the s-expression tree. -pub trait MarkupFormatter: Debug + CloneBox { - // Required definitions: - /// Similar to fmt in Display/Debug traits, takes in a - /// mutable writable buffer, returns success or a specifc - /// error while generating the markup. - fn generate(&self, buf: Formatter) -> Result<(), GenerationError>; - /// Documentises the input, that's to say, it adds any - /// extra meta-information to the generated markup, if - /// the s-expressions your wrote ommited it. - /// e.g. All XML gets a `<?xml ... ?>` tag added to it. - fn document(&self) -> Result<String, GenerationError>; - // Default definitions: - /// Directly converts the s-expressions into a string - /// containing the markup, unless there was an error. - fn display(&self) -> Result<String, GenerationError> { - let mut buf = String::new(); - self.generate(&mut buf)?; - Ok(buf) - } -} - -/// See: https://stackoverflow.com/a/30353928 -pub trait CloneBox { - fn clone_box(&self) -> *mut (); -} - -impl<'a, T> CloneBox for T where T: Clone + 'a { - fn clone_box(&self) -> *mut () { - Box::<T>::into_raw(Box::new(self.clone())) as *mut () - } -} - -impl<'a> Clone for Box<dyn MarkupFormatter + 'a> { - fn clone(&self) -> Box<dyn MarkupFormatter + 'a> { - unsafe { - *Box::from_raw(self.clone_box() as *mut Self) - } - } -} - -/// Automatically implement fmt::Display as a wrapper around -/// MarkupFormatter::generate, but throws away the useful error message. -impl fmt::Display for dyn MarkupFormatter { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.generate(f).map_err(|_| fmt::Error) - } -} - -/// Parforms the following escapes: -/// - `<` → `&lt;` -/// - `>` → `&gt;` -/// - `"` → `&quot;` -/// - `'` → `&apos;` -/// - `&` → `&amp;` -pub fn escape_xml(string: &str) -> String { - let mut bytes = string.bytes(); - let mut byte_builder: Vec<u8> = Vec::with_capacity(bytes.len()); - while let Some(byte) = bytes.next() { - match byte { - b'<' => byte_builder.extend(b"&lt;"), - b'>' => byte_builder.extend(b"&gt;"), - b'"' => byte_builder.extend(b"&quot;"), - b'\'' => byte_builder.extend(b"&apos;"), - b'&' => byte_builder.extend(b"&amp;"), - _ => byte_builder.push(byte) - } - } - unsafe { - String::from_utf8_unchecked(byte_builder) - } -} - -/// Re-constitute original S-expressions. -pub mod sexp; -/// Converts source into expanded plain-text. -pub mod text; -/// XML generation. -pub mod xml; -/// HTML5 CSS generation. -pub mod css; -/// HTML5 HTML generation. 
-pub mod html; diff --git a/src/lib.rs b/src/lib.rs @@ -1,39 +0,0 @@ -#![allow(incomplete_features)] -#![feature(pattern)] -#![feature(associated_type_defaults)] - -pub mod parse; -pub mod assemble; - -use parse::{expander, parser, lexer}; - -use std::{fs, io, path::Path}; - -pub const VERSION: (u8, u8, u8) = (0, 3, 0); - -pub fn tree_builder<'a, P: AsRef<Path>>(source_path: Option<P>, string: String) - -> expander::Expander<'a> { - let path = source_path.map_or("<stdin>".to_string(), - |s| s.as_ref().to_string_lossy().to_string()); - let tokenizer = lexer::Lexer::new(path, string); - let builder = parser::Parser::new(tokenizer); - expander::Expander::new(builder) -} - -pub fn tree_builder_file<'a>(path: &Path) - -> io::Result<expander::Expander<'a>> { - let contents = fs::read_to_string(&path)?; - Ok(tree_builder(Some(path), contents)) -} - -pub fn tree_builder_stream(stream: &mut impl io::Read) - -> io::Result<expander::Expander> { - let mut contents = String::new(); - stream.read_to_string(&mut contents)?; - Ok(tree_builder(Option::<&Path>::None, contents)) -} - -pub fn main() { - eprintln!("Library main function should not be used."); - std::process::exit(1); -} diff --git a/src/parse/expander.rs b/src/parse/expander.rs @@ -1,739 +0,0 @@ -use super::parser::{Node, ParseNode, ParseTree, Parser}; -use super::tokens::Site; - -use std::fmt::Display; -use std::{ - fmt, - cell::RefCell, - path::PathBuf, - ffi::OsString, - error::Error, - rc::Rc, - collections::{ - HashMap, - BTreeSet, - }, -}; - -use colored::*; -use formatx; -use unicode_width::UnicodeWidthStr; - -/// Error type for errors while expanding macros. -#[derive(Debug, Clone)] -pub struct ExpansionError<'a>(pub String, pub Site<'a>); - -impl<'a> ExpansionError<'a> { - /// Create a new error given the ML, the message, and the site. - pub fn new(msg: &str, site: &Site<'a>) -> Self { - Self(msg.to_owned(), site.to_owned()) - } -} - -/// Implement fmt::Display for user-facing error output. -impl<'a> fmt::Display for ExpansionError<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let ExpansionError(msg, site) = self; - let line_prefix = format!(" {} |", site.line); - let line_view = site.line_slice(); - writeln!(f, "{} {}", line_prefix, line_view)?; - writeln!(f, "{:>prefix_offset$} {:~>text_offset$}{:^>length$}", "|", "", "", - prefix_offset=UnicodeWidthStr::width(line_prefix.as_str()), - text_offset=site.line_column() - 1, - length=site.width())?; - write!(f, "[{}] Error Expanding Macro {}: {}", - "**".red().bold(), site, msg) - } -} - -/// Implements std::error::Error for macro expansion error. -impl<'a> Error for ExpansionError<'a> { } - -/// A macro consists of: -/// - its name; -/// - its argument list (if any); -/// - and its defintion (i.e. *body*). -#[derive(Debug, Clone)] -pub struct Macro<'a> { - name: String, - params: Box<[String]>, - body: Box<[ParseNode<'a>]> -} -// TODO: Macro to also store its own scope (at place of definition) -// in order to implement lexical scoping. - -impl<'a> Macro<'a> { - pub fn new(name: &str) -> Macro { - Macro { - name: name.to_string(), - params: Box::new([]), - body: Box::new([]), - } - } -} - -/// Type of variable scope owned by an `Expander` instance. -pub type Scope<'a> = RefCell<HashMap<String, Rc<Macro<'a>>>>; // Can you believe this type? 
- -#[derive(Debug, Clone)] -pub struct Expander<'a> { - parser: Parser, - includes: BTreeSet<PathBuf>, - subparsers: RefCell<Vec<Parser>>, - subcontexts: RefCell<Vec<Self>>, - invocations: RefCell<Vec<ParseNode<'a>>>, - definitions: Scope<'a>, -} - -impl<'a> Expander<'a> { - pub fn new(parser: Parser) -> Self { - Self { - parser, - includes: BTreeSet::from([PathBuf::from(".")]), - subparsers: RefCell::new(Vec::new()), - subcontexts: RefCell::new(Vec::new()), - invocations: RefCell::new(Vec::new()), - definitions: RefCell::new(HashMap::new()), - } - } - - /// Get underlying source-code of the active parser for current unit. - pub fn get_source(&self) -> &str { - self.parser.get_source() - } - - pub fn add_includes<T: Iterator>(&mut self, dirs: T) - where T::Item: Into<PathBuf> - { - for dir in dirs { - self.includes.insert(dir.into()); - } - } - - /// Add a subparser owned by the expander context. - fn register_parser(&self, parser: Parser) -> &'a Parser { - { - let mut parsers = self.subparsers.borrow_mut(); - parsers.push(parser); - } - self.latest_parser().unwrap() - } - - /// Get the latest subparser added. - fn latest_parser(&self) -> Option<&'a Parser> { - let p = self.subparsers.as_ptr(); - unsafe { (*p).last() } - } - - /// Create and register a subcontext built from the current context. - fn create_subcontext(&self) -> &mut Self { - { - let copy = self.clone(); - let mut contexts = self.subcontexts.borrow_mut(); - contexts.push(copy); - } - self.latest_context().unwrap() - } - - /// Get the latest subparser added. - fn latest_context(&self) -> Option<&mut Self> { - let contexts = self.subcontexts.as_ptr(); - unsafe { (*contexts).last_mut() } - } - - fn register_invocation(&self, node: ParseNode<'a>) -> &ParseNode<'a> { - let invocations = self.invocations.as_ptr(); - unsafe { - (*invocations).push(node); - (*invocations).last().unwrap() - } - } - - /// Update variable (macro) for this scope. - fn insert_variable(&self, name: String, var: Rc<Macro<'a>>) { - let mut defs = self.definitions.borrow_mut(); - defs.insert(name, var); - } - - /// Check if macro exists in this scope. - fn has_variable(&self, name: &str) -> bool { - let defs = self.definitions.borrow(); - defs.contains_key(name) - } - - fn get_variable(&self, name: &str) -> Option<Rc<Macro<'a>>> { - self.definitions.borrow().get(name).map(|m| m.clone()) - } - - /// Define a macro with `(%define a b)` --- `a` is a symbol or a list `(c ...)` where `c` is a symbol. - /// macro definitions will eliminate any preceding whitespace, so make sure trailing whitespace provides - /// the whitespace you need. - fn expand_define_macro(&self, node: &ParseNode<'a>, params: Box<[ParseNode<'a>]>) - -> Result<ParseTree<'a>, ExpansionError<'a>> { - let [head, nodes@..] = &*params else { - return Err(ExpansionError( - format!("`%define` macro takes at least \ - two (2) arguments ({} were given.", params.len()), - node.owned_site())); - }; - - // If head is atomic, we assign to a 'variable'. - // Aditionally, we evaluate its body *eagerly*. - let def_macro = if let Some(variable) = head.atomic() { - let nodes = nodes.to_owned().into_boxed_slice(); - let body = self.expand_nodes(nodes)?; - Rc::new(Macro { - name: variable.value.clone(), - params: Box::new([]), - body, - }) - } else { // Otherwise, we are assigning to a 'function'. - let ParseNode::List { nodes: defn_nodes, .. 
} = head else { - return Err(ExpansionError( - "First argument of `%define` macro must be a list \ - or variable name/identifier.".to_owned(), - node.site().to_owned())); - }; - let [name, params@..] = &**defn_nodes else { - return Err(ExpansionError( - "`%define` macro definition must at \ - least have a name.".to_owned(), - node.site().to_owned())); - }; - let mut arguments: Vec<String> = Vec::with_capacity(params.len()); - for param_node in params { // Verify arguments are symbols. - if let ParseNode::Symbol(param) = param_node { - arguments.push(param.value.clone()); - } else { - return Err(ExpansionError( - "`define` function arguments must be \ - symbols/identifers.".to_owned(), - node.site().to_owned())); - }; - } - let ParseNode::Symbol(name_node) = name else { - return Err(ExpansionError( - "`define` function name must be \ - a symbol/identifier.".to_owned(), - node.site().to_owned())); - }; - let name = name_node.value.clone(); - - Rc::new(Macro { - name, - params: arguments.into_boxed_slice(), - body: nodes.to_owned().into_boxed_slice(), - }) - }; - - self.insert_variable(def_macro.name.to_owned(), def_macro); - Ok(Box::new([])) - } - - /// `(%ifdef symbol a b)` --- `b` is optional, however, if not provided *and* - /// the symbol is not defined, it will erase the whole expression, and whitespace will not - /// be preseved before it. If that's a concern, provide `b` as the empty string `""`. - fn expand_ifdef_macro(&self, node: &ParseNode<'a>, params: Box<[ParseNode<'a>]>) - -> Result<ParseTree<'a>, ExpansionError<'a>> { - if params.len() < 2 || params.len() > 3 { - return Err(ExpansionError(format!("`ifdef` takes one (1) \ - condition and one (1) consequent, a third optional \ - alternative expression may also be provided, but \ - `ifdef` was given {} arguments.", params.len()), - node.site().to_owned())); - } - let symbol = if let Some(node) = params[0].atomic() { - node.value.to_owned() - } else { - // FIXME: Borrow-checker won't let me use params[0].site() as site! - return Err(ExpansionError( - "The first argument to `ifdef` must be a symbol/name.".to_string(), - node.site().clone())); - }; - - let mut expanded = if self.has_variable(&symbol) { - self.expand_node(params[1].clone())? - } else { - if let Some(alt) = params.get(2) { - self.expand_node(alt.clone())? - } else { - Box::new([]) - } - }; - if let Some(first_node) = expanded.get_mut(0) { - first_node.set_leading_whitespace(node.leading_whitespace().to_owned()); - } - Ok(expanded) - } - - fn expand_include_macro(&self, node: &ParseNode<'a>, params: Box<[ParseNode<'a>]>) - -> Result<ParseTree<'a>, ExpansionError<'a>> { - let params: Box<[ParseNode<'a>]> = self.expand_nodes(params)?; - let [path_node] = &*params else { - return Err(ExpansionError( - format!("Incorrect number of arguments \ - to `%include' macro. Got {}, expected {}.", - params.len(), 1), - node.site().to_owned())); - }; - - let Some(Node { value: path, site, .. }) = path_node.atomic() else { - return Err(ExpansionError( - "Bad argument to `%include' macro.\n\ - Expected a path, but did not get any value - that could be interpreted as a path.".to_string(), - path_node.site().to_owned())) - }; - - // Open file, and parse contents! - let include_error = |error: Box<dyn Display>| ExpansionError( - format!("{}", error), site.to_owned()); - let mut parser: Result<Parser, ExpansionError> = Err( - include_error(Box::new("No path tested."))); - // Try all include directories until one is succesful. 
- for include_dir in &self.includes { - let path = include_dir.join(path); - parser = super::parser_for_file(&path) - .or_else(|err| { - let err = Box::new(err); - // Try with `.sex` extensions appended. - let mut with_ext = PathBuf::from(&path); - let filename = path.file_name() - .ok_or(include_error(err))?; - with_ext.pop(); // Remove old filename. - // Build new filename with `.sex` appended. - let mut new_filename = OsString::new(); - new_filename.push(filename); - new_filename.push(".sex"); - with_ext.push(new_filename); // Replace with new filename. - match super::parser_for_file(&with_ext) { - Ok(parser) => Ok(parser), - Err(err) => Err(include_error(Box::new(err))) - } - }); - if parser.is_ok() { break; } - } - // Register the parser for the found file. - let parser = self.register_parser(parser?); - let tree = match parser.parse() { - Ok(tree) => tree, - Err(error) => return Err(ExpansionError( - format!("{}", error), node.site().to_owned())) - }; - - // Build new (expanded) tree, with result of previous - // parse, while recursively expanding each branch in the - // tree too, as they are added. - let mut expanded_tree = Vec::with_capacity(tree.len()); - for branch in tree { - expanded_tree.extend(self.expand_node(branch)?); - } - // First node should inherit leading whitespace from (%include ...) list. - if expanded_tree.len() != 0 { - expanded_tree[0].set_leading_whitespace(node.leading_whitespace().to_owned()); - } - Ok(expanded_tree.into_boxed_slice()) - } - - fn expand_embed_macro(&self, node: &ParseNode<'a>, params: Box<[ParseNode<'a>]>) - -> Result<ParseTree<'a>, ExpansionError<'a>> { - let params: Box<[ParseNode<'a>]> = self.expand_nodes(params)?; - let [path_node] = &*params else { - return Err(ExpansionError( - format!("Incorrect number of arguments \ - to `%embed' macro. Got {}, expected {}.", - params.len(), 1), - node.site().to_owned())); - }; - - let Some(Node { value: path, site, .. }) = path_node.atomic() else { - return Err(ExpansionError( - "Bad argument to `%embed' macro.\n\ - Expected a path, but did not get any value - that could be interpreted as a path.".to_string(), - path_node.site().to_owned())) - }; - - // Open file, and read contents! - let embed_error = |error: Box<dyn Display>| ExpansionError( - format!("{}", error), site.to_owned()); - let mut value: Result<String, ExpansionError> = Err( - embed_error(Box::new("No path tested."))); - // Try all include directories until one is succesful. - for include_dir in &self.includes { - let path = include_dir.join(path); - value = std::fs::read_to_string(path) - .map_err(|err| embed_error(Box::new(err))); - if value.is_ok() { break; } - } - let value = value?; - Ok(Box::new([ - ParseNode::String(Node { - value, - site: node.owned_site(), - leading_whitespace: node.leading_whitespace().to_owned(), - }), - ])) - } - - fn expand_date_macro(&self, node: &ParseNode<'a>, params: Box<[ParseNode<'a>]>) - -> Result<ParseTree<'a>, ExpansionError<'a>> { - let params = self.expand_nodes(params)?; - let [date_format] = &*params else { - return Err(ExpansionError::new( - "`%date' macro only expects one formatting argument.", - node.site())) - }; - - let Some(Node { value: date_format, .. 
}) = date_format.atomic() else { - return Err(ExpansionError::new( - "`%date' macro needs string (or atomic) \ - formatting argument.", node.site())) - }; - - let now = chrono::Local::now(); - let formatted = now.format(&date_format).to_string(); - let date_string_node = ParseNode::String(Node { - value: formatted, - site: node.site().clone(), - leading_whitespace: node.leading_whitespace().to_string(), - }); - Ok(Box::new([date_string_node])) - } - - /// `(%log ...)` logs to `STDERR` when called and leaves *no* node behind. - /// This means whitespace preceeding `(%log ...)` will be removed! - fn expand_log_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) - -> Result<ParseTree<'a>, ExpansionError<'a>> { - let mut words = Vec::with_capacity(params.len()); - for param in self.expand_nodes(params)? { - if let Some(word) = param.atomic() { - words.push(word.value.clone()); - } else { - return Err(ExpansionError::new("`log` should only take \ - arguments that are either symbols, strings or numbers.", - node.site())); - } - } - - eprintln!("{} {} {}: {}", "[#]".bold(), "log".bold().yellow(), - node.site(), words.join(" ")); - Ok(Box::new([])) - } - - fn expand_os_env_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) - -> Result<ParseTree<'a>, ExpansionError<'a>> { - let [ref var] = *params else { - return Err(ExpansionError::new( - "`%os/env' expects excatly one argument.", - node.site())); - }; - let Some(var) = var.atomic() else { - return Err(ExpansionError::new( - "`%os/env' argument must be atomic (not a list).", - var.site())); - }; - let Node { site, leading_whitespace, .. } = var.clone(); - let Ok(value) = std::env::var(&var.value) else { - return Err(ExpansionError( - format!("No such environment variable ($`{}') visible.", &var.value), - site)); - }; - Ok(Box::new([ - ParseNode::String(Node { value, site, leading_whitespace }), - ])) - } - - fn expand_format_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) - -> Result<ParseTree<'a>, ExpansionError<'a>> { - let [format_str, ..] = &*params else { - return Err(ExpansionError::new( - "`%format' expects at a format-string.", - node.site())); - }; - let ParseNode::String(format_str) = format_str else { - return Err(ExpansionError::new( - "First argument to `%format' must be a string.", - format_str.site())); - }; - // Iterate and collect format arguments. - let mut arguments = params.iter(); - let _ = arguments.next(); // Skip the format-string. - let Ok(mut template) = formatx::Template::new(&format_str.value) else { - return Err(ExpansionError::new( - "Invalid format string.", - &format_str.site)); - }; - for mut var in arguments { - // Check if we're replacing a named or positional placeholder. - let mut named: Option<&str> = None; - if let ParseNode::Attribute { keyword, node, .. } = var { - named = Some(keyword.as_str()); - var = node; - } - // TODO: Somehow let non-atomic values be formattable? - let Some(Node { value, .. }) = var.atomic() else { - return Err(ExpansionError( - format!("In `%format', the compound {} type is not formattable.", - var.node_type()), - var.site().clone())); - }; - // Replace the placeholder. - match named { - Some(name) => template.replace(name, value), - None => template.replace_positional(value), - } - } - // Template has been constructed, so now attempt to do subsitituions and - // render the formatted string. 
- match template.text() { - Ok(value) => Ok(Box::new([ - ParseNode::String(Node { - value, - site: node.owned_site(), - leading_whitespace: node.leading_whitespace().to_owned(), - }) - ])), - Err(err) => Err(ExpansionError( - format!("Failed to format string: {}", err.message()), - format_str.site.clone())) - } - } - - fn expand_namespace_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) - -> Result<ParseTree<'a>, ExpansionError<'a>> { - // Start evaluating all the arguments to the macro in a separate context. - let context = self.clone(); - let params = context.expand_nodes(params)?; - let mut args = params.iter().peekable(); - let Some(namespace) = args.next().and_then(ParseNode::atomic) else { - return Err(ExpansionError::new("Expected a namespace name.", node.site())); - }; - // Parse options to macro. - let mut seperator = "/"; // Default namespace seperator is `/`. - while let Some(ParseNode::Attribute { keyword, node, site, .. }) = args.peek() { - let _ = args.next(); - match keyword.as_str() { - "separator" => match node.atomic() { - Some(Node { value, .. }) => seperator = &value, - None => return Err(ExpansionError( - format!("`%namespace' separator must be a symbol, got a {}.", node.node_type()), - node.owned_site())), - }, - opt => return Err(ExpansionError( - format!("Unknown option `:{}' to `%namespace' macro.", opt), - site.clone())), - } - } - // Find all the definitions made within the context of the - // `%namespace` macro and include the defintion prefixed by - // the namespace in the *current* scope. - { - let mut self_defs = self.definitions.borrow_mut(); - let defs = context.definitions.borrow(); - for (key, value) in defs.iter() { - let new_key = format!("{}{}{}", namespace.value, seperator, key); - self_defs.insert(new_key, value.clone()); - } - } - // Return remaining body of the macro. - Ok(args.cloned().collect()) - } - - fn expand_raw_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) - -> Result<ParseTree<'a>, ExpansionError<'a>> { - let mut builder = String::new(); - let args = self.expand_nodes(params)?; - for arg in args { - let Some(Node { value, leading_whitespace, .. }) = arg.atomic() else { - return Err(ExpansionError( - format!("Expected a literal, found a {} node instead.", arg.node_type()), - arg.owned_site())); - }; - builder += leading_whitespace; - builder += value; - } - Ok(Box::new([ - ParseNode::Raw(Node { - value: builder, - site: node.owned_site(), - leading_whitespace: node.leading_whitespace().to_owned(), - }) - ])) - } - - fn expand_string_macro(&self, node: &ParseNode<'a>, params: ParseTree<'a>) - -> Result<ParseTree<'a>, ExpansionError<'a>> { - let mut builder = String::new(); - let args = self.expand_nodes(params)?; - for arg in args { - let Some(Node { value, leading_whitespace, .. }) = arg.atomic() else { - return Err(ExpansionError( - format!("Expected a literal, found a {} node instead.", arg.node_type()), - arg.owned_site())); - }; - builder += leading_whitespace; - builder += value; - } - Ok(Box::new([ - ParseNode::String(Node { - value: builder, - site: node.owned_site(), - leading_whitespace: node.leading_whitespace().to_owned(), - }) - ])) - } - - fn expand_macro(&self, name: &str, node: &ParseNode<'a>, params: ParseTree<'a>) - -> Result<ParseTree<'a>, ExpansionError<'a>> { - // Eagerly evaluate parameters passed to macro invocation. 
- let params = self.expand_nodes(params)?; - - let Some(mac) = self.get_variable(name) else { - return Err(ExpansionError::new( - &format!("Macro not found (`{}').", name), &node.owned_site())) - }; - - // Instance of expansion subcontext. - let subcontext = self.create_subcontext(); - // Check enough arguments were given. - if params.len() != mac.params.len() { - return Err(ExpansionError( - format!("`%{}` macro expects {} arguments, \ - but {} were given.", &mac.name, mac.params.len(), - params.len()), node.site().to_owned())); - } - // Define arguments for body. - for i in 0..params.len() { - let arg_macro = Macro { - name: mac.params[i].to_owned(), - params: Box::new([]), - body: Box::new([params[i].clone()]), //< Argument as evaluated at call-site. - }; - subcontext.insert_variable(mac.params[i].to_string(), Rc::new(arg_macro)); - } - // Expand body. - let mut expanded = subcontext.expand_nodes(mac.body.clone())?.to_vec(); - // Inherit leading whitespace of invocation. - if let Some(first_node) = expanded.get_mut(0) { - first_node.set_leading_whitespace(node.leading_whitespace().to_owned()); - } - Ok(expanded.into_boxed_slice()) - } - - fn expand_invocation(&self, - name: &str, //< Name of macro (e.g. %define). - node: &ParseNode<'a>, //< Node for `%'-macro invocation. - params: Box<[ParseNode<'a>]> //< Passed in arguments. - ) -> Result<ParseTree<'a>, ExpansionError<'a>> { - // Some macros are lazy (e.g. `ifdef`), so each macro has to - // expand the macros in its arguments individually. - match name { - "define" => self.expand_define_macro(node, params), - "ifdef" => self.expand_ifdef_macro(node, params), - "raw" => self.expand_raw_macro(node, params), - "string" => self.expand_string_macro(node, params), - "include" => self.expand_include_macro(node, params), - "embed" => self.expand_embed_macro(node, params), - "namespace" => self.expand_namespace_macro(node, params), - "date" => self.expand_date_macro(node, params), - "log" => self.expand_log_macro(node, params), - "format" => self.expand_format_macro(node, params), - "os/env" => self.expand_os_env_macro(node, params), - _ => self.expand_macro(name, node, params), - } - } - - pub fn expand_node(&self, node: ParseNode<'a>) - -> Result<ParseTree<'a>, ExpansionError<'a>> { - match node { - ParseNode::Symbol(ref sym) => { - // Check if symbol starts with %... and replace it - // with it's defined value. - if sym.value.starts_with("%") { - let name = &sym.value[1..]; - if let Some(def) = self.get_variable(name) { - if !def.params.is_empty() { // Should not be a function. - return Err(ExpansionError::new( - &format!("`{}` is a macro that takes arguments, \ - and cannot be used as a variable.", name), - &sym.site)) - } - Ok(def.body.clone()) - } else { // Not found. - Err(ExpansionError( - format!("No such macro, `{}`.", name), - sym.site.to_owned())) - } - } else { - Ok(Box::new([node])) - } - }, - ParseNode::List { ref nodes, ref site, ref end_token, ref leading_whitespace } => { - // Check for macro invocation (%_ _ _ _). - // Recurse over every element. - let len = nodes.len(); - let mut call = nodes.to_vec().into_iter(); - let head = call.next(); - - // Pathway: (%_ _ _) macro invocation. - if let Some(ref symbol@ParseNode::Symbol(..)) = head { - let node = self.register_invocation(node.clone()); - let name = symbol.atomic().unwrap().value.clone(); - if name.starts_with("%") { - // Rebuild node... - let name = &name[1..]; - let mut params: Vec<ParseNode> = call.collect(); - // Delete leading whitespace of leading argument. 
- if let Some(leading) = params.first_mut() { - if !leading.leading_whitespace().contains('\n') { - leading.set_leading_whitespace(String::from("")); - } - } - return self.expand_invocation(name, node, params.into_boxed_slice()); - } - } - // Otherwise, if not a macro, just expand child nodes incase they are macros. - let mut expanded_list = Vec::with_capacity(len); - expanded_list.extend(self.expand_node(head.unwrap().clone())?); - for elem in call { - expanded_list.extend(self.expand_node(elem)?); - } - - Ok(Box::new([ParseNode::List { - nodes: expanded_list.into_boxed_slice(), - site: site.clone(), - end_token: end_token.clone(), - leading_whitespace: leading_whitespace.clone(), - }])) - }, - ParseNode::Attribute { keyword, node, site, leading_whitespace } => { - let mut expanded_nodes = self.expand_node(*node)?; - let new_node = Box::new(expanded_nodes[0].clone()); - expanded_nodes[0] = ParseNode::Attribute { - keyword: keyword.clone(), - node: new_node, - site: site.clone(), - leading_whitespace: leading_whitespace.clone(), - }; - Ok(expanded_nodes) - }, - _ => Ok(Box::new([node])) - } - } - - pub fn expand_nodes(&self, tree: Box<[ParseNode<'a>]>) - -> Result<ParseTree<'a>, ExpansionError<'a>> { - let mut expanded = Vec::with_capacity(tree.len()); - for branch in tree { - expanded.extend(self.expand_node(branch)?); - } - Ok(expanded.into_boxed_slice()) - } - - pub fn expand(&'a self) -> Result<ParseTree<'a>, Box<dyn 'a + std::error::Error>> { - let tree = self.parser.parse()?; - let expanded = self.expand_nodes(tree)?; - Ok(expanded) - } -} diff --git a/src/parse/mod.rs b/src/parse/mod.rs @@ -1,15 +0,0 @@ -pub mod tokens; -pub mod lexer; -pub mod parser; -pub mod expander; - -pub use parser::ParseTree; -use std::{fs, path::Path, error::Error}; - -/// Build a parser for a file without expanding macros. -pub fn parser_for_file(path: &Path) -> Result<parser::Parser, Box<dyn Error>> { - let contents = fs::read_to_string(&path)?; - let tokenizer = lexer::Lexer::new(path.to_string_lossy().to_string(), contents); - let builder = parser::Parser::new(tokenizer); - Ok(builder) -} diff --git a/src/parse/parser.rs b/src/parse/parser.rs @@ -1,444 +0,0 @@ -use std::{fmt, error::Error}; -use unicode_width::UnicodeWidthStr; -use descape::UnescapeExt; - -use super::{lexer::{LexError, Lexer}, tokens::{Kind, Site, Token}}; - -/// The [`Node`] type represents what atomic/literals are parsed -/// into; i.e. not compound types (e.g. lists, attributes). -/// These are just a common storage for the literals in [`ParseNode`]. -#[derive(Debug, Clone)] -pub struct Node<'a> { - pub value: String, - pub site: Site<'a>, - pub leading_whitespace: String, -} - -impl<'a> Node<'a> { - pub fn new(value: &str, site: &Site<'a>, leading_whitespace: &str) -> Self { - Self { - site: site.to_owned(), - value: value.to_owned(), - leading_whitespace: leading_whitespace.to_owned(), - } - } -} - -/// Parse nodes are the components of the syntax tree that -/// the source code is translated into. -/// These nodes are also produced at compile-time by the macro expander. -#[derive(Debug, Clone)] -pub enum ParseNode<'a> { - Symbol(Node<'a>), - Number(Node<'a>), - String(Node<'a>), - Raw(Node<'a>), //< Raw-content strings are not parsed, only expanded by macros. 
- List { - nodes: Box<[ParseNode<'a>]>, - site: Site<'a>, - end_token: Token<'a>, - leading_whitespace: String, - }, - Attribute { - keyword: String, - node: Box<ParseNode<'a>>, - site: Site<'a>, - leading_whitespace: String, - }, -} - -impl<'a> ParseNode<'a> { - /// Unwrap a literal node if it is a symbol or number. - pub fn symbolic(&self) -> Option<&Node<'a>> { - match self { - Self::Symbol(ref node) - | Self::Number(ref node) => Some(node), - _ => None, - } - } - - /// Unwrap string-like nodes. - pub fn string(&self) -> Option<&Node<'a>> { - match self { - Self::String(ref node) | Self::Raw(ref node) => Some(node), - _ => None, - } - } - - /// Unwrap literal (atomic) nodes into their underlying [`Node`]. - pub fn atomic(&self) -> Option<&Node<'a>> { - match self { - Self::Symbol(ref node) - | Self::Number(ref node) - | Self::String(ref node) - | Self::Raw(ref node) => Some(node), - _ => None, - } - } - - /// Same as [`Self::atomic`], but consumes the node, - /// returning an owned [`Node`]. - pub fn into_atomic(self) -> Option<Node<'a>> { - match self { - Self::Symbol(node) - | Self::Number(node) - | Self::String(node) => Some(node), - _ => None, - } - } - - /// Get a reference to the parse node's underlying [`Site`]. - pub fn site(&self) -> &Site<'a> { - match self { - Self::Symbol(ref node) - | Self::Number(ref node) - | Self::String(ref node) - | Self::Raw(ref node) => &node.site, - Self::List { ref site, .. } => site, - Self::Attribute { ref site, .. } => site, - } - } - - /// Clone the underlying [`Site`] of this parse node. - pub fn owned_site(&self) -> Site<'a> { - match self { - Self::Symbol(node) - | Self::Number(node) - | Self::String(node) - | Self::Raw(node) => node.site.clone(), - Self::List { site, .. } => site.clone(), - Self::Attribute { site, .. } => site.clone(), - } - } - - /// Get a reference to the underlying leading whitespace string - /// of this parse node. - pub fn leading_whitespace(&self) -> &str { - match self { - Self::Symbol(ref node) - | Self::Number(ref node) - | Self::String(ref node) - | Self::Raw(ref node) => &node.leading_whitespace, - Self::List { ref leading_whitespace, .. } => leading_whitespace, - Self::Attribute { ref leading_whitespace, .. } => leading_whitespace, - } - } - - /// Modify the underlying leading whitespace stored for this parse node. - pub fn set_leading_whitespace(&mut self, whitespace: String) { - match self { - Self::Symbol(ref mut node) - | Self::Number(ref mut node) - | Self::String(ref mut node) - | Self::Raw(ref mut node) => node.leading_whitespace = whitespace, - Self::List { ref mut leading_whitespace, .. } => *leading_whitespace = whitespace, - Self::Attribute { ref mut leading_whitespace, .. } => *leading_whitespace = whitespace, - }; - } - - /// Get a `&'static str` string name of what type of parse node this is. - pub fn node_type(&self) -> &'static str { - match self { - Self::Symbol(..) => "symbol", - Self::Number(..) => "number", - Self::String(..) => "string", - Self::Raw(..) => "raw-content string", - Self::List { .. } => "list", - Self::Attribute { .. } => "attribute", - } - } -} - -/// An array of parse nodes, like in a [`ParseNode::List`], never grows. -/// Hence we prefer the `Box<[...]>` representation over a `Vec<...>`. 
-pub type ParseTree<'a> = Box<[ParseNode<'a>]>; - -#[derive(Debug, Clone)] -pub struct ParseError<'a>(pub String, pub Site<'a>); - -impl<'a> fmt::Display for ParseError<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let ParseError(msg, site) = self; - let line_prefix = format!(" {} |", site.line); - let line_view = site.line_slice(); - writeln!(f, "{} {}", line_prefix, line_view)?; - writeln!(f, "{:>prefix_offset$} {:~>text_offset$}{:^>length$}", "|", "", "", - prefix_offset=UnicodeWidthStr::width(line_prefix.as_str()), - text_offset=site.line_column() - 1, - length=site.width())?; - write!(f, "[**] Parse Error ({}:{}:{}): {}", - site.source, site.line, site.line_column(), msg) - } -} - -impl<'a> Error for ParseError<'a> { } - -/// Parser structure walks through source using lexer. -#[derive(Debug, Clone)] -pub struct Parser { - lexer: Lexer, //< Parser owns a lexer. -} - -impl<'a> Parser { - pub fn new(lexer: Lexer) -> Self { - Self { lexer } - } - - pub fn get_source(&self) -> &str { - self.lexer.get_source() - } - - /// Parse whole source code, finishing off the lexer. - pub fn parse(&'a self) -> Result<ParseTree, Box<dyn Error + 'a>> { - let mut root: Vec<ParseNode> = Vec::new(); - while !self.lexer.eof() { - let expr = self.parse_expr()?; - root.push(expr); - } - return Ok(root.into_boxed_slice()); - } - - /// Produce a parse node from the current position in the lexer. - pub fn parse_expr(&'a self) -> Result<ParseNode, Box<dyn Error + 'a>> { - let token = self.lexer.peek()?; - match token.kind { - Kind::LParen => self.parse_list(), - Kind::RParen => Err(ParseError( - "Unexpected `)' closing parenthesis.".to_owned(), - token.site.to_owned()))?, - Kind::Keyword => self.parse_keyword(), - Kind::Symbol => Ok(ParseNode::Symbol(self.parse_atomic()?)), - // TODO: Parse (escpae) string-literals. - Kind::String => Ok(ParseNode::String(self.parse_atomic()?)), - Kind::Number => Ok(ParseNode::Number(self.parse_atomic()?)), - } - } - - /// Parse keyword-attribute pair. - fn parse_keyword(&'a self) -> Result<ParseNode, Box<dyn Error + 'a>> { - // Consume :keyword token. - let token = self.lexer.consume()?; - assert_eq!(token.kind, Kind::Keyword); - // Check we are able to consume next expression for keyword's value. - { - let no_expr_error = ParseError( - format!("Keyword `:{}' expects an expression follwing it.", token.value), - token.site.to_owned()); - if self.lexer.eof() { Err(no_expr_error.clone())? ;} - match self.lexer.peek()? { - Token { kind: Kind::RParen, .. } => Err(no_expr_error)?, - _ => () - } - } - // Otherwise, parse the value and combine the node. - let value = self.parse_expr()?; - Ok(ParseNode::Attribute { - keyword: token.value.to_owned(), - node: Box::new(value), - site: token.site.to_owned(), - leading_whitespace: token.leading_whitespace.to_owned(), - }) - } - - /// Parse a literal node. - /// This is where escapes in symbols and strings are handled. - fn parse_atomic(&'a self) -> Result<Node<'a>, LexError<'a>> { - let token = self.lexer.consume()?; - let value = match token.kind { - Kind::Symbol | Kind::Number | Kind::Keyword => escape_sanitize(token.value), - Kind::String => escape_string(token.value, &token.site)?, - _ => unreachable!("called `parse_atomic` on non-atomic token."), - }; - Ok(Node { - value, - site: token.site.clone(), - leading_whitespace: token.leading_whitespace.to_string(), - }) - } - - /// Parse a list `( [...] )'. - fn parse_list(&'a self) -> Result<ParseNode<'a>, Box<dyn Error + 'a>> { - // Consumed the `(' token. 
- let lparen = self.lexer.consume()?; - assert_eq!(lparen.kind, Kind::LParen); - // Collect list elements. - let mut elements = Vec::new(); - let mut rparen: Option<Token> = None; - while !self.lexer.eof() { - // Keep parsing expressions until `)' is reached. - let token = self.lexer.peek()?; - if token.kind == Kind::RParen { - rparen = Some(self.lexer.consume()?); // Swallow up `)'. - break; - } - let expr = self.parse_expr()?; - elements.push(expr); - } - // Closing parenthesis was never found. - let Some(rparen) = rparen else { - return Err(ParseError( - "Expected `)' closing parenthesis.".to_owned(), - lparen.site.to_owned()))?; - }; - Ok(ParseNode::List { - nodes: elements.into_boxed_slice(), - site: lparen.site.to_owned(), - end_token: rparen.to_owned(), - leading_whitespace: lparen.leading_whitespace.to_owned(), - }) - } -} - -/// Santize any escaped characters by removing their leading backslash. -fn escape_sanitize(string: &str) -> String { - let mut builder = String::with_capacity(string.len()); - let mut chars = string.chars(); - while let Some(c) = chars.next() { - if c == '\\' { continue; } - builder.push(c) - } - builder -} - -/// Parse a string with its escapes. -/// **Note:** Uses the `descape` crate for now. -fn escape_string<'a>(string: &'a str, site: &Site<'a>) -> Result<String, LexError<'a>> { - string.to_unescaped() - .map(|s| s.to_string()) - .map_err(|index| { - LexError( - format!("Invalid escape `\\{}' at byte-index {}.", - string.chars().nth(index).unwrap_or('?'), index), - site.clone()) - }) -} - -pub trait SearchTree<'a> { - /// Search the parse-tree for a specific node with a specific value. - fn search_node(&'a self, kind: SearchType, - value: &str, - case_insensitive: bool, - level: usize) -> Option<&ParseNode<'a>>; -} - -#[derive(Clone, Copy, PartialEq)] -pub enum SearchType { - ListHead, ListMember, - Symbol, Number, String, - Attribute, - Any, -} - -impl SearchType { - pub fn is_a(self, kind: SearchType) -> bool { - self == SearchType::Any || self == kind - } -} - -impl<'a> SearchTree<'a> for ParseNode<'a> { - fn search_node(&'a self, kind: SearchType, value: &str, - insensitive: bool, level: usize) -> Option<&ParseNode<'a>> { - if level == 0 { - return None; - } - - let is_equal = |string: &str| if insensitive { - string.to_lowercase() == value.to_lowercase() - } else { - string == value - }; - - match self { - ParseNode::List { nodes, .. } => { - if kind.is_a(SearchType::ListHead) { - if let Some(Some(caller)) = nodes.get(0).map(ParseNode::atomic) { - if is_equal(&caller.value) { - return Some(self); - } - } - } - nodes.search_node(kind, value, insensitive, level - 1) - }, - ParseNode::Symbol(name) => { - if kind.is_a(SearchType::Symbol) && is_equal(&name.value) { - Some(self) - } else { - None - } - }, - ParseNode::String(name) | ParseNode::Raw(name) => { - if kind.is_a(SearchType::String) && is_equal(&name.value) { - Some(self) - } else { - None - } - }, - ParseNode::Number(name) => { - if kind.is_a(SearchType::Number) && is_equal(&name.value) { - Some(self) - } else { - None - } - }, - ParseNode::Attribute { node, ref keyword, .. 
} => { - if kind.is_a(SearchType::Attribute) { - if is_equal(keyword) { - return Some(node); - } - } - node.search_node(kind, value, insensitive, level - 1) - }, - } - } -} - -impl<'a> SearchTree<'a> for ParseTree<'a> { - fn search_node(&'a self, kind: SearchType, value: &str, - insensitive: bool, level: usize) -> Option<&ParseNode<'a>> { - if level == 0 { - return None; - } - - for node in self { - let found = node.search_node(kind, value, insensitive, level); - if found.is_some() { - return found; - } - } - - None - } -} - -/// Pretty printing for parse nodes. -#[cfg(feature="debug")] -impl<'a> fmt::Display for ParseNode<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - ParseNode::Symbol(node) - | ParseNode::Number(node) => write!(f, "{}", &node.value), - ParseNode::String(node) => { - if node.value.trim().is_empty() { - write!(f, "") - } else { - write!(f, "\"{}\"", &node.value) - } - }, - ParseNode::Attribute { keyword, node, .. } => write!(f, ":{} {}", - &keyword, &*node), - ParseNode::List { nodes, .. } => if nodes.len() == 0 { - write!(f, "()") - } else if let [single] = &**nodes { - write!(f, "({})", single) - } else { - write!(f, "({}{})", nodes[0], - nodes[1..].iter().fold(String::new(), |acc, elem| { - let nested = elem.to_string().split('\n') - .fold(String::new(), |acc, e| - acc + "\n " + &e); - acc + &nested - })) - } - } - } -} diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs @@ -1,116 +0,0 @@ -use std::fmt::{self, Display}; -use unicode_width::UnicodeWidthStr; - -#[derive(Debug, Clone)] -pub struct Site<'a> { - pub source: &'a str, - pub source_code: &'a str, - pub line: usize, - pub bytes_from_start: usize, - pub bytes_from_start_of_line: usize, - pub bytes_span: usize, -} - -pub const UNKNOWN_SITE: Site<'static> = Site { - source: "<unknwon>", - source_code: "", - line: 0, - bytes_from_start: 0, - bytes_from_start_of_line: 0, - bytes_span: 0, -}; - -impl<'a> Site<'a> { - pub fn new(source: &'a str, - source_code: &'a str, - line: usize, - bytes_from_start: usize, - bytes_from_start_of_line: usize, - bytes_span: usize) -> Self { - Self { - source, - source_code, - line, - bytes_from_start, - bytes_from_start_of_line, - bytes_span, - } - } - - pub const fn unknown() -> Self { UNKNOWN_SITE } - - /// Byte-offset in source code for start-of-line where this site is. - pub fn start_of_line(&self) -> usize { - self.bytes_from_start - self.bytes_from_start_of_line - } - - /// Find byte-offset in source code of end-of-line where this site is. - pub fn end_of_line(&self) -> usize { - let mut i = self.bytes_from_start; - let bytes = self.source_code.as_bytes(); - while i < self.source_code.len() { - if bytes[i] == '\n' as u8 { - return i; - } - i += 1; - } - return i; - } - - pub fn view(&'a self) -> &'a str { - let start = self.bytes_from_start; - let end = start + self.bytes_span; - &self.source_code[start..end] - } - - /// Get string view into whole line that site is referencing. - pub fn line_slice(&self) -> &'a str { - &self.source_code[self.start_of_line()..self.end_of_line()] - } - - /// Compute (monospace, terminal) column width of piece of text - /// referenced by this site in the source code. - pub fn width(&self) -> usize { - let text = &self.source_code[self.bytes_from_start..self.bytes_from_start + self.bytes_span]; - UnicodeWidthStr::width(text) - } - - /// Compute which column the site starts at on the line. 
-    pub fn line_column(&self) -> usize {
-        let preceeding = &self.source_code[self.start_of_line()..self.bytes_from_start];
-        UnicodeWidthStr::width(preceeding) + 1
-    }
-}
-
-impl<'a> Display for Site<'a> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "(")?;
-        write!(f, "{}:", self.source)?;
-        write!(f, "{}:{}", self.line, self.line_column())?;
-        write!(f, ")")
-    }
-}
-
-#[derive(Debug, Clone, Copy, PartialEq)]
-pub enum Kind {
-    LParen,
-    RParen,
-    Symbol,
-    String,
-    Number,
-    Keyword,
-}
-
-#[derive(Debug, Clone)]
-pub struct Token<'a> {
-    pub kind: Kind,
-    pub value: &'a str,
-    pub leading_whitespace: &'a str,
-    pub site: Site<'a>,
-}
-
-impl<'a> Token<'a> {
-    pub fn new(kind: Kind, value: &'a str, leading_whitespace: &'a str, site: Site<'a>) -> Self {
-        Self { kind, value, leading_whitespace, site }
-    }
-}
diff --git a/src/seam_argparse_proc_macro/lib.rs b/src/seam_argparse_proc_macro/lib.rs
@@ -0,0 +1,6 @@
+use proc_macro::TokenStream;
+
+#[proc_macro]
+pub fn make_answer(stream: TokenStream) -> TokenStream {
+    "fn answer() -> u32 { 42 }".parse().unwrap()
+}
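
The six-line src/seam_argparse_proc_macro/lib.rs added above is only a placeholder; the fuller argument-parsing macro lives in crates/seam_argparse_proc_macro/src/lib.rs, added in the same commit. As a rough sketch of how a function-like procedural macro of this shape would be consumed, assuming the proc-macro crate is declared as a dependency of the consuming crate (that Cargo wiring is not shown in this diff):

    // Hypothetical consumer crate; the dependency on seam_argparse_proc_macro
    // is assumed here and is not part of the diff above.
    use seam_argparse_proc_macro::make_answer;

    // Invoked in item position, the placeholder macro currently expands to
    // `fn answer() -> u32 { 42 }`.
    make_answer!();

    fn main() {
        assert_eq!(answer(), 42);
    }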