tokens.rs (4290B)
use std::{fmt::{self, Display}, hash::{DefaultHasher, Hash, Hasher}};
use unicode_width::UnicodeWidthStr;

/// Precise source-code location of a parsed (or lexed) node or token.
/// Holds references to the source code and its path, the line number,
/// the byte offset within the file, the byte offset from the start of
/// the line, and the number of bytes the item occupies in the source.
#[derive(Debug, Clone, Copy)]
pub struct Site<'a> {
    pub source: &'a str,
    pub source_code: &'a str,
    pub line: usize,
    pub bytes_from_start: usize,
    pub bytes_from_start_of_line: usize,
    pub bytes_span: usize,
}

/// Dummy (unknown) site.
pub const UNKNOWN_SITE: Site<'static> = Site {
    source: "<unknown>",
    source_code: "",
    line: 0,
    bytes_from_start: 0,
    bytes_from_start_of_line: 0,
    bytes_span: 0,
};

impl<'a> Site<'a> {
    pub fn new(source: &'a str,
               source_code: &'a str,
               line: usize,
               bytes_from_start: usize,
               bytes_from_start_of_line: usize,
               bytes_span: usize) -> Self {
        Self {
            source,
            source_code,
            line,
            bytes_from_start,
            bytes_from_start_of_line,
            bytes_span,
        }
    }

    pub fn uuid(&self) -> String {
        let mut hasher = DefaultHasher::new();
        self.hash(&mut hasher);
        let hash = hasher.finish();
        format!("{}-{}", hash, self.bytes_from_start)
    }

    pub const fn unknown() -> Self { UNKNOWN_SITE }

    /// Byte-offset in source code for the start of the line where this site is.
    pub fn start_of_line(&self) -> usize {
        self.bytes_from_start - self.bytes_from_start_of_line
    }

    /// Find the byte-offset in source code of the end of the line where this site is.
    pub fn end_of_line(&self) -> usize {
        let mut i = self.bytes_from_start;
        let bytes = self.source_code.as_bytes();
        while i < self.source_code.len() {
            if bytes[i] == b'\n' {
                return i;
            }
            i += 1;
        }
        i
    }

    /// Get a string slice into the part of the source code
    /// that occupies the location this site references.
    pub fn view(&'a self) -> &'a str {
        let start = self.bytes_from_start;
        let end = start + self.bytes_span;
        &self.source_code[start..end]
    }

    /// Get a string view into the whole line that this site references.
    pub fn line_slice(&self) -> &'a str {
        &self.source_code[self.start_of_line()..self.end_of_line()]
    }

    /// Compute the (monospace, terminal) column width of the piece of text
    /// referenced by this site in the source code.
    pub fn width(&self) -> usize {
        let text = &self.source_code[self.bytes_from_start..self.bytes_from_start + self.bytes_span];
        UnicodeWidthStr::width(text)
    }

    /// Compute which column the site starts at on its line,
    /// accounting for the rendered number of columns each character
    /// takes in a terminal, using the same procedure as [`Self::width`].
    pub fn line_column(&self) -> usize {
        let preceding = &self.source_code[self.start_of_line()..self.bytes_from_start];
        UnicodeWidthStr::width(preceding) + 1
    }
}
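
// Illustrative usage sketch (not part of the original file): exercises the
// byte-offset bookkeeping in `Site` on a small, made-up source string. The
// file name "example.l" and the source text are assumptions for the example.
#[cfg(test)]
mod site_usage_sketch {
    use super::*;

    #[test]
    fn site_offsets_and_columns() {
        let code = "(hello)\n(wörld)\n";
        // A site covering the symbol `wörld` on line 2: it starts 9 bytes
        // into the file, 1 byte past the start of its line, and spans
        // 6 bytes ("ö" takes two bytes in UTF-8).
        let site = Site::new("example.l", code, 2, 9, 1, 6);
        assert_eq!(site.view(), "wörld");
        assert_eq!(site.start_of_line(), 8);
        assert_eq!(site.end_of_line(), 16);
        assert_eq!(site.line_slice(), "(wörld)");
        assert_eq!(site.width(), 5);       // rendered terminal columns, not bytes
        assert_eq!(site.line_column(), 2); // 1-based column, just after the "("
    }
}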

impl<'a> Hash for Site<'a> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.source.hash(state);
        self.line.hash(state);
        self.bytes_from_start.hash(state);
        self.bytes_from_start_of_line.hash(state);
        self.bytes_span.hash(state);
    }
}

impl<'a> Display for Site<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "(")?;
        write!(f, "{}:", self.source)?;
        write!(f, "{}:{}", self.line, self.line_column())?;
        write!(f, ")")
    }
}

/// Kinds of possible tokens.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Kind {
    LParen,
    RParen,
    Symbol,
    String,
    Number,
    Keyword,
}

/// A single lexed token: its kind, the source slice it covers,
/// any whitespace immediately preceding it, and its location.
#[derive(Debug, Clone, Copy)]
pub struct Token<'a> {
    pub kind: Kind,
    pub value: &'a str,
    pub leading_whitespace: &'a str,
    pub site: Site<'a>,
}

impl<'a> Token<'a> {
    pub fn new(kind: Kind, value: &'a str, leading_whitespace: &'a str, site: Site<'a>) -> Self {
        Self { kind, value, leading_whitespace, site }
    }
}
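
// Illustrative sketch (not part of the original file): shows how a Token
// wraps its slice of the source together with a Site, and how Site's
// Display impl renders a file:line:column triple. The source text and
// file name here are assumptions made for the example.
#[cfg(test)]
mod token_usage_sketch {
    use super::*;

    #[test]
    fn token_carries_value_and_location() {
        let code = "(foo bar)";
        // Token for the symbol `bar`: 5 bytes into the file and its line,
        // one leading space, 3 bytes long.
        let site = Site::new("example.l", code, 1, 5, 5, 3);
        let token = Token::new(Kind::Symbol, "bar", " ", site);
        assert_eq!(token.value, token.site.view());
        assert_eq!(token.leading_whitespace, " ");
        // Display renders "(source:line:column)".
        assert_eq!(token.site.to_string(), "(example.l:1:6)");
    }
}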