token.rs (4166B)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 | use std::fmt;
use crate::site::Site;
#[cfg(feature="debug")]
use {
snailquote::escape,
unicode_width::UnicodeWidthStr,
std::collections::VecDeque
};
/// Way of representing a level of indentation.
///
/// TODO: Use this. Nothing refers to the type yet, which is why the
/// `dead_code` lint is silenced below.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Indent {
    /// Indentation by a tab.
    Tab,
    /// Indentation by spaces; the `u32` is presumably the number of
    /// spaces (TODO confirm once the type is actually used).
    Spaces(u32),
}
/// Contains all possible types/classes of
/// lexical tokens.
///
/// `Debug` is derived so that token classes can be used directly in
/// `assert_eq!` and `{:?}` output; the user-facing names come from the
/// `Display` implementation.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum TokenType {
    /// Identifiers, variables, function names etc.
    Ident,
    /// Numerics, anything that directly represents a number.
    Num,
    /// Any operators, similar to idents but are lexed differently.
    Op,
    /// Symbols, they are like elements of C enums, they begin with a colon.
    Sym,
    /// Strings, enclosed by double quotes ("...").
    Str,
    /// Left Parenthesis.
    LParen,
    /// Right Parenthesis.
    RParen,
    /// Left Square Bracket.
    LBrack,
    /// Right Square Bracket.
    RBrack,
    /// Left curly-brace.
    LBrace,
    /// Right curly-brace.
    RBrace,
    /// Left vector-list bracket.
    LVec,
    /// Right vector-list bracket.
    RVec,
    /// Terminator, something that ends a line.
    /// Either a semi-colon (;) or a new-line (\n).
    Term,
    /// End Of File, last token in the stream.
    EOF,
}
impl fmt::Display for TokenType {
    /// Writes the human-readable name of the token class.
    ///
    /// The name is emitted through [`fmt::Formatter::pad`], so width, fill
    /// and alignment flags (for example `{:<12}`) are honoured. With no
    /// flags the output is exactly the bare name.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let name = match *self {
            Self::Ident  => "Identifier",
            Self::Num    => "Numeric",
            Self::Op     => "Operator",
            Self::Sym    => "Symbol",
            Self::Str    => "String",
            Self::LParen => "L-Paren",
            Self::RParen => "R-Paren",
            Self::LBrack => "L-Bracket",
            Self::RBrack => "R-Bracket",
            Self::LBrace => "L-Brace",
            Self::RBrace => "R-Brace",
            Self::LVec   => "L-Vector",
            Self::RVec   => "R-Vector",
            Self::Term   => "Terminator",
            Self::EOF    => "End-Of-File",
        };
        // `pad` rather than `write!("{}")`: the latter silently drops any
        // width/alignment the caller asked for.
        f.pad(name)
    }
}
/// A single lexical token.
///
/// Bundles together the class the lexer assigned to it, the text it
/// was written as in the program, and the place in the code where it
/// was found.
#[derive(Clone)]
pub struct Token {
    /// The type/class this token belongs to.
    pub class: TokenType,
    /// The string the token matched with.
    pub string: String,
    /// The position of the token in the code.
    pub location: Site,
}
impl Token {
    /// Builds a token out of its class, an owned copy of the matched
    /// text, and the site it was found at.
    pub fn new(class: TokenType, string: &str, site: Site) -> Token {
        Token {
            class,
            string: string.to_owned(),
            location: site,
        }
    }

    /// Tells whether the token stands for an atomic datum, i.e. whether
    /// its class is an identifier, numeric, operator, symbol or string.
    pub fn is_atomic(&self) -> bool {
        match self.class {
            TokenType::Ident
            | TokenType::Num
            | TokenType::Op
            | TokenType::Sym
            | TokenType::Str => true,

            // Brackets of every kind, terminators and end-of-file
            // are structural, not data.
            TokenType::LParen
            | TokenType::RParen
            | TokenType::LBrack
            | TokenType::RBrack
            | TokenType::LBrace
            | TokenType::RBrace
            | TokenType::LVec
            | TokenType::RVec
            | TokenType::Term
            | TokenType::EOF => false,
        }
    }
}
/// String representation of the token.
///
/// Only compiled when the `debug` feature is enabled.
#[cfg(feature="debug")]
impl fmt::Display for Token {
    /// Writes the token as one bracketed row: its class, its escaped and
    /// double-quoted text, and its `line:column (span)` position, with the
    /// class and text padded out to fixed display-width columns.
    ///
    /// Text wider than its column is printed in full without padding.
    ///
    /// # Panics
    ///
    /// Panics if `unwrap()` fails on any of the site's `line`, `column`
    /// or `columns` fields.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Display widths (in terminal columns) reserved for each field.
        const CLASS_COLUMN: usize = 12;
        const TEXT_COLUMN: usize = 30;

        // Whatever `escape` hands back that does not already end in a
        // double quote gets wrapped in double quotes.
        let mut escaped = escape(&self.string).into_owned();
        if !escaped.ends_with('"') {
            escaped = format!("\"{}\"", escaped);
        }

        let class = self.class.to_string();
        // `saturating_sub` instead of `-`: a plain unsigned subtraction
        // underflows (and panics) as soon as a token is wider than its
        // column, e.g. a long string literal.
        let class_pad = " ".repeat(CLASS_COLUMN.saturating_sub(class.width()));
        let text_pad = " ".repeat(TEXT_COLUMN.saturating_sub(escaped.width()));

        write!(f, "[ {class}:{spaces1}{rep}{spaces2}{l}:{c} ({span}) ]",
            class=class, rep=escaped,
            spaces1=class_pad,
            spaces2=text_pad,
            l=self.location.location.line.unwrap(),
            c=self.location.location.column.unwrap(),
            span=self.location.location.columns.unwrap())
    }
}
/// Gives the token-stream as a whole its own custom
/// string representation.
#[cfg(feature="debug")]
pub trait ShowStream {
    /// Produces the string representation of the token-stream.
    fn to_string(&self) -> String;
}

#[cfg(feature="debug")]
impl ShowStream for VecDeque<Token> {
    /// Renders every token via its `Display` form, joins the results
    /// with a comma and a line break, and wraps the lot in square brackets.
    fn to_string(&self) -> String {
        let mut rendered: Vec<String> = Vec::with_capacity(self.len());
        for token in self {
            // Spelled out as `ToString::to_string` so it cannot be
            // confused with `ShowStream::to_string`.
            rendered.push(ToString::to_string(token));
        }
        let body = rendered.join(",\n ");
        format!("[ {} ]", body)
    }
}
|