crepl

An intuitive calculator REPL.
git clone git://git.knutsen.co/crepl
Log | Files | Refs | README | LICENSE

commit 0615e712b31c255559e50c64329c4ba1acacf1b1
Author: Demonstrandum <moi@knutsen.co>
Date:   Tue, 23 Jun 2020 08:55:43 +0100

Parser, lexer and pretty printing working.

Diffstat:
A.gitignore | 11+++++++++++
AMakefile | 31+++++++++++++++++++++++++++++++
AREADME.md | 0
Asrc/displays.c | 55+++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/displays.h | 3+++
Asrc/error.c | 37+++++++++++++++++++++++++++++++++++++
Asrc/error.h | 16++++++++++++++++
Asrc/execute.c | 1+
Asrc/execute.h | 1+
Asrc/main.c | 58++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/parse.c | 406+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/parse.h | 130+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/prelude.c | 23+++++++++++++++++++++++
Asrc/prelude.h | 71+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
14 files changed, 843 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1,11 @@ +# Outfile +crepl +*.out + +# Object files +*.o + +# Misc +*.log +desktop.ini +.DS_Store diff --git a/Makefile b/Makefile @@ -0,0 +1,31 @@ +CC=gcc +CFLAGS=-Wall -Wpedantic +TARGET=crepl +OBJS=main.o prelude.o parse.o displays.o error.o execute.o +LINKS=-lm -lreadline + +all: clean $(OBJS) + $(CC) -o $(TARGET) $(LINKS) $(OBJS) + @printf "\n\033[1mBuilt \`$(TARGET)' successfully.\033[0m\n" + +prelude.o: error.o + $(CC) -c $(CFLAGS) src/prelude.c + +main.o: prelude.o parse.o error.o + $(CC) -c $(CFLAGS) src/main.c + +parse.o: error.o + $(CC) -c $(CFLAGS) src/parse.c + +displays.o: parse.o + $(CC) -c $(CFLAGS) src/displays.c + +execute.o: parse.o error.o + $(CC) -c $(CFLAGS) src/execute.c + +error.o: + $(CC) -c $(CFLAGS) src/error.c + +clean: + @echo "Cleaning previous build." + rm -f $(TARGET) $(OBJS) diff --git a/README.md b/README.md diff --git a/src/displays.c b/src/displays.c @@ -0,0 +1,55 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "prelude.h" +#include "parse.h" +#include "displays.h" + +char *display_parsetree(const ParseNode *tree) +{ + if (tree == NULL) + return "NULL"; + switch (tree->type) { + case IDENT_NODE: { + return tree->node.ident.value; + } + case NUMBER_NODE: { + // TODO: Check the number type (int, float, etc.). + char *number = malloc(sizeof(char) * 64); // Guess? + sprintf(number, "%ld", tree->node.number.value.i); + return number; + } + case UNARY_NODE: { + UnaryNode unary = tree->node.unary; + char *operand_str = display_parsetree(unary.operand); + char *callee_str = display_parsetree(unary.callee); + + char *unary_str = malloc(sizeof(char) * ( + + strlen(operand_str) + + strlen(callee_str) + + 4 /* <- Extra padding */)); + if (unary.is_postfix) + sprintf(unary_str, "(%s %s)", operand_str, callee_str); + else + sprintf(unary_str, "(%s %s)", callee_str, operand_str); + return unary_str; + } + case BINARY_NODE: { + BinaryNode binary = tree->node.binary; + char *left_str = display_parsetree(binary.left); + char *right_str = display_parsetree(binary.right); + char *callee_str = display_parsetree(binary.callee); + + char *binary_str = malloc(sizeof(char) * ( + + strlen(left_str) + + strlen(right_str) + + strlen(callee_str) + + 6 /* <- Extra padding */)); + sprintf(binary_str, "(%s %s %s)", left_str, callee_str, right_str); + return binary_str; + } + default: + return "[Unknown Parse Node]"; + } +} diff --git a/src/displays.h b/src/displays.h @@ -0,0 +1,3 @@ +#include "parse.h" + +char *display_parsetree(const ParseNode *); diff --git a/src/error.c b/src/error.c @@ -0,0 +1,37 @@ +#include "error.h" + +#include <stdio.h> +#include <string.h> + +const char *error_name(error_t err) +{ + switch (err) { + case NO_ERROR: + return "IMPROPERLY REPORTED ERROR"; + case SYNTAX_ERROR: + return "Syntax Error"; + case PARSE_ERROR: + return "Grammar Error"; + case EXECUTION_ERROR: + return "Error while executing"; + default: + return "UNKNOWN ERROR"; + } +} + +#define DEFAULT_ERROR_MSG "No errors reported." + +error_t ERROR_TYPE = NO_ERROR; +char ERROR_MSG[256] = DEFAULT_ERROR_MSG; + +void handle_error() +{ + // Display error. + printf("\033[31;1m%s\033[0m: %s\n", + error_name(ERROR_TYPE), + ERROR_MSG); + // Reset error values. + ERROR_TYPE = NO_ERROR; + strcpy(ERROR_MSG, DEFAULT_ERROR_MSG); +} + diff --git a/src/error.h b/src/error.h @@ -0,0 +1,16 @@ +#pragma once + +typedef enum { + NO_ERROR, + SYNTAX_ERROR, + PARSE_ERROR, + EXECUTION_ERROR, +} error_t; + + +const char *error_name(error_t); + +extern error_t ERROR_TYPE; +extern char ERROR_MSG[256]; + +void handle_error(); diff --git a/src/execute.c b/src/execute.c @@ -0,0 +1 @@ +#include "execute.h" diff --git a/src/execute.h b/src/execute.h @@ -0,0 +1 @@ +#include "prelude.h" diff --git a/src/main.c b/src/main.c @@ -0,0 +1,58 @@ +#include <stdio.h> +#include <readline/readline.h> +#include <readline/history.h> + +#include <stdlib.h> +#include <string.h> + +#include "prelude.h" +#include "error.h" +#include "parse.h" +#include "displays.h" + +static const char *PROMPT = "::> "; + +int main(int argc, char **argv) +{ + printf("\033[1m"); + printf("CREPL — Calculator Read Eval Print Loop"); + printf("\033[0m"); + puts(" (" COMPILER ") (" __DATE__ ")"); + puts("Type \"exit\" or [Ctrl+D] (i.e. EOF) to quit."); + + rl_bind_key('\t', rl_complete); + + char *response = NULL; + do { + response = readline(PROMPT); + add_history(response); + + if (response == NULL + || strcmp("exit", downcase(trim(response))) == 0) + break; + + // Try to lex & parse the input. + ParseNode *tree = parse(response); + + if (ERROR_TYPE != NO_ERROR) { + handle_error(); + continue; + } + + if (tree == NULL) continue; + + printf("\033[%luC\033[1A", strlen(PROMPT) + strlen(response)); + printf(" ≡ %s\n", display_parsetree(tree)); + printf("#=> %s\n", response); + + free_parsenode(tree); + free(response); + } while (true); + + printf("\r\033[2K"); + printf("Buh-bye.\n"); + + return EXIT_SUCCESS; +} + + diff --git a/src/parse.c b/src/parse.c @@ -0,0 +1,406 @@ +#include "prelude.h" + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "error.h" +#include "parse.h" + +void free_token(Token *token) +{ + free((char *)token->value); + free(token); +} + +void free_parsenode(ParseNode *node) +{ + switch (node->type) { + case IDENT_NODE: + free((char *)node->node.ident.value); + break; + case NUMBER_NODE: + break; + case UNARY_NODE: + free_parsenode((ParseNode *)node->node.unary.callee); + free_parsenode((ParseNode *)node->node.unary.operand); + break; + case BINARY_NODE: + free_parsenode((ParseNode *)node->node.binary.callee); + free_parsenode((ParseNode *)node->node.binary.left); + free_parsenode((ParseNode *)node->node.binary.right); + break; + } + free(node); +} + +/* --- Functions related to tokenising/lexing --- */ + +Token *new_token(TokenType type, const char *value) +{ + Token *t = malloc(sizeof(Token)); + t->type = type; + t->value = strdup(value); + return t; +} + +TokenType char_token_type(char c, TokenType last_token_type) +{ + if (c <= '9' && c >= '0') { + if (last_token_type == TT_IDENTIFIER) + return TT_IDENTIFIER; + else + return TT_NUMERIC; + } + if (c == '_' + && (last_token_type == TT_IDENTIFIER + || last_token_type == TT_NUMERIC)) + return last_token_type; + if (c == '.' + && (last_token_type == TT_OPERATOR + || last_token_type == TT_NONE)) + return TT_NUMERIC; + if ((c == '.' || c == 'E') + && last_token_type == TT_NUMERIC) + return TT_NUMERIC; + if (c == '(') + return TT_LPAREN; + if (c == ')') + return TT_RPAREN; + if (c == '\0' || c == ' ' || c == '\t' || c == '\n' || c == '\r') + return TT_NONE; + + // All possible operator/special-symbol characters: + if ((c >= '!' && c <= '/') + || (c >= ':' && c <= '@') + || (c >= '{' && c <= '~')) { + return TT_OPERATOR; + } + + // Anything else is treated as an identifier. + return TT_IDENTIFIER; +} + +/// Returns a token matching in the source, and increments the +/// pointer to point after the match. This function is to be +/// applied repeatedly until the source is empty (NULL). +/// NOTE: Return of NULL means EOF or an Error was found. +/// NOTE: `source' is a pointer to a pointer, so that the +/// character `source' is pointing to can be offset. +Token *lex(char **source) +{ + if (**source == '\0') + return NULL; // No more tokens. + + TokenType tt = char_token_type(**source, TT_NONE); + + // Skip over TT_NONE tokens (spaces, tabs, etc.). + while (tt == TT_NONE) { + (*source)++; + if (**source == '\0') + return NULL; + tt = char_token_type(**source, tt); + } + + // First of all, check if it matches an operator. + for (usize i = 0; i < len(KNOWN_OPERATORS); ++i) { + const char *operator = KNOWN_OPERATORS[i].value; + usize operator_len = strlen(operator); + + if (strncmp(*source, operator, operator_len) == 0) { + Token *token = new_token(TT_OPERATOR, operator); + *source += operator_len; + return token; + } + } + + // If we match an operator-type character, and it wasn't + // found in our for-loop, it means it is an unknown operator, + // and we should report it as an error. + if (tt == TT_OPERATOR) { + ERROR_TYPE = SYNTAX_ERROR; + sprintf(ERROR_MSG, "Operator `%c' does not exist.", **source); + return NULL; + } + + // Now, we check the type of the current character, + // then we check the next character, if the type is the same + // we continue collecting the characters until the type is no longer + // the same, and we return the token. + + // That's to say, characters of the same `kind' coalesce. + TokenType previous_tt = tt; + usize span = 0; + + // Do not coalesce parentheses. + if (tt == TT_RPAREN || tt == TT_LPAREN) { + span++; + } else { + while (tt == previous_tt) { + span++; + previous_tt = char_token_type(*(*source + span), previous_tt); + } + } + + char *sub_str = malloc(span * sizeof(char)); + strncpy(sub_str, *source, span); + sub_str[span] = '\0'; + + *source += span; + Token *token = new_token(tt, sub_str); + return token; +} + +Token *peek(char **source) +{ + char *original = *source; + Token *token = lex(source); + *source = original; + return token; +} + +/* --- Functions related to parsing into a tree --- */ + +void node_into_ident(const char *str, ParseNode *node) +{ + IdentNode *ident = malloc(sizeof(IdentNode)); + ident->value = strdup(str); + node->type = IDENT_NODE; + node->node.ident = *ident; +} + +// TODO: Support more than just INTs. +NumberNode *parse_number(const char *str) +{ + NumberNode *number = malloc(sizeof(NumberNode)); + number->type = INT; + number->value.i = atoi(str); + return number; +} + +ParseNode *parse_prefix(const Token *token, char **rest) +{ + ParseNode *node = malloc(sizeof(ParseNode)); + + switch (token->type) { + case TT_NUMERIC: { + NumberNode *num = parse_number(token->value); + if (num == NULL) { + ERROR_TYPE = SYNTAX_ERROR; + sprintf(ERROR_MSG, "Malformatted number literal (`%s').", + token->value); + return NULL; + } + + node->type = NUMBER_NODE; + node->node.number = *num; + break; + } + case TT_IDENTIFIER: { + node_into_ident(token->value, node); + break; + } + case TT_OPERATOR: { + // Verify this is a prefix operator. + bool is_prefix = false; + u16 precedence = 0; + for (usize i = 0; i < len(KNOWN_OPERATORS); ++i) { + Operator op = KNOWN_OPERATORS[i]; + if (strcmp(token->value, op.value) == 0 && op.fixity == PREFIX) { + is_prefix = true; + precedence = op.precedence; + break; + } + } + if (!is_prefix) { + ERROR_TYPE = PARSE_ERROR; + sprintf(ERROR_MSG, + "`%s' operator cannot be used as a prefix.\n" + " Missing left-hand-side argument of `%s' operator.", + token->value, token->value); + return NULL; + } + + UnaryNode *unary = malloc(sizeof(UnaryNode)); + ParseNode *callee = malloc(sizeof(ParseNode)); + node_into_ident(token->value, callee); + + unary->callee = callee; + unary->operand = parse_expr(rest, precedence); + unary->is_postfix = false; + + node->type = UNARY_NODE; + node->node.unary = *unary; + + break; + } + case TT_LPAREN: { + node = parse_expr(rest, 0); + token = lex(rest); + if (token == NULL || token->type != TT_RPAREN) { + ERROR_TYPE = PARSE_ERROR; + sprintf(ERROR_MSG, "Unclosed paranthetical expression.\n" + " Missing `)' closing parenthesis."); + free_token((Token *)token); + return NULL; + } + free_token((Token *)token); + break; + } + default: + return NULL; + } + + return node; +} + +u16 token_precedence(Token *token) +{ + // Check if its an `)'. + if (token->type == TT_RPAREN) + return 0; + // Check if its an operator. + for (usize i = 0; i < len(KNOWN_OPERATORS); ++i) { + Operator op = KNOWN_OPERATORS[i]; + if (strcmp(token->value, op.value) == 0 + && (op.fixity == INFIX || op.fixity == POSTFIX)) { + return op.precedence; + } + } + // Otherwise, assume it's a function application. + return FUNCTION_PRECEDENCE; +} + +ParseNode *parse_infix(const ParseNode *left, + const Token *token, + char **rest, u16 last_precedence) +{ + bool is_postfix = false; + bool is_operator = false; + u16 precedence = 0; + Associativity assoc = NEITHER_ASSOC; + + // Extract information on operator (if operator at all) + for (usize i = 0; i < len(KNOWN_OPERATORS); ++i) { + Operator op = KNOWN_OPERATORS[i]; + if (strcmp(token->value, op.value) == 0) { + is_operator = true; + precedence = op.precedence; + assoc = op.assoc; + if (op.fixity == POSTFIX) { + is_postfix = true; + break; + } + } + } + + // Function calls and post fix operators share some common code. + if (is_postfix || !is_operator) { + UnaryNode *unary = malloc(sizeof(UnaryNode)); + + if (is_postfix) { // Postfix operator. + ParseNode *node = malloc(sizeof(ParseNode)); + node_into_ident(token->value, node); + unary->callee = node; + unary->operand = left; + unary->is_postfix = true; + } else { // Function call. + unary->callee = left; + unary->operand = parse_expr(rest, FUNCTION_PRECEDENCE); + unary->is_postfix = false; + } + + ParseNode *node = malloc(sizeof(ParseNode)); + node->type = UNARY_NODE; + node->node.unary = *unary; + + return node; + } + + // Trick for right associativity is to pretend that what's + // to the left of you is sligtly higher precedence. + if (assoc == RIGHT_ASSOC) + precedence -= 1; + + // Don't allow chaining of operators with no + // left or right associativity. + if (assoc == NEITHER_ASSOC && precedence == last_precedence) { + ERROR_TYPE = PARSE_ERROR; + sprintf(ERROR_MSG, "Operator `%s' cannot be chained with\n" + " operators of equal precedence, please use parentheses.", + token->value); + return NULL; + } + + // Binary operator: + + // Call node. + ParseNode *callee = malloc(sizeof(ParseNode)); + node_into_ident(token->value, callee); + + // Root binary node. + ParseNode *binary_node = malloc(sizeof(ParseNode)); + BinaryNode *binary = malloc(sizeof(BinaryNode)); + + // Populate binary struct. + binary->callee = callee; + binary->left = left; + binary->right = parse_expr(rest, precedence); + + if (binary->right == NULL) { + ERROR_TYPE = PARSE_ERROR; + sprintf(ERROR_MSG, + "Attempted to use `%s' infix-operator as a suffix.\n" + " Missing right-hand-side argument of `%s' operator.", + token->value, token->value); + return NULL; + } + + // Populate parse node as binary parse node. + binary_node->type = BINARY_NODE; + binary_node->node.binary = *binary; + + return binary_node; +} + +ParseNode *parse_expr(char **slice, u16 precedence) +{ + Token *token = lex(slice); + + if (token == NULL) + return NULL; + + ParseNode *left = parse_prefix(token, slice); + + Token *token_ahead = peek(slice); + if (token_ahead == NULL) + return left; + + u16 current_precedence = token_precedence(token_ahead); + u16 previous_precedence = 0; + + while (precedence < current_precedence) { + if (current_precedence != FUNCTION_PRECEDENCE) + token = lex(slice); + + left = parse_infix(left, token, slice, previous_precedence); + if (left == NULL || **slice == '\0') + break; + + token_ahead = peek(slice); + if (token == NULL) + break; + + previous_precedence = current_precedence; + current_precedence = token_precedence(token_ahead); + } + + free_token((Token *)token_ahead); + free_token((Token *)token); + return left; +} + +ParseNode *parse(char *source) +{ + return parse_expr(&source, 0); +} diff --git a/src/parse.h b/src/parse.h @@ -0,0 +1,130 @@ +#pragma once + +// Tokens: +typedef enum { + TT_LPAREN, TT_RPAREN, + TT_IDENTIFIER, + TT_NUMERIC, + TT_OPERATOR, + TT_NONE, +} TokenType; + +typedef struct { + TokenType type; + const char *value; +} Token; + +Token *new_token(TokenType, const char *); + +// Operator properties. +typedef enum { + LEFT_ASSOC, + RIGHT_ASSOC, + NEITHER_ASSOC, +} Associativity; + +typedef enum { + PREFIX, + INFIX, + POSTFIX, +} Fixity; + +typedef struct { + const char *value; + u16 precedence; + Associativity assoc; + Fixity fixity; +} Operator; + +static const u16 FUNCTION_PRECEDENCE = 9; +// Known operators from longest to shortests. +static const Operator KNOWN_OPERATORS[] = { + // 3 characters long. + { "not", 8, RIGHT_ASSOC, PREFIX }, + // 2 characters long. + { "**", 7, RIGHT_ASSOC, INFIX }, + { "<=", 4, LEFT_ASSOC, INFIX }, + { ">=", 4, LEFT_ASSOC, INFIX }, + { "==", 3, LEFT_ASSOC, INFIX }, + { "/=", 3, LEFT_ASSOC, INFIX }, + // 1 character long. + { "-", 10, RIGHT_ASSOC, PREFIX }, + { "+", 10, RIGHT_ASSOC, PREFIX }, + { "¬", 10, RIGHT_ASSOC, PREFIX }, + { "!", 10, LEFT_ASSOC, POSTFIX }, + { "^", 7, RIGHT_ASSOC, INFIX }, + { "*", 6, LEFT_ASSOC, INFIX }, + { "/", 6, LEFT_ASSOC, INFIX }, + { "+", 5, LEFT_ASSOC, INFIX }, + { "-", 5, LEFT_ASSOC, INFIX }, + { ">", 4, LEFT_ASSOC, INFIX }, + { "<", 4, LEFT_ASSOC, INFIX }, + { "=", 2, RIGHT_ASSOC, INFIX }, + { ",", 1, LEFT_ASSOC, INFIX }, +}; + +// Parse tree nodes: +typedef enum { + IDENT_NODE, + NUMBER_NODE, + UNARY_NODE, + BINARY_NODE, +} NodeType; + +struct _parse_node; + +typedef struct { + char *value; +} IdentNode; + +typedef enum { + FLOAT, + INT, + BIGINT, // Not supported yet. + RATIO, // Not supported yet. +} NumberType; + +typedef struct { + NumberType type; + union { + fsize f; + ssize i; + // Add bigint_t and ratio_t in future. + } value; +} NumberNode; + +typedef struct { + const struct _parse_node *callee; + const struct _parse_node *operand; + bool is_postfix; +} UnaryNode; + +typedef struct { + const struct _parse_node *callee; + const struct _parse_node *left; + const struct _parse_node *right; +} BinaryNode; + +typedef struct _parse_node { + NodeType type; + union { + IdentNode ident; + NumberNode number; + UnaryNode unary; + BinaryNode binary; + } node; +} ParseNode; + +// Functions: + +void free_token(Token *); +void free_parsenode(ParseNode *); + +Token *lex(char **); +Token *peek(char **); + +NumberNode *parse_number(const char *); +ParseNode *parse_prefix(const Token *, char **); +ParseNode *parse_infix(const ParseNode *, const Token *, char **, u16); +ParseNode *parse_expr(char **, u16); +ParseNode *parse(char *); diff --git a/src/prelude.c b/src/prelude.c @@ -0,0 +1,23 @@ +#include "prelude.h" + +char *trim(char *str) +{ + while (isspace(*str)) + ++str; + + char *end = str + strlen(str) - 1; + while (end > str && isspace(*end)) + --end; + + *(end + 1) = '\0'; + return str; +} + + +char *downcase(char *str) +{ + char *p = strdup(str); + char *start = p; + for (; *p; ++p) *p = tolower(*p); + return start; +} diff --git a/src/prelude.h b/src/prelude.h @@ -0,0 +1,71 @@ +#pragma once + +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <ctype.h> + +#include "error.h" + +#define len(array) (sizeof(array) / sizeof((array)[0])) + +typedef uint8_t u8 ; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; + +typedef int8_t s8 ; +typedef int16_t s16; +typedef int32_t s32; +typedef int64_t s64; + +typedef size_t usize; +typedef ptrdiff_t ssize; + +typedef float f32; +typedef double f64; +typedef long double fsize; + +char *trim(char *); +char *downcase(char *); + +#define STR_HELPER(x) #x +#define STR(x) STR_HELPER(x) + +// Check windows +#if defined(_WIN32) || defined(_WIN64) + #ifdef _WIN64 + #define ARCH64 + #else + #define ARCH32 + #endif +#endif + +// Check GCC +#ifdef __GNUC__ + #if defined(__x86_64__) || defined(__ppc64__) + #define ARCH64 + #else + #define ARCH32 + #endif +#endif + +#if defined(_MSC_VER) + #define COMPILER "Visual Studio " STR(VS) +#elif defined(__GNUC__) + #define COMPILER "GCC " STR(__GNUC__) "." STR(__GNUC_MINOR__) +#elif defined(__clang__) + #define COMPILER "Clang " STR(__clang_major__) "." STR(__clang_minor__) +#elif defined(__EMSCRIPTEN__) + #define COMPILER "WebAssembly " STR(__EMSCRIPTEN__) +#elif defined(__MINGW32__) + #define COMPILER "MinGW 32bit " STR(__MINGW32__) +#elif defined(__MINGW64__) + #define COMPILER "MinGW 64bit " STR(__MINGW64__) +#else + #define COMPILER "Unknown Compiler" +#endif +