Step 1: Create an AST

This step consists of creating the AST that represents the input
expression. The only evaluation done for now is the recognition of three
MalTypes for the nodes of the AST:
- Symbols: atomic isolated groups of characters
- Integers: Symbols that can be parsed as numbers and are treated as such
- Lists: recognizable by the presence of parentheses (only "()" for now,
  "[]" and "{}" later), these can contain any number of MalTypes

The second half of this step (much easier) is to reconstruct the
original syntax (with clean whitespace) to check the correctness of the
process.

Signed-off-by: teo3300 <matteo.rogora@live.it>
This commit is contained in:
teo3300
2023-06-05 23:04:51 +02:00
parent 9144fc04bc
commit 13790d0864
5 changed files with 194 additions and 3 deletions

View File

@ -1,8 +1,12 @@
// io lib to read input and print output
use std::io::{self, Write};
mod step0_repl;
use step0_repl::rep;
mod printer;
mod reader;
mod types;
mod step1_read_print;
use step1_read_print::rep;
fn main() -> io::Result<()> {
loop {
@ -14,6 +18,6 @@ fn main() -> io::Result<()> {
io::stdin().read_line(&mut input)?;
print!("{}", rep(&input));
println!("{}", rep(&input.replace("\n", " ")));
}
}

15
src/printer.rs Normal file
View File

@ -0,0 +1,15 @@
use crate::types::MalType;
/// Renders an AST node back into its textual form.
///
/// Symbols are emitted verbatim, integers in decimal, and lists as their
/// elements rendered recursively, separated by a single space and wrapped
/// in parentheses (an empty list becomes `()`).
pub fn pr_str(ast: &MalType) -> String {
    match ast {
        MalType::List(items) => {
            // Render every element first, then glue them together.
            let rendered: Vec<String> = items.iter().map(pr_str).collect();
            format!("({})", rendered.join(" "))
        }
        MalType::Integer(number) => number.to_string(),
        MalType::Symbol(name) => name.clone(),
    }
}

130
src/reader.rs Normal file
View File

@ -0,0 +1,130 @@
use std::collections::VecDeque;
use crate::types::MalType;
use regex::Regex;
/// Cursor over the tokens of one input string.
///
/// Tokens are consumed from the front of the queue as they are parsed.
pub struct Reader {
/// Tokens that have not been consumed yet; the front is the current position.
tokens: VecDeque<String>,
}
/// Panic message used by `Reader::peek` and `Reader::next` when a token is
/// requested but none are left.
const PAREN_ERROR: &str =
"Looks like you reached a dead end, did you perhaps miss any \")\" or left some extra \"(\"?";
impl Reader {
    /// Wraps an already tokenized input.
    fn new(tokens: VecDeque<String>) -> Reader {
        Self { tokens }
    }

    /// Returns the token at the current position without consuming it.
    ///
    /// # Panics
    ///
    /// Panics with `PAREN_ERROR` when no tokens are left.
    fn peek(&self) -> &str {
        self.tokens
            .front()
            .map(String::as_str)
            .unwrap_or_else(|| panic!("{}", PAREN_ERROR))
    }

    /// Removes the token at the current position and returns it.
    ///
    /// # Panics
    ///
    /// Panics with `PAREN_ERROR` when no tokens are left.
    // TODO: PLEASE USE THE PEEK FUNCTION
    fn next(&mut self) -> String {
        self.tokens
            .pop_front()
            .unwrap_or_else(|| panic!("{}", PAREN_ERROR))
    }

    /// Parses the elements of a list whose opening "(" was already consumed.
    ///
    /// Calls `read_form` for every element until the current token is ")",
    /// consumes that ")" and returns the elements as a `MalType::List`.
    /// Because `read_form` may call back into `read_list`, lists can contain
    /// other lists.
    ///
    /// # Panics
    ///
    /// Panics with `PAREN_ERROR` if the tokens run out before a ")" is found.
    fn read_list(&mut self) -> MalType {
        let mut elements = Vec::new();
        while self.peek() != ")" {
            elements.push(self.read_form());
        }
        // Drop the closing ")" so the caller resumes right after the list.
        self.next();
        MalType::List(elements)
    }

    /// Consumes one token and turns it into a scalar node.
    ///
    /// A token that parses as `i32` becomes a `MalType::Integer`; any other
    /// token becomes a `MalType::Symbol`.
    ///
    /// # Panics
    ///
    /// Panics if the token is ")" (a closing parenthesis with no open list),
    /// or with `PAREN_ERROR` if no tokens are left.
    fn read_atom(&mut self) -> MalType {
        let token = self.next();
        if let Ok(number) = token.parse::<i32>() {
            return MalType::Integer(number);
        }
        if token == ")" {
            panic!("Invalid token \")\"");
        }
        MalType::Symbol(token)
    }

    /// Parses the next complete form.
    ///
    /// Dispatches on the current token:
    /// "(" -> consume it and parse a list with `read_list`
    /// anything else -> parse a scalar with `read_atom`
    fn read_form(&mut self) -> MalType {
        if self.peek() == "(" {
            self.next();
            self.read_list()
        } else {
            self.read_atom()
        }
    }
}
/// Debug helper: dumps the AST to stdout, one node per line.
///
/// `base` is the nesting depth of `ast`; each level is indented by two
/// spaces so that the elements of a list appear under their `List:` header.
#[allow(dead_code)] // kept as a manual debugging aid, not called in normal runs
fn pretty_print(ast: &MalType, base: usize) {
    // Repeating an empty string would never indent anything, so use a real
    // indentation unit.
    print!("{}", "  ".repeat(base));
    match ast {
        MalType::Symbol(sym) => println!("Sym: {}", sym),
        MalType::Integer(val) => println!("Int: {}", val),
        MalType::List(vec) => {
            println!("List: ");
            for el in vec {
                pretty_print(el, base + 1);
            }
        }
    }
}
/// Parses `input` into an AST.
///
/// Tokenizes the string with `tokenize`, wraps the tokens in a new `Reader`
/// and calls `read_form` once, so only the first form of the input is read.
///
/// # Panics
///
/// Panics if `input` contains no tokens or if its parentheses are not
/// balanced (see `Reader`).
pub fn read_str(input: &str) -> MalType {
    let mut reader = Reader::new(tokenize(input));
    // When debugging, the returned tree can be dumped with `pretty_print`.
    reader.read_form()
}
/// Splits `input` into its tokens (following the regex in the README).
///
/// Leading whitespace and commas are skipped before each token. A token is
/// one of: `~@`, a single special character, a double-quoted string (the
/// closing quote is optional in the pattern), a `;` comment running to the
/// end of the line, or a run of ordinary characters. Empty matches are
/// dropped.
// TODO: add error handling for strings that are not terminated
fn tokenize(input: &str) -> VecDeque<String> {
    let pattern =
        Regex::new(r###"[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('"`,;)]*)"###)
            .unwrap();
    pattern
        .captures_iter(input)
        // Group 1 is the token itself, without the skipped separators.
        .filter(|caps| !caps[1].is_empty())
        .map(|caps| caps[1].to_string())
        .collect()
}

33
src/step1_read_print.rs Normal file
View File

@ -0,0 +1,33 @@
// Structure the main functions of the interpreter
//
// For now just act as an echo, note that each function should not modify the
// input, thus this can be referenced by the previous step without the need
// to allocate more memory
use crate::printer::pr_str;
use crate::reader::read_str;
use crate::types::MalType;
#[allow(non_snake_case)]
/// Read step: parses the input string into an AST by delegating to `read_str`.
fn READ(input: &str) -> MalType {
read_str(input)
}
#[allow(non_snake_case)]
/// Eval step: currently returns the AST unchanged.
fn EVAL(ast: MalType) -> MalType {
ast
}
#[allow(non_snake_case)]
/// Print step: renders the evaluated AST as a string by delegating to `pr_str`.
fn PRINT(input: MalType) -> String {
pr_str(&input)
}
/// Runs one read-eval-print cycle on `input` and returns the printed result.
pub fn rep(input: &str) -> String {
    PRINT(EVAL(READ(input)))
}

9
src/types.rs Normal file
View File

@ -0,0 +1,9 @@
/// A node of the AST; every Mal value is one of these variants.
///
/// `Clone`, `PartialEq` and `Eq` are derived so that nodes can be copied
/// and compared structurally.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MalType {
    /// A sequence of nodes; may be empty and may contain other lists.
    List(Vec<MalType>),
    /// An atomic group of characters that is not an integer.
    Symbol(String),
    /// A signed 32-bit integer literal.
    Integer(i32),
}