mirror of
https://github.com/teo3300/rust-mal.git
synced 2026-01-12 01:05:32 +01:00
342 lines
10 KiB
Rust
342 lines
10 KiB
Rust
use std::cell::{Cell, RefCell};
|
|
use std::rc::Rc;
|
|
|
|
// Specyfy components in "types"
|
|
use crate::types::*;
|
|
// By specifying enum variants it's possible to omit namespace
|
|
use crate::types::MalType::*;
|
|
|
|
use regex::Regex;
|
|
|
|
pub struct Reader {
|
|
tokens: RefCell<Vec<String>>,
|
|
ptr: Cell<usize>,
|
|
}
|
|
|
|
type Tokens = Vec<String>;
|
|
|
|
// DONE: instead of panic on missing ")" try implementing a multi line parsing
|
|
// Status on return should always be The last element of the last opened lists
|
|
// (append to the "last" list) while traversing
|
|
impl Reader {
|
|
pub fn new() -> Reader {
|
|
Reader {
|
|
tokens: RefCell::new(Vec::new()),
|
|
ptr: Cell::new(0),
|
|
}
|
|
}
|
|
|
|
pub fn push(&self, input: &str) -> &Self {
|
|
self.ptr.set(0);
|
|
// reset the state of the parser and push the additional strings
|
|
self.tokens.borrow_mut().append(&mut tokenize(input));
|
|
self
|
|
}
|
|
|
|
pub fn clear(&self) {
|
|
self.ptr.set(0);
|
|
*self.tokens.borrow_mut() = Vec::new();
|
|
}
|
|
|
|
// May be improved
|
|
fn get_token(&self, i: usize) -> Result<String, MalErr> {
|
|
self.tokens
|
|
.borrow()
|
|
.get(i)
|
|
.ok_or(MalErr::recoverable("Unexpected EOF"))
|
|
.cloned()
|
|
}
|
|
|
|
/// Returns the token at the current position
|
|
fn peek(&self) -> Result<String, MalErr> {
|
|
self.get_token(self.ptr.get())
|
|
}
|
|
|
|
/// Returns the token at current position and increment current position
|
|
fn next(&self) -> Result<String, MalErr> {
|
|
self.ptr.set(self.ptr.get() + 1);
|
|
self.get_token(self.ptr.get() - 1)
|
|
}
|
|
|
|
/// Returns true if the reader has been consumed entirely
|
|
pub fn ended(&self) -> bool {
|
|
self.tokens.borrow().len() == self.ptr.get()
|
|
}
|
|
|
|
/// Repeatedly calls `read_form` of the reader object until it finds a ")" token
|
|
/// EOF -> Return an error (Dyck language error)
|
|
/// Accumulates results into a MalList
|
|
/// NOTE: `read_list` calls `read_form` -> enable recursion
|
|
/// (lists can contains other lists)
|
|
fn read_list(&self, terminator: &str) -> MalRet {
|
|
self.next()?;
|
|
|
|
let mut vector = Vec::new();
|
|
|
|
while self.peek()? != terminator {
|
|
vector.push(self.read_form()?)
|
|
}
|
|
self.next()?;
|
|
|
|
match terminator {
|
|
")" => Ok(List(vector.into())),
|
|
"]" => Ok(Vector(vector.into())),
|
|
"}" => make_map(vector.into()),
|
|
t => Err(MalErr::unrecoverable(
|
|
format!("Unknown collection terminator: {}", t).as_str(),
|
|
)),
|
|
}
|
|
}
|
|
|
|
/// Read atomic token and return appropriate scalar ()
|
|
fn read_atom(&self) -> MalRet {
|
|
match &self.next()?[..] {
|
|
")" | "]" | "}" => Err(MalErr::unrecoverable("Missing open parenthesis")),
|
|
"false" => Ok(Bool(false)),
|
|
"true" => Ok(Bool(true)),
|
|
"nil" => Ok(Nil),
|
|
tk => {
|
|
if Regex::new(r"^-?[0-9]+$").unwrap().is_match(tk) {
|
|
return Ok(Int(tk.parse::<isize>().unwrap()));
|
|
}
|
|
if tk.starts_with('\"') {
|
|
if tk.len() > 1 && tk.ends_with('\"') {
|
|
return Ok(Str(unescape_str(tk).into()));
|
|
}
|
|
return Err(MalErr::unrecoverable(
|
|
"End of line reached without closing string",
|
|
));
|
|
}
|
|
if tk.starts_with(':') {
|
|
return Ok(Key(format!("ʞ{}", tk).into()));
|
|
}
|
|
Ok(Sym(tk.into()))
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Peek at the first token in reader
|
|
///
|
|
/// Switch on the first character
|
|
/// "(" -> call `read_list`
|
|
/// otherwise -> call `read_atom`
|
|
fn read_form(&self) -> MalRet {
|
|
// String slice containing the whole string
|
|
match &self.peek()?[..] {
|
|
// Consume "(" and parse list
|
|
"(" => self.read_list(")"),
|
|
"[" => self.read_list("]"),
|
|
"{" => self.read_list("}"),
|
|
// Ugly quote transformation for quote expansion
|
|
"'" => {
|
|
self.next()?;
|
|
Ok(List(Rc::new([
|
|
MalType::Sym("quote".into()),
|
|
self.read_form()?,
|
|
])))
|
|
}
|
|
"@" => {
|
|
self.next()?;
|
|
Ok(List(Rc::new([
|
|
MalType::Sym("deref".into()),
|
|
self.read_form()?,
|
|
])))
|
|
}
|
|
_ => self.read_atom(),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Call `tokenize` on a string
|
|
/// Create anew Reader with the tokens
|
|
/// Call read_from with the reader instance
|
|
pub fn read_str(reader: &Reader) -> MalRet {
|
|
let mut ret = Nil;
|
|
while !reader.ended() {
|
|
ret = reader.read_form()?;
|
|
}
|
|
Ok(ret)
|
|
}
|
|
|
|
/// Read a string and return a list of tokens in it (following regex in README)
|
|
// Add error handling for strings that are not terminated
|
|
fn tokenize(input: &str) -> Tokens {
|
|
let tokens =
|
|
Regex::new(r#"[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('"`,;)]*)"#)
|
|
.unwrap()
|
|
.captures_iter(input)
|
|
.map(|e| e[1].to_string())
|
|
.filter(|e| !(e.is_empty() || e.starts_with(';')))
|
|
.collect::<Vec<String>>();
|
|
tokens
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// Tests //
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use std::borrow::Borrow;
|
|
|
|
use crate::{
|
|
reader::read_str,
|
|
types::{MalMap, MalType as M},
|
|
};
|
|
|
|
use super::{tokenize, Reader};
|
|
|
|
fn reader_setup1() -> Reader {
|
|
let r = Reader::new();
|
|
r.push("()[]{} \"str\" :key sym 1 ; comment");
|
|
return r;
|
|
}
|
|
|
|
#[test]
|
|
fn init() {
|
|
let r = Reader::new();
|
|
assert_eq!(r.tokens.borrow().len(), 0);
|
|
assert_eq!(r.ptr.get(), 0);
|
|
}
|
|
|
|
#[test]
|
|
fn do_tokenize() {
|
|
assert_eq!(
|
|
tokenize("()[]{} \"str\" :key sym 1 ; comment"),
|
|
vec!["(", ")", "[", "]", "{", "}", "\"str\"", ":key", "sym", "1"]
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn push() {
|
|
let r = reader_setup1();
|
|
let mut tokens = Vec::new();
|
|
r.tokens
|
|
.borrow()
|
|
.iter()
|
|
.for_each(|i| tokens.push(i.clone()));
|
|
assert_eq!(
|
|
tokens,
|
|
vec!["(", ")", "[", "]", "{", "}", "\"str\"", ":key", "sym", "1"]
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn get_token() {
|
|
let r = reader_setup1();
|
|
assert!(matches!(r.get_token(0), Ok(i) if i == "("));
|
|
assert!(matches!(r.get_token(9), Ok(i) if i == "1"));
|
|
assert!(matches!(r.get_token(10), Err(e) if e.is_recoverable()));
|
|
}
|
|
|
|
#[test]
|
|
fn get() {
|
|
let r = reader_setup1();
|
|
assert!(matches!(r.peek(), Ok(i) if i == "("));
|
|
assert_eq!(r.ptr.get(), 0);
|
|
assert!(matches!(r.next(), Ok(i) if i == "("));
|
|
assert_eq!(r.ptr.get(), 1);
|
|
assert!(matches!(r.next(), Ok(i) if i == ")"));
|
|
assert_eq!(r.ptr.get(), 2);
|
|
}
|
|
|
|
#[test]
|
|
fn clear() {
|
|
let r = reader_setup1();
|
|
r.clear();
|
|
assert_eq!(r.tokens.borrow().len(), 0);
|
|
assert_eq!(r.ptr.get(), 0);
|
|
}
|
|
|
|
#[test]
|
|
fn ended() {
|
|
let r = reader_setup1();
|
|
assert!(!r.ended());
|
|
for _ in r.tokens.borrow().iter() {
|
|
assert!(matches!(r.next(), Ok(_)))
|
|
}
|
|
assert!(r.ended());
|
|
}
|
|
|
|
#[test]
|
|
fn errors() {
|
|
let r = Reader::new();
|
|
// Correct throws error
|
|
assert!(matches!(r.peek(), Err(e) if e.is_recoverable()));
|
|
assert!(matches!(r.next(), Err(e) if e.is_recoverable()));
|
|
}
|
|
|
|
#[test]
|
|
fn read_atom() {
|
|
let r = Reader::new();
|
|
r.push("nil 1 true a \"s\" :a ) ] }");
|
|
assert!(matches!(r.read_atom(), Ok(x) if matches!(x, M::Nil)));
|
|
assert!(matches!(r.read_atom(), Ok(x) if matches!(x, M::Int(1))));
|
|
assert!(matches!(r.read_atom(), Ok(x) if matches!(x, M::Bool(true))));
|
|
assert!(
|
|
matches!(r.read_atom(), Ok(x) if matches!(x.clone(), M::Sym(v) if matches!(v.borrow(), "a")))
|
|
);
|
|
assert!(
|
|
matches!(r.read_atom(), Ok(x) if matches!(x.clone(), M::Str(v) if matches!(v.borrow(), "s")))
|
|
);
|
|
assert!(
|
|
matches!(r.read_atom(), Ok(x) if matches!(x.clone(), M::Key(v) if matches!(v.borrow(), "ʞ:a")))
|
|
);
|
|
assert!(matches!(r.read_atom(), Err(e) if !e.is_recoverable()));
|
|
assert!(matches!(r.read_atom(), Err(e) if !e.is_recoverable()));
|
|
assert!(matches!(r.read_atom(), Err(e) if !e.is_recoverable()));
|
|
}
|
|
|
|
#[test]
|
|
fn _read_str() {
|
|
let r = Reader::new();
|
|
|
|
// Test list
|
|
let expected = vec![1, 2, 12];
|
|
r.push("(1 2 12)");
|
|
assert!(matches!(
|
|
read_str(&r), Ok(x)
|
|
if matches!(x.clone(), M::List(list)
|
|
if list.len() == expected.len()
|
|
&& list.iter().zip(expected)
|
|
.all(|(x, y)| matches!(x, M::Int(v) if (*v as isize) == y)))));
|
|
r.clear();
|
|
|
|
// Test vector
|
|
let exp = vec![1, 2, 12];
|
|
r.push("[1 2 12]");
|
|
assert!(matches!(
|
|
read_str(&r), Ok(x)
|
|
if matches!(x.clone(), M::Vector(list)
|
|
if list.len() == exp.len()
|
|
&& list.iter().zip(exp)
|
|
.all(|(x, y)| matches!(x, M::Int(v) if (*v as isize) == y)))));
|
|
r.clear();
|
|
|
|
// Test map
|
|
r.push("{\"i\" 1 \"s\" \"str\" \"t\" true \"n\" nil :s :sym}");
|
|
let t = match read_str(&r) {
|
|
Ok(x) => match x {
|
|
M::Map(x) => x,
|
|
_ => {
|
|
assert!(false);
|
|
MalMap::new()
|
|
}
|
|
},
|
|
_ => {
|
|
assert!(false);
|
|
MalMap::new()
|
|
}
|
|
};
|
|
assert!(matches!(t.get("n"), Some(x) if matches!(&x, M::Nil)));
|
|
assert!(matches!(t.get("t"), Some(x) if matches!(&x, M::Bool(v) if *v)));
|
|
assert!(matches!(t.get("i"), Some(x) if matches!(&x, M::Int(v) if *v == 1)));
|
|
assert!(
|
|
matches!(t.get("s"), Some(x) if matches!(&x, M::Str(v) if matches!(v.borrow(), "str")))
|
|
);
|
|
assert!(
|
|
matches!(t.get("ʞ:s"), Some(x) if matches!(&x, M::Key(v) if matches!(v.borrow(), "ʞ:sym")))
|
|
);
|
|
}
|
|
}
|