diff --git a/Cargo.toml b/Cargo.toml index f7f33fb..1ff376b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,10 @@ cc = "^1.0" [dependencies] lazy_static = "^1" libc = "^0.2" +nom = { version = "7.1", optional = true } [features] fn_traits = [] ignore_target = [] +parsing = ["nom"] +default = ["parsing"] diff --git a/README.md b/README.md index b68643b..a72c1d6 100644 --- a/README.md +++ b/README.md @@ -1,39 +1,57 @@ -# Simple RPN Math Compiler +# Simple Math Compiler -Takes a sequence of commands and gives a `Function` object from which can give a function pointer taking zero to six `isize`s and returning `isize`. +Compiles a simple language. -### Commands: +### Syntax: -* `a`: Push the first function argument to the stack -* `b`: Push the second function argument to the stack -* `c`: Push the third function argument to the stack -* `d`: Push the fourth function argument to the stack -* `e`: Push the fifth function argument to the stack -* `f`: Push the sixth function argument to the stack -* ``: Push value to stack -* `p`: Push to the stack a copy of the Nth value from the top of the stack (0-indexed from the top) -* `s`: Pop a value from the stack and set the Nth value from the top of the stack (0-indexed from the top, after the pop) to that value -* `+`: Pop two values, push their sum -* `*`: Pop two values, push their product -* `-`: Pop two values, push their difference (`a b -` gives a-b) -* `/`: Pop two values, push their quotient (`a b /` gives a/b) -* `%`: Pop two values, push their remainder (`a b %` gives a%b) +``` +program : item* +item : function_item | static_item +variable : `mut`? ident +function_item : `fn` ident `(` (variable),* `)` block +block : `{` statement* `}` +statement : let_stmt | expr_stmt | loop_stmt | return_stmt +let_stmt : `let` variable `=` expr `;` +expr_stmt : expr `;` +loop_stmt : `while` expr block +return_stmt : `return` expr `;` +static_item : `static` `atomic`? 
ident `=` expr `;` +expr : assign_expr +assign_expr : or_expr (assign_op assign_expr)* +or_expr : and_expr (or_op or_expr)* +and_expr : compare_expr (and_op and_expr)* +compare_expr : add_expr (compare_op compare_expr)* +add_expr : mul_expr (add_op add_expr)* +mul_expr : call_expr (mul_op mul_expr)* +call_expr : atom_expr | call_expr `(` (expr),* `)` +atom_expr : `(` expr `)` | block | ident | literal -### Loops: +assign_op : `=` | `+=` | `-=` | `*=` | `/=` | `%=` +or_op : `||` +and_op : `&&` +compare_op : `>` | `>=` | `<` | `<=` | `==` | `!=` +add_op : `+` `?`? | `-` `?`? +mul_op : `*` `?`? | `/` `?`? | `%` `?`? +``` -A loop starts with `{` and ends with `}`. Any commands (including other loops) may be inside a loop. The stack must have the same depth at the end of the loop. When execution reaches a loop, if the top value on the stack is zero, the loop will be skipped, otherwise the loop will begin. When an iteration of the loop finishes, if the value on the top of the stack is not zero, the loop will execute again, otherwise it will exit. Because loops read (but do not pop) the top value on the stack, the stack must have at least one element prior to a loop. ### Examples: #### Exponentiation: -`a 1 b { p2 p2 * s1 1 - } p1` +``` +fn exp(a, mut b) { + let mut result = 1; + while b > 0 { + b -= 1; + result = result *?
/// Root of a parsed program: its top-level items in source order.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Module {
    pub items: Vec<Item>,
}

/// A top-level item of a [`Module`].
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum Item {
    FunctionItem(Function),
    StaticItem(Static),
}

/// A function definition: `fn name(parameters) body`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Function {
    pub name: String,
    pub parameters: Vec<Variable>,
    pub body: Block,
}

/// A variable binding (function parameter or `let` target).
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Variable {
    pub name: String,
    /// `true` when the binding was written with the `mut` keyword.
    pub mutable: bool,
}

/// A `static` item with its initializer expression.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Static {
    /// `true` when the item was written with the `atomic` keyword.
    pub atomic: bool,
    pub name: String,
    pub value: Expression,
}

/// A brace-delimited sequence of statements.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Block {
    pub statements: Vec<Statement>,
}

/// A single statement inside a [`Block`].
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum Statement {
    LetStatement(Let),
    ExpressionStatement(Expression),
    LoopStatement(Loop),
    /// `return expr;` — carries the returned expression.
    ReturnStatement(Expression),
}

/// `let variable = value;`
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Let {
    pub variable: Variable,
    pub value: Expression,
}

/// `while condition body`
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Loop {
    pub condition: Expression,
    pub body: Block,
}

/// An expression tree node.
///
/// Variant order and the `rhs, op, lhs` field order of the binary variants are
/// kept exactly as declared originally: the derived `PartialOrd`/`Ord`
/// implementations compare variants and fields in declaration order.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum Expression {
    AssignExpression {
        rhs: Box<Expression>,
        op: Operator,
        lhs: Box<Expression>,
    },
    OrExpression {
        rhs: Box<Expression>,
        op: Operator,
        lhs: Box<Expression>,
    },
    AndExpression {
        rhs: Box<Expression>,
        op: Operator,
        lhs: Box<Expression>,
    },
    CompareExpression {
        rhs: Box<Expression>,
        op: Operator,
        lhs: Box<Expression>,
    },
    AddExpression {
        rhs: Box<Expression>,
        op: Operator,
        lhs: Box<Expression>,
    },
    MulExpression {
        rhs: Box<Expression>,
        op: Operator,
        lhs: Box<Expression>,
    },
    /// An expression that was written inside parentheses.
    ParenExpression {
        expr: Box<Expression>,
    },
    /// `function(args...)`; the callee is itself an expression.
    CallExpression {
        function: Box<Expression>,
        args: Vec<Expression>,
    },
    VariableExpression {
        name: String,
    },
    BlockExpression {
        block: Block,
    },
    /// Integer literal, stored as its source text.
    IntLiteralExpression {
        literal: String,
    },
}

/// An operator as written in the source.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Operator {
    /// Operator text without the `?` suffix, e.g. `"+"` or `"-="`.
    pub operator: String,
    /// `true` when the operator carried the `?` suffix.
    pub checked: bool,
}

#[cfg(feature = "parsing")]
pub(crate) mod parsing;
#[cfg(feature = "parsing")]
pub use parsing::parse;

#[cfg(feature = "parsing")]
pub(crate) mod lexing;
#[cfg(feature = "parsing")]
pub use lexing::lex;
{ + static ref KEYWORDS: HashMap<&'static str, Keyword> = HashMap::from([ + $( ($text, $name::$variant) ),* + ]); + } + } +} + +make_keywords!{ +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum Keyword { + While = "while", + Fn = "fn", + Let = "let", + Return = "return", + Atomic = "atomic", + Mut = "mut", + Static = "static", +} +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum Token<'a> { + Identifier(&'a str), + Keyword(Keyword, &'a str), + Operator(&'a str, Option), + Punct(&'a str), + IntLiteral(&'a str), +} + +pub fn lex(input: &str) -> IResult<&str, Vec> { + delimited(multispace0, many0( + terminated(alt(( + ident_or_kw, + punct, + operator, + int_literal, + )), multispace0) + ), eof)(input) +} + +fn ident_or_kw(input: &str) -> IResult<&str, Token> { + let (rest, id) = recognize(pair( + alt((alpha1, tag("_"))), + many0(alt((alphanumeric1, tag("_")))) + ))(input)?; + match KEYWORDS.get(id) { + Some(kw) => Ok((rest, Token::Keyword(*kw, id))), + None => Ok((rest, Token::Identifier(id))), + } +} + +fn punct(input: &str) -> IResult<&str, Token> { + recognize(one_of("(){};,")).map(Token::Punct).parse(input) +} + +fn operator(input: &str) -> IResult<&str, Token> { + pair( + alt(( + tag("=="), + tag("!="), + tag(">="), + tag("<="), + tag("&&"), + tag("||"), + recognize(pair( // augmented assignment + one_of("+-*%/"), + tag("="), + )), + recognize(one_of("+-*%/=<>")) + )), + opt(nom::character::complete::char('?')) + ).map(|(o, c)| Token::Operator(o, c)).parse(input) +} + +fn int_literal(input: &str) -> IResult<&str, Token> { + recognize(pair( + opt(tag("-")), + many1(one_of("0123456789")) + )).map(|s| Token::IntLiteral(s)).parse(input) +} + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn test1() { + let input = "Hello 45 static world(x, u32) - return"; + let output: Vec = vec![ + Token::Identifier("Hello"), + Token::IntLiteral("45"), + Token::Keyword(Keyword::Static, "static"), + 
Token::Identifier("world"), + Token::Punct("("), + Token::Identifier("x"), + Token::Punct(","), + Token::Identifier("u32"), + Token::Punct(")"), + Token::Operator("-", None), + Token::Keyword(Keyword::Return, "return"), + ]; + let result = lex(input).unwrap().1; + assert_eq!(result, output); + } + #[test] + fn test2() { + let input = r#" + fn test(a, mut b) { + let mut acc = 1; + while b >= 1 { + acc = acc *? a; + b -= 1; + } + return acc; + } + "#; + let output: Vec = vec![ + Token::Keyword(Keyword::Fn, "fn"), + Token::Identifier("test"), + Token::Punct("("), + Token::Identifier("a"), + Token::Punct(","), + Token::Keyword(Keyword::Mut, "mut"), + Token::Identifier("b"), + Token::Punct(")"), + + Token::Punct("{"), + + Token::Keyword(Keyword::Let, "let"), + Token::Keyword(Keyword::Mut, "mut"), + Token::Identifier("acc"), + Token::Operator("=", None), + Token::IntLiteral("1"), + Token::Punct(";"), + + Token::Keyword(Keyword::While, "while"), + Token::Identifier("b"), + Token::Operator(">=", None), + Token::IntLiteral("1"), + + Token::Punct("{"), + + Token::Identifier("acc"), + Token::Operator("=", None), + Token::Identifier("acc"), + Token::Operator("*", Some('?')), + Token::Identifier("a"), + Token::Punct(";"), + + Token::Identifier("b"), + Token::Operator("-=", None), + Token::IntLiteral("1"), + Token::Punct(";"), + + Token::Punct("}"), + + Token::Keyword(Keyword::Return, "return"), + Token::Identifier("acc"), + Token::Punct(";"), + + Token::Punct("}"), + ]; + let result = lex(input).unwrap().1; + assert_eq!(result, output); + } +} \ No newline at end of file diff --git a/src/ast/parsing.rs b/src/ast/parsing.rs new file mode 100644 index 0000000..855c011 --- /dev/null +++ b/src/ast/parsing.rs @@ -0,0 +1,419 @@ +use std::borrow::Cow; + +use nom::{ + IResult, + Parser, + multi::{many0, separated_list0}, + branch::alt, + combinator::{opt}, + sequence::{pair, tuple, delimited, terminated}, Err, error::ParseError, +}; + +use super::{*, lexing::{Token, Keyword}}; + 
+#[derive(Debug)] +pub enum Error { + Nom(nom::error::Error), + Other(Cow<'static, str>), +} + +impl ParseError for Error { + fn from_error_kind(input: I, kind: nom::error::ErrorKind) -> Self { + Self::Nom(ParseError::from_error_kind(input, kind)) + } + + fn append(input: I, kind: nom::error::ErrorKind, other: Self) -> Self { + match other { + Self::Other(msg) => Self::Other(msg), + Self::Nom(nom) => Self::Nom(ParseError::append(input, kind, nom)), + } + } +} + +type TokResult<'a, 'b, T> = IResult<&'a [Token<'b>], T, Error<&'a [Token<'b>]>>; + +pub(crate) fn ident<'a, 'b>(input: &'a[Token<'b>]) -> TokResult<'a, 'b, &'b str> { + match input.split_first() { + Some((Token::Identifier(id), rest)) => Ok((rest, id)), + None => Err(Err::Error(ParseError::from_error_kind( + input, + nom::error::ErrorKind::Eof, + ))), + _ => Err(Err::Error(Error::Other("expected identifier".into()))), + } +} + +pub(crate) fn keyword(kw: Keyword) -> impl for<'a, 'b> Fn(&'a[Token<'b>]) -> TokResult<'a, 'b, Keyword> { + move |input| { + match input.split_first() { + Some((Token::Keyword(k, _), rest)) if *k == kw => Ok((rest, kw)), + None => Err(Err::Error(ParseError::from_error_kind( + input, + nom::error::ErrorKind::Eof, + ))), + _ => Err(Err::Error(Error::Other(format!("expected keyword: {:?}", kw).into()))), + } + } +} + +pub(crate) fn punct(pu: &'static str) -> impl for<'a, 'b> Fn(&'a[Token<'b>]) -> TokResult<'a, 'b, &'static str> { + move |input| { + match input.split_first() { + Some((Token::Punct(p), rest)) if *p == pu => Ok((rest, pu)), + None => Err(Err::Error(ParseError::from_error_kind( + input, + nom::error::ErrorKind::Eof, + ))), + _ => Err(Err::Error(Error::Other(format!("expected punct: {:?}", pu).into()))), + } + } +} + +pub(crate) fn operator(op: Operator) -> impl for<'a, 'b> Fn(&'a[Token<'b>]) -> TokResult<'a, 'b, Operator> { + move |input| { + match input.split_first() { + Some((Token::Operator(o, c), rest)) + if *o == op.operator && (op.checked == c.is_some()) => Ok((rest, 
op.clone())), + None => Err(Err::Error(ParseError::from_error_kind( + input, + nom::error::ErrorKind::Eof, + ))), + _ => Err(Err::Error(Error::Other(format!("expected operator: {:?}", op).into()))), + } + } +} + +pub(crate) fn int_literal<'a, 'b>(input: &'a[Token<'b>]) -> TokResult<'a, 'b, &'b str> { + match input.split_first() { + Some((Token::IntLiteral(lit), rest)) => Ok((rest, lit)), + None => Err(Err::Error(ParseError::from_error_kind( + input, + nom::error::ErrorKind::Eof, + ))), + _ => Err(Err::Error(Error::Other("expected integer literal".into()))), + } +} + +macro_rules! make_operator_fn { + ($name:ident: $ops:tt, $err:literal) => { + fn $name<'a, 'b>(input: &'a[Token<'b>]) -> TokResult<'a, 'b, Operator> { + const OPS: &[&str] = &$ops; + match input.split_first() { + Some((Token::Operator(o, c), rest)) => { + if OPS.contains(o) { + return Ok((rest, Operator{ + operator: o.to_string(), + checked: c.is_some(), + })); + } + Err(Err::Error(Error::Other(format!("expected {} operator: {:?}", $err, OPS).into()))) + }, + None => Err(Err::Error(ParseError::from_error_kind( + input, + nom::error::ErrorKind::Eof, + ))), + _ => Err(Err::Error(Error::Other(format!("expected {} operator: {:?}", $err, OPS).into()))), + } + } + }; +} + +make_operator_fn!(assign_op: ["=", "+=", "-=", "*=", "/=", "%="], "assignment"); +make_operator_fn!(compare_op: [">", ">=", "<", "<=", "==", "!="], "comparison"); +make_operator_fn!(and_op: ["&&"], "and"); +make_operator_fn!(or_op: ["||"], "or"); +make_operator_fn!(add_op: ["+", "-"], "addition-precedence"); +make_operator_fn!(mul_op: ["*", "%", "/"], "multiplication-precedence"); + +pub fn parse<'a, 'b>(input: &'a[Token<'b>]) -> TokResult<'a, 'b, Module> { + many0(alt(( + function.map(|f| Item::FunctionItem(f)), + static_.map(|s| Item::StaticItem(s)), + ))).map(|items| Module { items }) + .parse(input) +} + +pub(crate) fn function<'a, 'b>(input: &'a[Token<'b>]) -> TokResult<'a, 'b, Function> { + tuple(( + keyword(Keyword::Fn), + ident, + 
punct("("), + param_list, + punct(")"), + block, + )).map(|(_, name, _, parameters, _, body)| { + Function{ name: name.to_owned(), parameters, body } + }).parse(input) +} + +pub(crate) fn param_list<'a, 'b>(input: &'a[Token<'b>]) -> TokResult<'a, 'b, Vec> { + separated_list0( + punct(","), + variable, + )(input) +} + +pub(crate) fn variable<'a, 'b>(input: &'a[Token<'b>]) -> TokResult<'a, 'b, Variable> { + opt(keyword(Keyword::Mut)).and(ident).map(|(is_mut, id)| { + Variable{ name: id.to_owned(), mutable: is_mut.is_some() } + }).parse(input) +} + +pub(crate) fn block<'a, 'b>(input: &'a[Token<'b>]) -> TokResult<'a, 'b, Block> { + delimited(punct("{"), many0(statement), punct("}")) + .map(|statements| Block{ statements }) + .parse(input) +} + +pub(crate) fn statement<'a, 'b>(input: &'a[Token<'b>]) -> TokResult<'a, 'b, Statement> { + alt(( + let_stmt.map(Statement::LetStatement), + loop_stmt.map(Statement::LoopStatement), + return_stmt.map(Statement::ReturnStatement), + expr_stmt.map(Statement::ExpressionStatement), + ))(input) +} + +pub(crate) fn let_stmt<'a, 'b>(input: &'a[Token<'b>]) -> TokResult<'a, 'b, Let> { + tuple(( + keyword(Keyword::Let), + variable, + operator(Operator { operator: "=".to_owned(), checked: false }), + expr, + punct(";"), + )).map(|(_, variable, _, value, _)| { + Let { variable, value } + }).parse(input) +} + +pub(crate) fn loop_stmt<'a, 'b>(input: &'a[Token<'b>]) -> TokResult<'a, 'b, Loop> { + tuple(( + keyword(Keyword::While), + expr, + block, + )).map(|(_, condition, body)| { + Loop { condition, body } + }).parse(input) +} + +pub(crate) fn return_stmt<'a, 'b>(input: &'a[Token<'b>]) -> TokResult<'a, 'b, Expression> { + delimited(keyword(Keyword::Return), expr, punct(";"))(input) +} + +pub(crate) fn expr_stmt<'a, 'b>(input: &'a[Token<'b>]) -> TokResult<'a, 'b, Expression> { + terminated(expr, punct(";"))(input) +} + +enum Associativity { + LeftToRight, + RightToLeft, + NonAssociative, +} + +macro_rules! 
make_simple_subexpr_fn { + ($name:ident: $inner:ident $op:ident $assoc:expr => $variant:ident) => { + fn $name<'a, 'b>(input: &'a[Token<'b>]) -> TokResult<'a, 'b, Expression> { + tuple(( + $inner, + opt(pair($op, $name)), + )).map(|(lhs, rhs)| { + use Associativity::*; + match (rhs, $assoc) { + (None, _) => lhs, + (Some((op, Expression::$variant { lhs: rhs_lhs, op: rhs_op, rhs: rhs_rhs })), RightToLeft) => { + // ((lhs + rhs.lhs) + rhs.rhs) + let new_lhs = Expression::$variant { lhs: lhs.into(), op, rhs: rhs_lhs }.into(); + let new_rhs = rhs_rhs; + let new_op = rhs_op; + Expression::$variant { lhs: new_lhs, op: new_op, rhs: new_rhs } + } + (Some((lhs_op, Expression::$variant { op: rhs_op, .. })), NonAssociative) => { + todo!("handle parsing error here: {:?} and {:?} do not associate. Use parentheses.", lhs_op, rhs_op) + } + (Some((op, rhs)), _) => { + // Handles different precedence operators, as well as LeftToRight associative same-precedence operators + Expression::$variant { lhs: lhs.into(), op, rhs: rhs.into() } + } + } + }).parse(input) + } + }; +} + +pub(crate) fn expr<'a, 'b>(input: &'a[Token<'b>]) -> TokResult<'a, 'b, Expression> { + fn atom_expr<'a, 'b>(input: &'a[Token<'b>]) -> TokResult<'a, 'b, Expression> { + alt(( + delimited(punct("("), expr, punct(")")).map(|expr| Expression::ParenExpression { expr: expr.into() }), + block.map(|block| Expression::BlockExpression{ block }), + ident.map(|name| Expression::VariableExpression { name: name.to_owned() }), + int_literal.map(|lit| Expression::IntLiteralExpression { literal: lit.into() }), + ))(input) + } + fn call_expr<'a, 'b>(input: &'a[Token<'b>]) -> TokResult<'a, 'b, Expression> { + pair(atom_expr, call_expr_tail).map(|(func, argss)| { + let mut expr = func; + for arg_list in argss { + expr = Expression::CallExpression { function: expr.into(), args: arg_list }; + } + expr + }).parse(input) + } + fn arg_list<'a, 'b>(input: &'a[Token<'b>]) -> TokResult<'a, 'b, Vec> { + separated_list0(punct(","), 
expr)(input) + } + fn call_expr_tail<'a, 'b>(input: &'a[Token<'b>]) -> TokResult<'a, 'b, Vec>> { + many0(delimited(punct("("), arg_list, punct(")")))(input) + } + + make_simple_subexpr_fn!(mul_expr: call_expr mul_op LeftToRight => MulExpression); + make_simple_subexpr_fn!(add_expr: mul_expr add_op LeftToRight => AddExpression); + make_simple_subexpr_fn!(compare_expr: add_expr compare_op NonAssociative => CompareExpression); + make_simple_subexpr_fn!(and_expr: compare_expr and_op LeftToRight => AndExpression); + make_simple_subexpr_fn!(or_expr: and_expr or_op LeftToRight => OrExpression); + make_simple_subexpr_fn!(assign_expr: or_expr assign_op RightToLeft => AssignExpression); + assign_expr(input) +} + +pub(crate) fn static_<'a, 'b>(input: &'a[Token<'b>]) -> TokResult<'a, 'b, Static> { + tuple(( + keyword(Keyword::Static), + opt(keyword(Keyword::Atomic)), + ident, + assign_op, + expr, + punct(";"), + )).map(|(_, atomic, name, _, value, _)| { + Static{ + atomic: atomic.is_some(), + name: name.to_owned(), + value, + } + }).parse(input) +} + + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn test2() { + let input = r#" + fn test(a, mut b) { + let mut acc = 1; + while b >= 1 { + acc = acc *? 
a; + b -= 1; + } + return acc; + } + "#; + let output = Module{ + items: vec![Item::FunctionItem(Function{ + name: "test".into(), + parameters: vec![Variable{ name: "a".into(), mutable: false }, Variable{ name: "b".into(), mutable: true }], + body: Block { statements: vec![ + Statement::LetStatement(Let{ variable: Variable { name: "acc".into(), mutable: true }, value: Expression::IntLiteralExpression { literal: "1".into() } }), + Statement::LoopStatement(Loop{ + condition: Expression::CompareExpression { + lhs: Expression::VariableExpression { name: "b".into() }.into(), + op: Operator{ operator: ">=".into(), checked: false }, + rhs: Expression::IntLiteralExpression { literal: "1".into() }.into(), + }, + body: Block { statements: vec![ + Statement::ExpressionStatement(Expression::AssignExpression{ + lhs: Expression::VariableExpression { name: "acc".into() }.into(), + op: Operator{ operator: "=".into(), checked: false }, + rhs: Expression::MulExpression { + lhs: Expression::VariableExpression { name: "acc".into() }.into(), + op: Operator{ operator: "*".into(), checked: true }, + rhs: Expression::VariableExpression { name: "a".into() }.into(), + }.into(), + }), + Statement::ExpressionStatement(Expression::AssignExpression{ + lhs: Expression::VariableExpression { name: "b".into() }.into(), + op: Operator{ operator: "-=".into(), checked: false }, + rhs: Expression::IntLiteralExpression { literal: "1".into() }.into(), + }), + ] } + }), + Statement::ReturnStatement(Expression::VariableExpression { name: "acc".into() }), + ] }, + })] + }; + let tokens = lex(input).unwrap().1; + let result = parse(&tokens).unwrap().1; + assert_eq!(result, output); + } + #[test] + fn associativity_1() { + let input = r#" + b -? 
1 - x + "#; + let output = Expression::AddExpression { + lhs: Expression::VariableExpression { name: "b".into() }.into(), + op: Operator{ operator: "-".into(), checked: true }, + rhs: Expression::AddExpression { + lhs: Expression::IntLiteralExpression { literal: "1".into() }.into(), + op: Operator{ operator: "-".into(), checked: false }, + rhs: Expression::VariableExpression { name: "x".into() }.into(), + }.into(), + }; + let tokens = lex(input).unwrap().1; + let result = expr(&tokens).unwrap().1; + assert_eq!(result, output); + } + #[test] + fn associativity_2() { + let input = r#" + b -=? 1 = x + "#; + let output = Expression::AssignExpression { + lhs: Expression::AssignExpression { + lhs: Expression::VariableExpression { name: "b".into() }.into(), + op: Operator{ operator: "-=".into(), checked: true }, + rhs: Expression::IntLiteralExpression { literal: "1".into() }.into(), + }.into(), + op: Operator{ operator: "=".into(), checked: false }, + rhs: Expression::VariableExpression { name: "x".into() }.into(), + }; + let tokens = lex(input).unwrap().1; + let result = expr(&tokens).unwrap().1; + assert_eq!(result, output); + } + #[test] + fn precedence_1() { + let input = r#" + b -? 1 = x + "#; + let output = Expression::AssignExpression { + lhs: Expression::AddExpression { + lhs: Expression::VariableExpression { name: "b".into() }.into(), + op: Operator{ operator: "-".into(), checked: true }, + rhs: Expression::IntLiteralExpression { literal: "1".into() }.into(), + }.into(), + op: Operator{ operator: "=".into(), checked: false }, + rhs: Expression::VariableExpression { name: "x".into() }.into(), + }; + let tokens = lex(input).unwrap().1; + let result = expr(&tokens).unwrap().1; + assert_eq!(result, output); + } + #[test] + fn precedence_2() { + let input = r#" + b -=? 
1 - x + "#; + let output = Expression::AssignExpression { + lhs: Expression::VariableExpression { name: "b".into() }.into(), + op: Operator{ operator: "-=".into(), checked: true }, + rhs: Expression::AddExpression { + lhs: Expression::IntLiteralExpression { literal: "1".into() }.into(), + op: Operator{ operator: "-".into(), checked: false }, + rhs: Expression::VariableExpression { name: "x".into() }.into(), + }.into(), + }; + let tokens = lex(input).unwrap().1; + let result = expr(&tokens).unwrap().1; + assert_eq!(result, output); + } +} \ No newline at end of file diff --git a/src/code.rs b/src/code.rs index 3eded5b..6485383 100644 --- a/src/code.rs +++ b/src/code.rs @@ -18,9 +18,12 @@ impl Relocation { pub(crate) fn new(location: usize, kind: arch::RelocationKind, symbol: Symbol, addend: isize) -> Self { Self { location, kind, symbol, addend } } } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, PartialEq, Eq)] pub struct Relocatable { + /// Raw data pub(crate) data: Cow<'static, [u8]>, + /// Power of two minimum alignment of this data (0 means 1, 1 means 2, 2 means 4, etc.) + pub(crate) alignment: u32, /// Vector of symbol definitions in this section of data pub(crate) symbols: Vec<(Symbol, usize)>, /// Vector of absolute symbol definitions @@ -29,19 +32,51 @@ pub struct Relocatable { pub(crate) relocations: Vec, } +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct Object { + pub code: Relocatable, + pub data: Relocatable, +} + +impl From for Object { + fn from(code: Relocatable) -> Self { + Self { code, data: Relocatable::default() } + } +} + +impl std::ops::Add for Object { + type Output = Self; + fn add(mut self, rhs: Self) -> Self { + self += rhs; + self + } +} + +impl std::ops::AddAssign for Object { + fn add_assign(&mut self, rhs: Self) { + self.code += rhs.code; + self.data += rhs.data; + } +} + macro_rules! 
impl_from_data_for_relocatable { ($ty:ty) => { impl From<$ty> for Relocatable { fn from(data: $ty) -> Self { Self { data: data.into(), + alignment: 0, symbols: vec![], abs_symbols: vec![], relocations: vec![], } } } - + impl From<$ty> for Object { + fn from(code: $ty) -> Self { + Self::from(Relocatable::from(code)) + } + } } } @@ -49,6 +84,24 @@ impl_from_data_for_relocatable!(&'static [u8]); impl_from_data_for_relocatable!(Cow<'static, [u8]>); impl_from_data_for_relocatable!(Vec); + +impl From<[u8; N]> for Relocatable { + fn from(data: [u8; N]) -> Self { + Self { + data: Vec::from(Box::new(data) as Box<[u8]>).into(), + alignment: 0, + symbols: vec![], + abs_symbols: vec![], + relocations: vec![], + } + } +} +impl From<[u8; N]> for Object { + fn from(code: [u8; N]) -> Self { + Self::from(Relocatable::from(code)) + } +} + impl std::ops::Add for Relocatable { type Output = Self; fn add(mut self, rhs: Self) -> Self { @@ -59,8 +112,23 @@ impl std::ops::Add for Relocatable { impl std::ops::AddAssign for Relocatable { fn add_assign(&mut self, rhs: Self) { + { + // Pad self so rhs is still correctly aligned + let rhs_align = 1usize << rhs.alignment; + let padding = (rhs_align - (self.data.len() % rhs_align)) % rhs_align; + if padding > 0 { + self.data.to_mut().reserve(padding + rhs.data.len()); + let new_lhs_len = self.data.len() + padding; + self.data.to_mut().resize(new_lhs_len, 0); + } + } + let lhs_len = self.data.len(); - self.data.to_mut().extend_from_slice(&rhs.data); + if rhs.data.len() > 0 { + self.data.to_mut().extend_from_slice(&rhs.data); + } + + self.alignment = self.alignment.max(rhs.alignment); self.abs_symbols.extend(rhs.abs_symbols.into_iter().map( |(sym, val)| (sym, val) diff --git a/src/code/arch/x86_64.rs b/src/code/arch/x86_64.rs index 988c21f..0205331 100644 --- a/src/code/arch/x86_64.rs +++ b/src/code/arch/x86_64.rs @@ -9,6 +9,8 @@ pub(crate) enum RelocationKind { Direct32 = 10, Direct32S = 11, + + Pc8 = 15, } impl RelocationKind { @@ -30,6 +32,19 
@@ impl RelocationKind { *reloc_slice = i32::to_ne_bytes(actual_value); Ok(()) }, + Pc8 => { + let reloc_slice: &mut [u8; 1] = data.get_mut(location..location+1) + .ok_or(AssembleError::InvalidRelocation("Attempted to apply relocation past end of section"))? + .try_into().unwrap(); + let value: isize = if location < value { + (value - location).try_into().ok().ok_or(AssembleError::InvalidRelocation("Relative relocation difference too large"))? + } else { + -(location - value).try_into().ok().ok_or(AssembleError::InvalidRelocation("Relative relocation difference too large"))? + }; + let actual_value: i8 = value.try_into().ok().ok_or(AssembleError::InvalidRelocation("Relative relocation difference too large"))?; + *reloc_slice = i8::to_ne_bytes(actual_value); + Ok(()) + }, Direct64 | Direct32 | Direct32S => Err(AssembleError::InvalidRelocation("Cannot apply direct relocation for relative symbol")), } } diff --git a/src/commands/arch/x86_64.rs b/src/commands/arch/x86_64.rs index 3d137ec..e7514d9 100644 --- a/src/commands/arch/x86_64.rs +++ b/src/commands/arch/x86_64.rs @@ -87,6 +87,7 @@ fn new_while_loop_header_footer() -> (Relocatable, Relocatable) { let header_code = Relocatable { data: header_code.into(), + alignment: 0, symbols: vec![(header_branch_symbol.clone(), header_code.len())], abs_symbols: vec![], relocations: vec![Relocation::new(header_offset_loc.start, RelocationKind::Pc32, footer_branch_symbol.clone(), -4)], @@ -97,6 +98,7 @@ fn new_while_loop_header_footer() -> (Relocatable, Relocatable) { let footer_code = Relocatable { data: footer_code.into(), + alignment: 0, symbols: vec![(footer_branch_symbol.clone(), footer_code.len())], abs_symbols: vec![], relocations: vec![Relocation::new(footer_offset_loc.start, RelocationKind::Pc32, header_branch_symbol.clone(), -4)], diff --git a/src/compiler.rs b/src/compiler.rs new file mode 100644 index 0000000..8ba2459 --- /dev/null +++ b/src/compiler.rs @@ -0,0 +1,527 @@ +use crate::{code::{Symbol, Relocatable, 
Object}, ast::Function}; +use std::{fmt::Debug, collections::HashMap, cell::{RefCell, Cell}, rc::Rc, env::VarError, borrow::Cow}; + + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub(crate) enum Type { + Integer, + Boolean, + Function, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub(crate) enum Location { + Local { + stack_index: usize, + }, + Static { + symbol: Symbol, + atomic: bool, + } +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub(crate) struct Variable { + pub(crate) name: Symbol, + pub(crate) mutable: bool, + pub(crate) r#type: Type, + pub(crate) location: Location, +} + + +pub(crate) trait Machine : Debug + Copy { + type Register: Copy + Debug + Eq + Ord; + type Clobber: IntoIterator + Extend; + + fn natural_alignment(self) -> u32; + + #[must_use] + fn function_prologue_epilogue_abort(self, stack_slots: usize, arg_slots: Vec) -> Result<(Object, Object, Object), CompileError<'static>>; + + #[must_use] + /// alignment is the power of two of the minimum alignment, e.g. 0 is 1, 3 is 8. 
+ fn add_data(self, data: Vec, alignment: u32, symbol: Symbol) -> Object; + + fn usable_registers(self) -> Vec; + #[must_use] + fn load_from(self, into: Self::Register, from: &Variable) -> (Object, Self::Clobber); + #[must_use] + fn store_to(self, from: Self::Register, into: &Variable) -> (Object, Self::Clobber); + + #[must_use] + fn copy_from(self, dst: Self::Register, src: Self::Register) -> (Object, Self::Clobber); + #[must_use] + fn copy_into(self, src: Self::Register, dst: Self::Register) -> (Object, Self::Clobber) { + self.copy_from(dst, src) + } + + #[must_use] + fn add_assign(self, lhs: Self::Register, rhs: Self::Register) -> (Object, Self::Clobber); + #[must_use] + fn checked_add_assign(self, lhs: Self::Register, rhs: Self::Register) -> (Object, Self::Clobber); + + #[must_use] + fn add(self, lhs: Self::Register, rhs: Self::Register, result: Self::Register) -> (Object, Self::Clobber) { + if lhs == result { + return self.checked_add_assign(lhs, rhs); + } + let (add, mut clobbers) = self.add_assign(lhs, rhs); + let (copy_into, copy_clobbers) = self.copy_into(lhs, result); + clobbers.extend(copy_clobbers); + clobbers.extend([lhs]); + + (add + copy_into, clobbers) + } + #[must_use] + fn checked_add(self, lhs: Self::Register, rhs: Self::Register, result: Self::Register) -> (Object, Self::Clobber) { + if lhs == result { + return self.checked_add_assign(lhs, rhs); + } + let (add, mut clobbers) = self.checked_add_assign(lhs, rhs); + let (copy_into, copy_clobbers) = self.copy_into(lhs, result); + clobbers.extend(copy_clobbers); + clobbers.extend([lhs]); + + (add + copy_into, clobbers) + } + + #[must_use] + fn sub_assign(self, lhs: Self::Register, rhs: Self::Register) -> (Object, Self::Clobber); + #[must_use] + fn checked_sub_assign(self, lhs: Self::Register, rhs: Self::Register) -> (Object, Self::Clobber); + + #[must_use] + fn sub(self, lhs: Self::Register, rhs: Self::Register, result: Self::Register) -> (Object, Self::Clobber) { + if lhs == result { + return 
self.checked_sub_assign(lhs, rhs); + } + let (sub, mut clobbers) = self.sub_assign(lhs, rhs); + let (copy_into, copy_clobbers) = self.copy_into(lhs, result); + clobbers.extend(copy_clobbers); + clobbers.extend([lhs]); + + (sub + copy_into, clobbers) + } + #[must_use] + fn checked_sub(self, lhs: Self::Register, rhs: Self::Register, result: Self::Register) -> (Object, Self::Clobber) { + if lhs == result { + return self.checked_sub_assign(lhs, rhs); + } + let (sub, mut clobbers) = self.checked_sub_assign(lhs, rhs); + let (copy_into, copy_clobbers) = self.copy_into(lhs, result); + clobbers.extend(copy_clobbers); + clobbers.extend([lhs]); + + (sub + copy_into, clobbers) + } +} + +mod arch; + +#[derive(Debug, Clone, Default)] +pub(crate) struct GlobalState { + pub(crate) symbols: HashMap>, +} + +impl GlobalState { + fn new_static(&mut self, machine: M, value: isize, symbol: Symbol) -> (Object, Rc) { + let var = Rc::new(Variable{ + name: symbol.clone(), + mutable: false, + r#type: Type::Integer, + location: Location::Static { symbol: symbol.clone(), atomic: false }, + }); + self.symbols.insert(symbol.clone(), Rc::clone(&var)); + (machine.add_data(value.to_le_bytes().into(), machine.natural_alignment(), symbol), var) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum CompileError<'ast> { + UndefinedSymbol(&'ast str), + Other(Cow<'static, str>), +} + +pub(crate) trait Compilable<'ast, M: Machine, State = GlobalState> { + type Output: Debug + Clone; // = (); + fn compile(&'ast self, machine: M, state: &mut State) -> Result>; +} + +pub fn compile(ast: &crate::ast::Module) -> Result> { + let mut state = GlobalState::default(); + ast.compile(arch::Machine, &mut state) +} + +pub(crate) fn compile_helper<'a, C: Compilable<'a, arch::Machine, Output = Object>>(ast: &'a C) -> Result> { + let mut state = GlobalState::default(); + ast.compile(arch::Machine, &mut state) +} + +impl<'ast, M: Machine> Compilable<'ast, M> for crate::ast::Module { + type Output = 
Object; + /* Compiles every item in order and concatenates the resulting objects; a module without items yields the default object. */ fn compile(&'ast self, machine: M, state: &mut GlobalState) -> Result> { + self.items + .iter() + .map(|item| item.compile(machine, state)) + .reduce(|a, b| Ok(a? + b?)) + .unwrap_or_else(|| Ok(Default::default())) + } +} + +impl<'ast, M: Machine> Compilable<'ast, M> for crate::ast::Item { + type Output = Object; + /* Dispatches to the function or static item's own `compile`. */ fn compile(&'ast self, machine: M, state: &mut GlobalState) -> Result> { + match self { + crate::ast::Item::FunctionItem(item) => item.compile(machine, state), + crate::ast::Item::StaticItem(item) => item.compile(machine, state), + } + } +} + +/* Per-function compilation state: the named locals, the number of stack slots handed out so far, an optional enclosing scope, and access to the global symbol table. */ struct FunctionScopeState<'parent, 'ast> { + global_state: &'parent mut GlobalState, + parent_state: Option<&'parent FunctionScopeState<'parent, 'ast>>, + stack_depth: usize, + locals: HashMap<&'ast str, Rc>, + function_return: Symbol, + function_abort: Symbol, +} + +impl<'parent, 'ast> FunctionScopeState<'parent, 'ast> { + /* Creates an outermost scope (no parent, no locals, zero stack slots) with fresh local symbols for the return and abort labels. */ fn new(global_state: &'parent mut GlobalState) -> Self { + let function_return = Symbol::new_local(); + let function_abort = Symbol::new_local(); + Self { + global_state, + parent_state: None, + stack_depth: 0, + locals: Default::default(), + function_return, + function_abort, + } + } + /* Hands out the next unused stack slot index and bumps the depth. */ fn new_stack_slot(&mut self) -> usize { + let slot = self.stack_depth; + self.stack_depth += 1; + slot + } + /* Allocates a fresh stack slot for the integer variable `name` and records it in `locals`, replacing any earlier entry with the same name. Also prints a TODO reminder to stderr on every call. */ fn new_local(&mut self, name: &'ast str, mutable: bool) -> Rc { + let slot = self.new_stack_slot(); + let sym = Symbol::new_local(); + let var = Variable{ + name: sym.clone(), + mutable, + r#type: Type::Integer, + location: Location::Local { stack_index: slot }, + }; + let var = Rc::new(var); + eprintln!("TODO: handle shadowing?"); + self.locals.insert(name, Rc::clone(&var)); + var + } + /* Allocates an anonymous, immutable integer stack slot that is not entered into `locals`. */ fn new_temporary(&mut self) -> Rc { + let slot = self.new_stack_slot(); + let sym = Symbol::new_local(); + let var = Variable{ + name: sym.clone(), + mutable: false, + r#type: Type::Integer, + location: Location::Local { stack_index: slot }, + }; + let var = Rc::new(var); + // eprintln!("TODO: handle
shadowing?"); + // self.locals.insert(name, Rc::clone(&var)); + var + } + /* Resolves `name` innermost-first: this scope's locals, then the chain of parent scopes, and finally the global symbol table. Returns `None` when the name is unknown everywhere. */ fn get_variable(&self, name: &str) -> Option> { + match self.locals.get(name) { + Some(var) => Some(Rc::clone(var)), + None => match self.parent_state { + Some(state) => state.get_variable(name), + None => { + let symbol = Symbol::new_global(name.into()); + self.global_state.symbols.get(&symbol).map(Rc::clone) + } + } + } + } +} + +impl<'ast, M: Machine> Compilable<'ast, M> for crate::ast::Function { + type Output = Object; + /* Declares one local per parameter (collecting their stack slots in declaration order), compiles the body, and asks the backend for prologue/epilogue/abort code. Assembling the final object is still unimplemented. */ fn compile(&'ast self, machine: M, state: &mut GlobalState) -> Result> { + let mut state = FunctionScopeState::new(state); + let arg_slots = self.parameters + .iter() + .map(|parameter| { + let local_var = state.new_local(&parameter.name, parameter.mutable); + match local_var.location { + Location::Local { stack_index } => stack_index, + /* `new_local` only ever produces stack-resident variables. */ Location::Static { .. } => unreachable!(), + } + }) + .collect::>() + ; + let body = self.body.compile(machine, &mut state)?; + + let (prologue, epilogue, abort) = machine.function_prologue_epilogue_abort(state.stack_depth, arg_slots)?; + + todo!("prologue and epilogue") + } +} + +impl<'ast, M: Machine> Compilable<'ast, M> for crate::ast::Static { + type Output = Object; + fn compile(&'ast self, machine: M, state: &mut GlobalState) -> Result> { + todo!() + } +} + +impl<'a, 'ast, M: Machine> Compilable<'ast, M, FunctionScopeState<'a, 'ast>> for crate::ast::Block { + type Output = Object; + /* Compiles the statements in order and concatenates their code; an empty block yields the default object. */ fn compile(&'ast self, machine: M, state: &mut FunctionScopeState<'a, 'ast>) -> Result> { + self.statements + .iter() + .map(|item| item.compile(machine, state)) + .reduce(|a, b| Ok(a?
+ b?)) + .unwrap_or_else(|| Ok(Default::default())) + } +} + +impl<'a, 'ast, M: Machine> Compilable<'ast, M, FunctionScopeState<'a, 'ast>> for crate::ast::Statement { + type Output = Object; + /* Dispatches on the statement kind. An expression statement keeps only the generated code and drops the value's variable. Return statements are not implemented yet. */ fn compile(&'ast self, machine: M, state: &mut FunctionScopeState<'a, 'ast>) -> Result> { + match self { + crate::ast::Statement::LetStatement(stmt) => stmt.compile(machine, state), + crate::ast::Statement::ExpressionStatement(expr) => Ok(expr.compile(machine, state)?.0), + crate::ast::Statement::LoopStatement(stmt) => stmt.compile(machine, state), + crate::ast::Statement::ReturnStatement(expr) => { + todo!("compile return statement") + }, + } + } +} + +impl<'a, 'ast, M: Machine> Compilable<'ast, M, FunctionScopeState<'a, 'ast>> for crate::ast::Let { + type Output = Object; + /* Evaluates the initializer, then declares the binding and moves the initializer's value into its stack slot through the first usable scratch register. */ fn compile(&'ast self, machine: M, state: &mut FunctionScopeState<'a, 'ast>) -> Result> { + /* Compile the initializer BEFORE declaring the binding: the new name must not be visible inside its own initializer (`let x = x + 1;` refers to the previous `x`), and a failing initializer must not leave a half-declared local behind. */ let (mut code, initializer) = self.value.compile(machine, state)?; + let var = state.new_local(&self.variable.name, self.variable.mutable); + + let mut regs = machine.usable_registers().into_iter(); + let reg = regs.next().expect("todo"); + + code += machine.load_from(reg, &initializer).0; + code += machine.store_to(reg, &var).0; + + Ok(code) + } +} + +impl<'a, 'ast, M: Machine> Compilable<'ast, M, FunctionScopeState<'a, 'ast>> for crate::ast::Loop { + type Output = Object; + fn compile(&'ast self, machine: M, state: &mut FunctionScopeState<'a, 'ast>) -> Result> { + todo!() + } +} + +impl<'a, 'ast, M: Machine> Compilable<'ast, M, FunctionScopeState<'a, 'ast>> for crate::ast::Expression { + // Returns the temporary that was created for the value of this expression.
+ type Output = (Object, Rc); + /* Lowers an expression to code plus the variable that holds its value. Only plain assignment, `+`/`-` (checked and unchecked), parentheses, variable references and integer literals are implemented; the remaining arms are `todo!()`. */ fn compile(&'ast self, machine: M, state: &mut FunctionScopeState<'a, 'ast>) -> Result> { + match self { + crate::ast::Expression::AssignExpression { rhs, op, lhs } => { + match (&*op.operator, op.checked) { + ("=", _) => { + /* Plain assignment: evaluate the target, then the value, and move the value into the target through one scratch register. The expression's own value is the right-hand variable. */ let (mut code, lhs) = lhs.compile(machine, state)?; + let (rhs_code, rhs) = rhs.compile(machine, state)?; + code += rhs_code; + + match (lhs.r#type, rhs.r#type) { + (Type::Integer, Type::Integer) => {}, + _ => panic!("todo: typing"), + }; + + let mut regs = machine.usable_registers().into_iter(); + let reg = regs.next().expect("todo"); + + code += machine.load_from(reg, &rhs).0; + code += machine.store_to(reg, &lhs).0; + + Ok((code, rhs)) + }, + _ => todo!(), + } + }, + crate::ast::Expression::OrExpression { rhs, op, lhs } => todo!(), + crate::ast::Expression::AndExpression { rhs, op, lhs } => todo!(), + crate::ast::Expression::CompareExpression { rhs, op, lhs } => todo!(), + crate::ast::Expression::AddExpression { rhs, op, lhs } => { + /* Additive operators: evaluate left then right, load both into scratch registers, combine in place in `dst`, and spill the result to a fresh temporary. */ let (mut code, lhs) = lhs.compile(machine, state)?; + let (rhs_code, rhs) = rhs.compile(machine, state)?; + code += rhs_code; + + match (lhs.r#type, rhs.r#type) { + (Type::Integer, Type::Integer) => {}, + _ => panic!("todo: typing"), + }; + + let mut regs = machine.usable_registers().into_iter(); + let dst = regs.next().expect("todo"); + let src = regs.next().expect("todo"); + + code += machine.load_from(dst, &lhs).0; + code += machine.load_from(src, &rhs).0; + /* The operator text plus its `checked` flag select the backend primitive. */ match (&*op.operator, op.checked) { + ("+", false) => code += machine.add_assign(dst, src).0, + ("+", true) => code += machine.checked_add_assign(dst, src).0, + ("-", false) => code += machine.sub_assign(dst, src).0, + ("-", true) => code += machine.checked_sub_assign(dst, src).0, + _ => panic!("invalid operator"), + }; + + let result_var = state.new_temporary(); + + code += machine.store_to(dst, &result_var).0; + + Ok((code, result_var)) + }, + crate::ast::Expression::MulExpression { rhs, op, lhs } => todo!(), +
crate::ast::Expression::ParenExpression { expr } => expr.compile(machine, state), + crate::ast::Expression::CallExpression { function, args } => { + let (mut code, func) = function.compile(machine, state)?; + + match func.r#type { + Type::Function => {}, + _ => panic!("todo: typing"), + }; + + todo!() + }, + crate::ast::Expression::VariableExpression { name } => { + /* A variable reference emits no code; it only resolves the name, reporting `UndefinedSymbol` when the lookup fails. */ let var = state.get_variable(&name).ok_or(CompileError::UndefinedSymbol(&name))?; + Ok((Default::default(), var)) + }, + crate::ast::Expression::BlockExpression { block } => todo!(), + crate::ast::Expression::IntLiteralExpression { literal } => { + /* An integer literal becomes an anonymous static: the returned object carries its data and the returned variable refers to it. */ let literal = literal.parse::().ok().ok_or(CompileError::Other("invalid integer literal".into()))?; + let sym = Symbol::new_local(); + let (obj, var) = state.global_state.new_static(machine, literal, sym.clone()); + Ok((obj, var)) + }, + } + } +} + + +#[cfg(test)] +mod tests { + use crate::compiler::CompileError; + use super::Compilable; + use crate::compiler::FunctionScopeState; + use super::GlobalState; + + /* Test helper: reports a length mismatch and each differing byte on stderr. Once more than `print_limit` bytes differ it fails immediately on the current byte pair; otherwise it finishes with a comparison of the whole slices. */ fn assert_eq_helper(actual: &[u8], expected: &[u8], print_limit: usize) { + if actual.len() != expected.len() { + eprintln!("Different lengths: actual: {}, expected: {}", actual.len(), expected.len()); + } + + let mut incorrect_count = 0; + for (i, (actual, expected)) in actual.iter().zip(expected.iter()).enumerate() { + if actual != expected { + incorrect_count += 1; + eprintln!("Byte #{i} incorrect: actual: {actual} ({actual:x}), expected: {expected} ({expected:x})"); + if incorrect_count > print_limit { + eprintln!("More than {print_limit} incorrect bytes found, not printing any more."); + /* `actual`/`expected` are the shadowed single bytes here, which are known to differ, so this always fails. */ assert_eq!(actual, expected); + } + } + } + assert_eq!(actual, expected); + } + + /* Assigning to an undeclared variable reports the target name. */ #[test] + fn assignment_vars_1() { + + let source = r#"{ + x = y; + }"#; + let tokens = crate::ast::lex(source).unwrap().1; + let ast = crate::ast::parsing::block(&tokens).unwrap().1; + let mut global_state = GlobalState::default(); + let mut scope_state = FunctionScopeState::new(&mut
global_state); + assert_eq!(ast.compile(super::arch::Machine, &mut scope_state), Err(CompileError::UndefinedSymbol("x"))); + } + /* With the target declared, the undefined right-hand side is what gets reported. */ #[test] + fn assignment_vars_2() { + + let source = r#"{ + let x = 1; + x = y; + }"#; + let tokens = crate::ast::lex(source).unwrap().1; + let ast = crate::ast::parsing::block(&tokens).unwrap().1; + let mut global_state = GlobalState::default(); + let mut scope_state = FunctionScopeState::new(&mut global_state); + assert_eq!(ast.compile(super::arch::Machine, &mut scope_state), Err(CompileError::UndefinedSymbol("y"))); + } + /* Two literal-initialised locals followed by a variable-to-variable assignment; checks the exact assembled bytes (code followed by data) on x86_64. */ #[test] + fn assignment_vars_3() { + + let source = r#"{ + let mut x = 1; + let y = 2; + x = y; + }"#; + let tokens = crate::ast::lex(source).unwrap().1; + let ast = crate::ast::parsing::block(&tokens).unwrap().1; + let mut global_state = GlobalState::default(); + let mut scope_state = FunctionScopeState::new(&mut global_state); + let obj = ast.compile(super::arch::Machine, &mut scope_state).unwrap(); + let assembled = (obj.code + obj.data).assemble().unwrap(); + /* NOTE(review): `target` only exists on x86_64, so this test does not build for other targets. */ #[cfg(target_arch = "x86_64")] + let target = [ + 0x48, 0x8b, 0x05, 0x19, 0x00, 0x00, 0x00, // mov .LCone(%rip),%rax + 0x48, 0x89, 0x04, 0x24, // mov %rax,x(%rsp) + 0x48, 0x8b, 0x05, 0x16, 0x00, 0x00, 0x00, // mov .LCtwo(%rip),%rax + 0x48, 0x89, 0x44, 0x24, 0x08, // mov %rax,y(%rsp) + 0x48, 0x8b, 0x44, 0x24, 0x08, // mov y(%rsp),%rax + 0x48, 0x89, 0x04, 0x24, // mov %rax,x(%rsp) + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // .quad 1 + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // .quad 2 + ]; + assert_eq_helper(&assembled, &target, 4); + } + /* Same setup, but the assigned value is `x + y`, so the sum passes through a stack temporary before reaching `x`. */ #[test] + fn assignment_vars_4() { + + let source = r#"{ + let mut x = 1; + let y = 2; + x = x + y; + }"#; + let tokens = crate::ast::lex(source).unwrap().1; + let ast = crate::ast::parsing::block(&tokens).unwrap().1; + let mut global_state = GlobalState::default(); + let mut scope_state = FunctionScopeState::new(&mut global_state); + let obj = ast.compile(super::arch::Machine, &mut scope_state).unwrap(); + let
assembled = (obj.code + obj.data).assemble().unwrap(); + #[cfg(target_arch = "x86_64")] + let target = [ + 0x48, 0x8b, 0x05, 0x31, 0x00, 0x00, 0x00, // mov .LCone(%rip),%rax + 0x48, 0x89, 0x04, 0x24, // mov %rax,x(%rsp) + 0x48, 0x8b, 0x05, 0x2e, 0x00, 0x00, 0x00, // mov .LCtwo(%rip),%rax + 0x48, 0x89, 0x44, 0x24, 0x08, // mov %rax,y(%rsp) + 0x48, 0x8b, 0x04, 0x24, // mov x(%rsp),%rax + 0x48, 0x8b, 0x4c, 0x24, 0x08, // mov y(%rsp),%rcx + 0x48, 0x01, 0xc8, // add %rcx,%rax + 0x48, 0x89, 0x44, 0x24, 0x10, // mov %rax,.Ltemp1(%rsp) + 0x48, 0x8b, 0x44, 0x24, 0x10, // mov .Ltemp1(%rsp),%rax + 0x48, 0x89, 0x04, 0x24, // mov %rax,x(%rsp) + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // padding: 49 code bytes rounded up to the 8-byte data alignment + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // .quad 1 + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // .quad 2 + ]; + assert_eq_helper(&assembled, &target, 4); + } +} diff --git a/src/compiler/arch.rs b/src/compiler/arch.rs new file mode 100644 index 0000000..bbc6455 --- /dev/null +++ b/src/compiler/arch.rs @@ -0,0 +1,19 @@ +/* Backend selection: exactly one architecture module is compiled in, chosen by the build target, and its items are re-exported crate-wide. */ #[cfg(target_arch = "x86_64")] +mod x86_64; +#[cfg(target_arch = "x86_64")] +pub(crate) use x86_64::*; + +#[cfg(target_arch = "x86")] +mod x86; +#[cfg(target_arch = "x86")] +pub(crate) use x86::*; + +#[cfg(target_arch = "arm")] +mod arm; +#[cfg(target_arch = "arm")] +pub(crate) use arm::*; + +#[cfg(target_arch = "aarch64")] +mod aarch64; +#[cfg(target_arch = "aarch64")] +pub(crate) use aarch64::*; diff --git a/src/compiler/arch/x86_64.rs b/src/compiler/arch/x86_64.rs new file mode 100644 index 0000000..7ff4dc8 --- /dev/null +++ b/src/compiler/arch/x86_64.rs @@ -0,0 +1,556 @@ +use crate::{compiler::{Variable, Location, CompileError}, code::{Relocatable, Object, Relocation, RelocationKind, Symbol}}; + +/* Zero-sized handle for the x86_64 backend; it carries no state and is passed by value. */ #[derive(Debug, Clone, Copy)] +pub(crate) struct Machine; + +macro_rules!
make_register_type { + /* Input: three identifiers naming the usability categories, then an enum whose variants are written `Name = number category`. */ ( + $caller_saved:ident; + $callee_saved:ident; + $unusable:ident; + $(#[$meta:meta])* + $vis:vis enum $name:ident { + $($variant:ident = $value:literal $usability:ident),* $(,)? + } + ) => { + $(#[$meta])* + $vis enum $name { + $($variant = $value),* + } + /* Every variant, in declaration order. */ const ALL_REGISTERS: &[$name] = &[ $($name::$variant),* ]; + impl $name { + /* Each predicate binds the three category identifiers to booleans (only its own category is `true`), so a variant's `$usability` identifier evaluates to whether the variant belongs to that category. */ fn is_caller_saved(&self) -> bool { + let $caller_saved = true; + let $callee_saved = false; + let $unusable = false; + use $name::*; + match self { + $( + $variant => $usability, + )* + } + } + fn is_callee_saved(&self) -> bool { + let $caller_saved = false; + let $callee_saved = true; + let $unusable = false; + use $name::*; + match self { + $( + $variant => $usability, + )* + } + } + fn is_unusable(&self) -> bool { + let $caller_saved = false; + let $callee_saved = false; + let $unusable = true; + use $name::*; + match self { + $( + $variant => $usability, + )* + } + } + } + } +} + +make_register_type!{ +caller_saved; +callee_saved; +unusable; +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[repr(u8)] +pub(crate) enum Register { + /* Discriminants 0-15 are used directly as hardware register numbers by the encoders in this file; `Rip` (16) is rejected or left unimplemented by them. */ Rax = 0 caller_saved, + Rcx = 1 caller_saved, + Rdx = 2 caller_saved, + Rbx = 3 callee_saved, + Rsp = 4 unusable, + Rbp = 5 unusable, + Rsi = 6 caller_saved, + Rdi = 7 caller_saved, + R8 = 8 caller_saved, + R9 = 9 caller_saved, + R10 = 10 caller_saved, + R11 = 11 caller_saved, + R12 = 12 callee_saved, + R13 = 13 callee_saved, + R14 = 14 callee_saved, + R15 = 15 unusable, + Rip = 16 unusable, +} +} + +/* A memory operand addressed as base register plus signed 32-bit displacement. */ struct RegisterDisplacement { + register: Register, + disp: i32 +} + +macro_rules! impl_from_for_enum { + /* Input: an enum whose variants each wrap exactly one field type. */ ( + $(#[$meta:meta])* + $vis:vis enum $name:ident { + $($variant:ident($fieldty:ty)),* $(,)?
+ } + ) => { + $(#[$meta])* + $vis enum $name { + $($variant($fieldty)),* + } + /* One `From<field type>` impl per variant, so `.into()` can build the enum from any of its payload types. */ $( + impl From<$fieldty> for $name { + fn from(field: $fieldty) -> Self { + $name::$variant(field) + } + } + )* + } +} + +impl_from_for_enum!{ +enum Operand { + Register(Register), + RegisterDisplacement(RegisterDisplacement), + Symbol(Symbol), // Rip-relative +} +} + +macro_rules! make_simple_instruction_type { + /* Input: variants written `Name = load_opcode / store_opcode`; either side may be `None` when that direction does not exist. */ ( + $(#[$meta:meta])* + $vis:vis enum $name:ident { + $($variant:ident = $src_rm:tt / $dst_rm:tt),* $(,)? + } + ) => { + $(#[$meta])* + $vis enum $name { + $($variant),* + } + impl $name { + /* Opcode for the form whose SOURCE is the r/m operand (destination is a register). */ fn src_rm_opcode(self) -> Option { + use $name::*; + match self { + $( + $variant => $src_rm.into(), + )* + } + } + /* Opcode for the form whose DESTINATION is the r/m operand (source is a register). */ fn dst_rm_opcode(self) -> Option { + use $name::*; + match self { + $( + $variant => $dst_rm.into(), + )* + } + } + } + } +} + +make_simple_instruction_type!{ +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +enum SimpleInstruction { + Mov = 0x8b / 0x89, + Add = 0x03 / 0x01, + Sub = 0x2b / 0x29, + Lea = 0x8d / None, +} +} + +/* Condition codes; the discriminant is the low nibble of the `Jcc` opcode (0x70+cc short form, 0x0f 0x80+cc near form). */ #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[repr(u8)] +enum ConditionalJump { + Overflow = 0x0, + NotOverflow = 0x1, + Below = 0x2, + AboveOrEqual = 0x3, + Equal = 0x4, + NotEqual = 0x5, + BelowOrEqual = 0x6, + Above = 0x7, + Sign = 0x8, + NotSign = 0x9, + ParityEven = 0xa, + ParityOdd = 0xb, + Less = 0xc, + GreaterOrEqual = 0xd, + LessOrEqual = 0xe, + Greater = 0xf, +} + +impl ConditionalJump { + /* Two-byte `Jcc rel8`: the displacement is the last byte and is relative to the end of the instruction, hence offset 1 and addend -1. */ fn short_jump(self, dst: Symbol) -> Relocatable { + let bytes = vec![0x70 | self as u8, 0x00]; + Relocatable{ + data: bytes.into(), + alignment: 0, + symbols: vec![], + abs_symbols: vec![], + relocations: vec![ + Relocation::new(1, RelocationKind::Pc8, dst, -1), + ], + } + } + /* Six-byte `Jcc rel32`: two opcode bytes followed by a 4-byte displacement relative to the end of the instruction. */ fn near_jump(self, dst: Symbol) -> Relocatable { + let bytes = vec![0x0f, 0x80 | self as u8, 0x00, 0x00, 0x00, 0x00]; + Relocatable{ + data: bytes.into(), + alignment: 0, + symbols: vec![], + abs_symbols: vec![], + relocations: vec![ + /* The rel32 field starts after BOTH opcode bytes (offset 2) and ends the instruction (addend -4), following the same convention as `short_jump` and the rip-relative operand encoding. */ Relocation::new(2,
RelocationKind::Pc32, dst, -4), + ], + } + } +} + +/// Returns the ModRM + SIB + disp, and REX.B +/// The reg field of ModRM is left as 0 +fn modrm_and_maybe_sib(rm: Operand) -> Result<(Relocatable, bool), ()> { + match rm { + Operand::Register(rm) => { + if rm as u8 >= 16 { todo!() } + let mut bytes = vec![0o300]; + let modrm = &mut bytes[0]; + *modrm |= (rm as u8 & 0x7) << 0; + let rex_b = ((rm as u8 >> 3) & 0x1) != 0; + Ok((bytes.into(), rex_b)) + }, + Operand::RegisterDisplacement(RegisterDisplacement { register: rm, disp }) => { + // https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing + if rm as u8 >= 16 { todo!() } + match (rm, disp) { + (Register::Rsp | Register::R12, _) => { + // rsp/r12 must use SIB + let mut bytes = if disp == 0 { + vec![0o004, 0o040] + } else if let Ok(disp) = i8::try_from(disp) { + let mut bytes = vec![0o104, 0o040, 0x00]; + bytes[2..].copy_from_slice(&disp.to_le_bytes()); + bytes + } else { + let mut bytes = vec![0o204, 0o040, 0x00, 0x00, 0x00, 0x00]; + bytes[2..].copy_from_slice(&disp.to_le_bytes()); + bytes + }; + + let [modrm, sib] = <&mut [u8; 2]>::try_from(&mut bytes[..2]).unwrap(); + *sib |= (rm as u8 & 0x7) << 0; // base (the index field is preset to 0b100 = none) + let rex_b = ((rm as u8 >> 3) & 0x1) != 0; + + Ok((bytes.into(), rex_b)) + }, + (_, 0) if !matches!(rm, Register::Rbp | Register::R13) => { + // Rbp/R13 must use disp8 for 0 because of rip-relative + let mut bytes = vec![0o000]; + let modrm = &mut bytes[0]; + *modrm |= (rm as u8 & 0x7) << 0; + let rex_b = ((rm as u8 >> 3) & 0x1) != 0; + Ok((bytes.into(), rex_b)) + }, + (_, disp) => { + let mut bytes = if let Ok(disp) = i8::try_from(disp) { + let mut bytes = vec![0o100, 0x00]; + bytes[1..].copy_from_slice(&disp.to_le_bytes()); + bytes + } else { + let mut bytes = vec![0o200, 0x00, 0x00, 0x00, 0x00]; + bytes[1..].copy_from_slice(&disp.to_le_bytes()); + bytes + }; + let modrm = &mut bytes[0]; + *modrm |= (rm as u8 & 0x7) << 0; + let rex_b = ((rm as u8 >> 3) & 0x1) != 0; + Ok((bytes.into(),
rex_b)) + }, + } + }, + Operand::Symbol(sym) => { + let mut bytes = vec![ + 0o000, 0x00, 0x00, 0x00, 0x00, + ]; + + let rex_b = false; // rip-relative uses b.r/m = 0.101 or 1.101, so just let b = 0 + + let modrm_rm = 0b101 << 0; // rip-relative uses b.r/m = 0.101 + bytes[0] |= modrm_rm; + + /* The 4 displacement bytes follow the ModRM byte and are filled in by a pc-relative relocation against `sym`. */ let code = Relocatable{ + data: bytes.into(), + alignment: 0, + symbols: vec![], + abs_symbols: vec![], + relocations: vec![ + Relocation::new(1, RelocationKind::Pc32, sym, -4) + ], + }; + + Ok((code, rex_b)) + }, + } +} + +impl Machine { + /* Encodes a two-operand instruction, choosing the opcode direction from whichever operand is a register (a register source is tried first). Forms where neither operand is a register are not implemented. */ fn simple(self, src: Operand, dst: Operand, instruction: SimpleInstruction) -> Result { + use Operand::*; + match (src, dst) { + (Register(src), dst) => self.simple_reg_to_rm(src, dst, instruction), + (src, Register(dst)) => self.simple_rm_to_reg(src, dst, instruction), + _ => todo!(), + } + } + + /* Register source, r/m destination. Emits prefix byte 0x48, the instruction's r/m-destination opcode and the ModRM/SIB/displacement bytes for `dst`, then patches the prefix's R/B bits and the ModRM reg field with `src`. Fails when `src` is not one of registers 0-15 or the instruction has no such opcode. */ fn simple_reg_to_rm(self, src: Register, dst: Operand, instruction: SimpleInstruction) -> Result { + let src = src as u8; + if src >= 16 { return Err(()); } + + let (mut code, rex_b) = modrm_and_maybe_sib(dst)?; + let opcode = vec![0x48, instruction.dst_rm_opcode().ok_or(())?]; + let mut code = Relocatable::from(opcode) + code; + + let bytes = code.data.to_mut(); + + let rex_b = (rex_b as u8) << 0; + let rex_r = ((src >> 3) & 0x1) << 2; + bytes[0] |= rex_b | rex_r; + + /* Byte 2 is the ModRM byte: prefix and opcode occupy bytes 0 and 1. */ let modrm_reg = (src & 0x7) << 3; + bytes[2] |= modrm_reg; + Ok(code) + } + + /* Mirror image of `simple_reg_to_rm`: r/m source, register destination, using the instruction's r/m-source opcode. */ fn simple_rm_to_reg(self, src: Operand, dst: Register, instruction: SimpleInstruction) -> Result { + let dst = dst as u8; + if dst >= 16 { return Err(()); } + + let (mut code, rex_b) = modrm_and_maybe_sib(src)?; + let opcode = vec![0x48, instruction.src_rm_opcode().ok_or(())?]; + let mut code = Relocatable::from(opcode) + code; + + let bytes = code.data.to_mut(); + + let rex_b = (rex_b as u8) << 0; + let rex_r = ((dst >> 3) & 0x1) << 2; + bytes[0] |= rex_b | rex_r; + + let modrm_reg = (dst & 0x7) << 3; + bytes[2] |= modrm_reg; + Ok(code) + } + + fn simple_reg_to_reg(self, src: Register, dst: Register,
instruction: SimpleInstruction) -> Result { + self.simple_reg_to_rm(src, dst.into(), instruction) + } + + /* The wrappers below only convert their typed operand into an `Operand` and forward to the matching encoder. */ fn simple_reg_to_symbol(self, src: Register, dst: Symbol, instruction: SimpleInstruction) -> Result { + self.simple_reg_to_rm(src, dst.into(), instruction) + } + + fn simple_symbol_to_reg(self, src: Symbol, dst: Register, instruction: SimpleInstruction) -> Result { + self.simple_rm_to_reg(src.into(), dst, instruction) + } + + fn simple_reg_to_memory(self, src: Register, dst: RegisterDisplacement, instruction: SimpleInstruction) -> Result { + self.simple_reg_to_rm(src, dst.into(), instruction) + } + + fn simple_memory_to_reg(self, src: RegisterDisplacement, dst: Register, instruction: SimpleInstruction) -> Result { + self.simple_rm_to_reg(src.into(), dst, instruction) + } +} + + +/* Registers holding the first six arguments, in argument order. */ const ARG_REGISTERS: [Register; 6] = [ + Register::Rdi, + Register::Rsi, + Register::Rdx, + Register::Rcx, + Register::R8, + Register::R9, +]; + +impl super::super::Machine for Machine { + type Register = Register; + type Clobber = Vec; + + /* Log2 of the natural alignment: 3 means 8 bytes. */ fn natural_alignment(self) -> u32 { 3 } + + /* Builds the function entry code: frame setup, reservation of `stack_slots` 8-byte slots, and spilling of the incoming arguments into the slots listed in `arg_slots` (first six from registers, the rest from the caller's frame). Rejects any argument slot that lies outside the frame. The epilogue and abort parts are not implemented yet. */ fn function_prologue_epilogue_abort(self, stack_slots: usize, arg_slots: Vec) -> Result<(Object, Object, Object), CompileError<'static>> { + for arg in &arg_slots { + if *arg >= stack_slots { + return Err(CompileError::Other(format!("invalid arg slot: {} is >= {}, the number of stack slots", arg, stack_slots).into())); + } + } + let mut code: Vec = vec![ + 0xf3, 0x0f, 0x1e, 0xfa, // endbr64 + 0x55, // push %rbp + 0x48, 0x89, 0xe5, // mov %rsp,%rbp + 0x48, 0x81, 0xec, 0x00, 0x00, 0x00, 0x00, // sub $slots*8,%rsp + ]; + /* The immediate is a byte count: every slot is 8 bytes wide, matching the `stack_index * 8` addressing used for locals. */ let frame_size = i32::try_from(stack_slots.checked_mul(8).unwrap()).unwrap(); + code[11..].copy_from_slice(&frame_size.to_le_bytes()); + + let mut arg_slots = arg_slots.into_iter(); + + let mut code = Relocatable::from(code); + + for (reg, slot) in ARG_REGISTERS.into_iter().zip(arg_slots.by_ref().take(6)) { + let mut bytes = [ + 0x48, 0x89, 0x84, 0x24, 0x00, 0x00, 0x00, 0x00, + ]; + + let reg = reg as u8; + + match
reg >> 3 { + 0 => {}, + 1 => { + bytes[0] |= 0x04; + }, + _ => unreachable!(), + }; + let mask = (reg & 0x07) << 3; + bytes[2] |= mask; + /* Fill in the 32-bit displacement so the argument lands in ITS slot (`slot * 8` from %rsp); with the template's zero displacement every register argument would be stored at 0(%rsp). */ let disp = i32::try_from(slot.checked_mul(8).unwrap()).unwrap(); + bytes[4..].copy_from_slice(&disp.to_le_bytes()); + + code += bytes.into(); + } + + /* Remaining arguments were passed on the caller's stack: copy each from above the saved frame pointer into its slot, going through %rax. */ for (frame_index, slot) in arg_slots.enumerate() { + // Offset for the frame pointer and the return address + let frame_index = frame_index + 2; + let frame_offset = frame_index.checked_mul(8).unwrap(); + let disp = i32::try_from(frame_offset).unwrap(); + let src = RegisterDisplacement { register: Register::Rbp, disp }; + let stack_offset = slot.checked_mul(8).unwrap(); + let disp = i32::try_from(stack_offset).unwrap(); + let dst = RegisterDisplacement { register: Register::Rsp, disp }; + let intermediate = Register::Rax; + + code += self.simple(src.into(), intermediate.into(), SimpleInstruction::Mov).unwrap(); + code += self.simple(intermediate.into(), dst.into(), SimpleInstruction::Mov).unwrap(); + } + + todo!() + } + + /// alignment is the power of two of the minimum alignment, e.g. 0 is 1, 3 is 8. + fn add_data(self, data: Vec, alignment: u32, symbol: Symbol) -> Object { + let data = Relocatable{ + data: data.into(), + alignment, + symbols: vec![(symbol, 0)], + abs_symbols: vec![], + relocations: vec![], + }; + Object { + code: Default::default(), + data, + } + } + + /* Only caller-saved registers are handed out as scratch registers. */ fn usable_registers(self) -> Vec { + ALL_REGISTERS.iter().copied().filter(Register::is_caller_saved).collect() + } + /* Emits a `mov` that loads the variable into `into`: locals are read from `stack_index * 8` off %rsp, statics rip-relative through their symbol. Reports no clobbers. */ fn load_from(self, into: Register, from: &Variable) -> (Object, Self::Clobber) { + let Variable { location, .. } = from; + match location { + Location::Local { stack_index } => { + let stack_offset = stack_index.checked_mul(8).unwrap(); + let disp = i32::try_from(stack_offset).unwrap(); + let src = RegisterDisplacement { register: Register::Rsp, disp }; + + ( + self.simple_memory_to_reg(src, into, SimpleInstruction::Mov).unwrap().into(), + Default::default(), + ) + }, + Location::Static { symbol, /*atomic*/ ..
} => { + // Aligned loads on amd64 are always atomic + ( + self.simple_symbol_to_reg(symbol.clone(), into, SimpleInstruction::Mov).unwrap().into(), + Default::default(), + ) + }, + } + } + + /* Emits a `mov` that stores `from` into the variable: locals are written at `stack_index * 8` off %rsp, statics rip-relative through their symbol. Mutability is not enforced here (see the note below); no clobbers are reported. */ fn store_to(self, from: Register, into: &Variable) -> (Object, Self::Clobber) { + let Variable { location, mutable, ..} = into; + // if !mutable { panic!("TODO: error cannot change immutable variable"); } // NOTE: this doesn't work because we use store_to to initialize also + match location { + Location::Local { stack_index } => { + let stack_offset = stack_index.checked_mul(8).unwrap(); + let disp = i32::try_from(stack_offset).unwrap(); + let dst = RegisterDisplacement { register: Register::Rsp, disp }; + + ( + self.simple_reg_to_memory(from, dst, SimpleInstruction::Mov).unwrap().into(), + Default::default(), + ) + }, + Location::Static { symbol, /*atomic*/ .. } => { + // Aligned stores on amd64 are always atomic + ( + self.simple_reg_to_symbol(from, symbol.clone(), SimpleInstruction::Mov).unwrap().into(), + Default::default(), + ) + }, + } + } + + /* Register move, `dst = src`. */ fn copy_from(self, dst: Register, src: Register) -> (Object, Self::Clobber) { + ( + self.simple_reg_to_reg(src, dst, SimpleInstruction::Mov).unwrap().into(), + Default::default(), + ) + } + + /* `dst += src` with a plain `add`; no overflow handling. */ fn add_assign(self, dst: Register, src: Register) -> (Object, Self::Clobber) { + ( + self.simple_reg_to_reg(src, dst, SimpleInstruction::Add).unwrap().into(), + Default::default(), + ) + } + + /* Not implemented yet: calling this panics. */ fn checked_add_assign(self, lhs: Register, rhs: Register) -> (Object, Self::Clobber) { + todo!() + } + + /* `dst -= src` with a plain `sub`; no overflow handling. */ fn sub_assign(self, dst: Register, src: Register) -> (Object, Self::Clobber) { + ( + self.simple_reg_to_reg(src, dst, SimpleInstruction::Sub).unwrap().into(), + Default::default(), + ) + } + + /* Not implemented yet: calling this panics. */ fn checked_sub_assign(self, lhs: Register, rhs: Register) -> (Object, Self::Clobber) { + todo!() + } +} + +#[cfg(test)] +mod tests { + use super::Register::*; + use crate::{compiler::Machine, code::Object}; + /* Checks the register-to-register `add` encoding, including the prefix bits needed for r8-r15 on either side. */ #[test] + fn add_assign() { + let machine =
super::Machine; + /* add %rax,%rax */ let (code, clobbers) = machine.add_assign(Rax, Rax); + assert_eq!(code, Object::from([0x48, 0x01, 0xc0])); + assert_eq!(clobbers, []); + + /* add %r8,%rax: the extended SOURCE register sets the prefix's R bit */ let (code, clobbers) = machine.add_assign(Rax, R8); + assert_eq!(code, Object::from([0x4c, 0x01, 0xc0])); + assert_eq!(clobbers, []); + + /* add %rax,%r15: the extended DESTINATION register sets the prefix's B bit */ let (code, clobbers) = machine.add_assign(R15, Rax); + assert_eq!(code, Object::from([0x49, 0x01, 0xc7])); + assert_eq!(clobbers, []); + + /* add %r10,%r10: both extension bits set */ let (code, clobbers) = machine.add_assign(R10, R10); + assert_eq!(code, Object::from([0x4d, 0x01, 0xd2])); + assert_eq!(clobbers, []); + } +} \ No newline at end of file diff --git a/src/function.rs b/src/function.rs index bc2172d..5deefe4 100644 --- a/src/function.rs +++ b/src/function.rs @@ -237,6 +237,7 @@ impl Function { code += Relocatable::from(function_footer_code()); code += Relocatable { data: function_abort_code().into(), + alignment: 0, symbols: vec![(Symbol::abort(), 0)], abs_symbols: vec![], relocations: vec![], diff --git a/src/lib.rs b/src/lib.rs index 05197b6..336ffa3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,6 +7,8 @@ pub(crate) mod raw_code; pub(crate) mod commands; pub(crate) mod code; pub mod function; +pub mod ast; +pub mod compiler; #[cfg(test)]