AST
The parser in the upcoming chapter is responsible for turning Tokens into an abstract syntax tree (AST). It is much nicer to work on the AST compared to the source text.
All JavaScript toolings work on the AST level, for example:
- A linter (e.g. ESLint) checks the AST for errors
- A formatter (e.g.prettier) prints the AST back to JavaScript text
- A minifier (e.g. terser) transforms the AST
- A bundler connects all import and export statements between ASTs from different files
In this chapter, let's construct a JavaScript AST by using Rust structs and enums.
Getting familiar with the AST
To get ourselves comfortable with an AST, let's visit ASTExplorer and see what it looks like. On the top panel, select JavaScript, and then acorn, type in var a and we will see a tree view and a JSON view.
{
"type": "Program",
"start": 0,
"end": 5,
"body": [
{
"type": "VariableDeclaration",
"start": 0,
"end": 5,
"declarations": [
{
"type": "VariableDeclarator",
"start": 4,
"end": 5,
"id": {
"type": "Identifier",
"start": 4,
"end": 5,
"name": "a"
},
"init": null
}
],
"kind": "var"
}
],
"sourceType": "script"
}
``````rust
#[derive(Debug, Default, Clone, Copy, Serialize, PartialEq, Eq)]
pub struct Node {
/// Start offset in source
pub start: usize,
/// End offset in source
pub end: usize,
}
impl Node {
pub fn new(start: usize, end: usize) -> Self {
Self { start, end }
}
}
``````rust
pub struct Program {
pub node: Node,
pub body: Vec<Statement>,
}
pub enum Statement {
VariableDeclarationStatement(VariableDeclaration),
}
pub struct VariableDeclaration {
pub node: Node,
pub declarations: Vec<VariableDeclarator>,
}
pub struct VariableDeclarator {
pub node: Node,
pub id: BindingIdentifier,
pub init: Option<Expression>,
}
pub struct BindingIdentifier {
pub node: Node,
pub name: String,
}
pub enum Expression {
}
``````rust
pub enum Expression {
AwaitExpression(AwaitExpression),
YieldExpression(YieldExpression),
}
pub struct AwaitExpression {
pub node: Node,
pub expression: Box<Expression>,
}
pub struct YieldExpression {
pub node: Node,
pub expression: Box<Expression>,
}
``````rust
use bumpalo::collections::Vec;
use bumpalo::boxed::Box;
pub enum Expression<'a> {
AwaitExpression(Box<'a, AwaitExpression>),
YieldExpression(Box<'a, YieldExpression>),
}
pub struct AwaitExpression<'a> {
pub node: Node,
pub expression: Expression<'a>,
}
pub struct YieldExpression<'a> {
pub node: Node,
pub expression: Expression<'a>,
}
``````rust
enum Name {
Anonymous, // 0 byte payload
Nickname(String), // 24 byte payload
FullName{ first: String, last: String }, // 48 byte payload
}
``````rust
pub enum Expression {
AwaitExpression(Box<AwaitExpression>),
YieldExpression(Box<YieldExpression>),
}
pub struct AwaitExpression {
pub node: Node,
pub expression: Expression,
}
pub struct YieldExpression {
pub node: Node,
pub expression: Expression,
}
``````rust
#[test]
fn no_bloat_enum_sizes() {
use std::mem::size_of;
assert_eq!(size_of::<Statement>(), 16);
assert_eq!(size_of::<Expression>(), 16);
}
``````rust
// https://github.com/rust-lang/rust/blob/9c20b2a8cc7588decb6de25ac6a7912dcef24d65/compiler/rustc_ast/src/ast.rs#L3033-L3042
// Some nodes are used a lot. Make sure they don't unintentionally get bigger.
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
mod size_asserts {
use super::*;
use rustc_data_structures::static_assert_size;
// These are in alphabetical order, which is easy to maintain.
static_assert_size!(AssocItem, 160);
static_assert_size!(AssocItemKind, 72);
static_assert_size!(Attribute, 32);
static_assert_size!(Block, 48);
``````bash
RUSTFLAGS=-Zprint-type-sizes cargo +nightly build -p name_of_the_crate --releaseprint-type-size type: ast::js::Statement: 16 bytes, alignment: 8 bytes print-type-size discriminant: 8 bytes print-type-size variant BlockStatement: 8 bytes print-type-size field .0: 8 bytes print-type-size variant BreakStatement: 8 bytes print-type-size field .0: 8 bytes print-type-size variant ContinueStatement: 8 bytes print-type-size field .0: 8 bytes print-type-size variant DebuggerStatement: 8 bytes print-type-size field .0: 8 bytes
use serde::Serialize;
#[derive(Debug, Clone, Serialize, PartialEq)]
#[serde(tag = "type")]
#[cfg_attr(feature = "estree", serde(rename = "Identifier"))]
pub struct IdentifierReference {
#[serde(flatten)]
pub node: Node,
pub name: Atom,
}
#[derive(Debug, Clone, Serialize, PartialEq, Hash)]
#[serde(tag = "type")]
#[cfg_attr(feature = "estree", serde(rename = "Identifier"))]
pub struct BindingIdentifier {
#[serde(flatten)]
pub node: Node,
pub name: Atom,
}
#[derive(Debug, Serialize, PartialEq)]
#[serde(untagged)]
pub enum Expression<'a> {
...
}
```