Skip to content

AST

The parser in the upcoming chapter is responsible for turning Tokens into an abstract syntax tree (AST). It is much nicer to work on the AST compared to the source text.

All JavaScript tooling works at the AST level, for example:

  • A linter (e.g. ESLint) checks the AST for errors
  • A formatter (e.g. Prettier) prints the AST back to JavaScript text
  • A minifier (e.g. terser) transforms the AST
  • A bundler connects all import and export statements between ASTs from different files

In this chapter, let's construct a JavaScript AST by using Rust structs and enums.

Getting familiar with the AST

To get ourselves comfortable with an AST, let's visit ASTExplorer and see what it looks like. On the top panel, select JavaScript and then acorn, type in `var a`, and we will see a tree view and a JSON view.

```json
{
  "type": "Program",
  "start": 0,
  "end": 5,
  "body": [
    {
      "type": "VariableDeclaration",
      "start": 0,
      "end": 5,
      "declarations": [
        {
          "type": "VariableDeclarator",
          "start": 4,
          "end": 5,
          "id": {
            "type": "Identifier",
            "start": 4,
            "end": 5,
            "name": "a"
          },
          "init": null
        }
      ],
      "kind": "var"
    }
  ],
  "sourceType": "script"
}
```

```rust
/// Source location of an AST node, stored as a pair of offsets into the source text.
///
/// The AST structs in this chapter embed it as their `node` field.
#[derive(Debug, Default, Clone, Copy, Serialize, PartialEq, Eq)]
pub struct Node {
    /// Start offset in source
    pub start: usize,

    /// End offset in source
    pub end: usize,
}

impl Node {
    pub fn new(start: usize, end: usize) -> Self {
        Self { start, end }
    }
}
```

```rust
/// Root node of the AST (the `"Program"` object in the JSON view).
pub struct Program {
    /// Source span of the whole program.
    pub node: Node,
    /// The statements that make up the program body.
    pub body: Vec<Statement>,
}

/// A statement in a program body; only variable declarations are modelled so far.
pub enum Statement {
    /// A variable declaration such as `var a`.
    VariableDeclarationStatement(VariableDeclaration),
}

/// A variable declaration statement together with its list of declarators.
pub struct VariableDeclaration {
    /// Source span of the declaration.
    pub node: Node,
    /// The declarators contained in this declaration.
    pub declarations: Vec<VariableDeclarator>,
}

/// One declarator of a variable declaration: the bound name and an optional initializer.
pub struct VariableDeclarator {
    /// Source span of this declarator.
    pub node: Node,
    /// The identifier being declared.
    pub id: BindingIdentifier,
    /// Initializer expression; `None` when there is none (`"init": null` for `var a`).
    pub init: Option<Expression>,
}

/// The identifier introduced by a declarator (the `"Identifier"` object in the JSON view).
pub struct BindingIdentifier {
    /// Source span of the identifier.
    pub node: Node,
    /// The identifier text, e.g. `a` for `var a`.
    pub name: String,
}

/// Placeholder for expressions; no variants are defined at this point.
pub enum Expression {
}
```

```rust
/// An expression. Each variant stores its node struct inline; the recursion back into
/// `Expression` is broken by the `Box` inside those structs.
pub enum Expression {
    /// An `await` expression.
    AwaitExpression(AwaitExpression),
    /// A `yield` expression.
    YieldExpression(YieldExpression),
}

/// An `await` expression wrapping another expression.
pub struct AwaitExpression {
    /// Source span of the expression.
    pub node: Node,
    /// The wrapped expression. It is boxed because `Expression` itself contains
    /// `AwaitExpression`; without the indirection the type would be infinitely sized.
    pub expression: Box<Expression>,
}

/// A `yield` expression wrapping another expression.
pub struct YieldExpression {
    /// Source span of the expression.
    pub node: Node,
    /// The wrapped expression. It is boxed because `Expression` itself contains
    /// `YieldExpression`; without the indirection the type would be infinitely sized.
    pub expression: Box<Expression>,
}
```

```rust
use bumpalo::collections::Vec;
use bumpalo::boxed::Box;

/// An expression whose variants are boxed with `bumpalo`'s `Box<'a, T>`.
///
/// `'a` is the lifetime carried by those boxes. The node structs are generic over the
/// same lifetime, so it has to be spelled out on them as well: lifetimes cannot be
/// elided in the field types of an enum definition.
pub enum Expression<'a> {
    /// An `await` expression.
    AwaitExpression(Box<'a, AwaitExpression<'a>>),
    /// A `yield` expression.
    YieldExpression(Box<'a, YieldExpression<'a>>),
}

/// An `await` expression; `'a` is the lifetime parameter passed on to `Expression<'a>`.
pub struct AwaitExpression<'a> {
    /// Source span of the expression.
    pub node: Node,
    /// The wrapped expression, stored inline (the boxing happens in `Expression`'s variants).
    pub expression: Expression<'a>,
}

/// A `yield` expression; `'a` is the lifetime parameter passed on to `Expression<'a>`.
pub struct YieldExpression<'a> {
    /// Source span of the expression.
    pub node: Node,
    /// The wrapped expression, stored inline (the boxing happens in `Expression`'s variants).
    pub expression: Expression<'a>,
}
```

```rust
/// Example enum whose variants carry payloads of very different sizes.
enum Name {
    /// No payload: 0 bytes.
    Anonymous,
    /// One `String`: 24 byte payload.
    Nickname(String),
    /// Two `String`s: 48 byte payload.
    FullName { first: String, last: String },
}
```

```rust
/// An expression with every variant boxed, so the enum holds only a `Box` per variant
/// instead of the node struct itself.
pub enum Expression {
    /// An `await` expression.
    AwaitExpression(Box<AwaitExpression>),
    /// A `yield` expression.
    YieldExpression(Box<YieldExpression>),
}

/// An `await` expression wrapping another expression.
pub struct AwaitExpression {
    /// Source span of the expression.
    pub node: Node,
    /// The wrapped expression, stored inline; `Expression` boxes its variants, which
    /// keeps this recursive type finitely sized.
    pub expression: Expression,
}

/// A `yield` expression wrapping another expression.
pub struct YieldExpression {
    /// Source span of the expression.
    pub node: Node,
    /// The wrapped expression, stored inline; `Expression` boxes its variants, which
    /// keeps this recursive type finitely sized.
    pub expression: Expression,
}
```

```rust
/// Guards against the AST enums growing by accident.
///
/// The expected 16 bytes are an 8-byte discriminant plus an 8-byte payload, which only
/// holds when pointers are 8 bytes wide, so the test is compiled for 64-bit targets only.
#[cfg(target_pointer_width = "64")]
#[test]
fn no_bloat_enum_sizes() {
    use std::mem::size_of;
    assert_eq!(size_of::<Statement>(), 16);
    assert_eq!(size_of::<Expression>(), 16);
}
```

```rust
// https://github.com/rust-lang/rust/blob/9c20b2a8cc7588decb6de25ac6a7912dcef24d65/compiler/rustc_ast/src/ast.rs#L3033-L3042

// Some nodes are used a lot. Make sure they don't unintentionally get bigger.
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
mod size_asserts {
    use super::*;
    use rustc_data_structures::static_assert_size;
    // These are in alphabetical order, which is easy to maintain.
    static_assert_size!(AssocItem, 160);
    static_assert_size!(AssocItemKind, 72);
    static_assert_size!(Attribute, 32);
    static_assert_size!(Block, 48);
```

```bash
RUSTFLAGS=-Zprint-type-sizes cargo +nightly build -p name_of_the_crate --release

print-type-size type: ast::js::Statement: 16 bytes, alignment: 8 bytes
print-type-size     discriminant: 8 bytes
print-type-size     variant BlockStatement: 8 bytes
print-type-size         field .0: 8 bytes
print-type-size     variant BreakStatement: 8 bytes
print-type-size         field .0: 8 bytes
print-type-size     variant ContinueStatement: 8 bytes
print-type-size         field .0: 8 bytes
print-type-size     variant DebuggerStatement: 8 bytes
print-type-size         field .0: 8 bytes
```

```rust
use serde::Serialize;

/// An identifier node named `IdentifierReference` (as opposed to `BindingIdentifier`).
///
/// Serialization: `tag = "type"` makes serde emit a `type` field holding the struct's
/// name, and with the `estree` feature enabled that name is replaced by `Identifier`.
#[derive(Debug, Clone, Serialize, PartialEq)]
#[serde(tag = "type")]
#[cfg_attr(feature = "estree", serde(rename = "Identifier"))]
pub struct IdentifierReference {
    /// Source span; `flatten` writes its fields directly into this object when serialized.
    #[serde(flatten)]
    pub node: Node,
    /// The identifier text (`Atom` is defined outside this snippet).
    pub name: Atom,
}

/// An identifier node named `BindingIdentifier` (as opposed to `IdentifierReference`).
///
/// Serialization: `tag = "type"` makes serde emit a `type` field holding the struct's
/// name, and with the `estree` feature enabled that name is replaced by `Identifier`.
#[derive(Debug, Clone, Serialize, PartialEq, Hash)]
#[serde(tag = "type")]
#[cfg_attr(feature = "estree", serde(rename = "Identifier"))]
pub struct BindingIdentifier {
    /// Source span; `flatten` writes its fields directly into this object when serialized.
    #[serde(flatten)]
    pub node: Node,
    /// The identifier text (`Atom` is defined outside this snippet).
    pub name: Atom,
}

/// The expression enum; its variants are elided in this snippet.
///
/// `untagged` tells serde to serialize only the content of the active variant, without
/// any marker naming the variant.
#[derive(Debug, Serialize, PartialEq)]
#[serde(untagged)]
pub enum Expression<'a> {
    ...
}
```