Skip to content

Pursuit of Performance on Building a JavaScript Compiler

Originally posted on https://rustmagazine.org/issue-3/javascript-compiler/

On Performance

After two years of writing Rust, performance has become an ingrained discipline for me - it boils down to allocate less memory and use fewer CPU cycles.

However, achieving optimal performance can be difficult without the knowledge of the problem domain or awareness of potential solutions.

I will take you on my journey of performance and optimization in the following sections. My preferred method of learning is through a combination of research, trial, and error, so the following sections will be organized as such.

Parsing

Oxc is a standard compiler that includes an abstract syntax tree (AST), a lexer, and a recursive descent parser.

Abstract Syntax Tree (AST)

The first architectural design for a compiler is its AST.

All JavaScript tools work on the AST level, for example:

  • A linter (e.g. ESLint) checks the AST for errors
  • A formatter (e.g. prettier) prints the AST back to JavaScript text
  • A minifier (e.g. terser) transforms the AST
  • A bundler connects all import and export statements between ASTs from different files

It will be painful to build these tools if the AST is not user-friendly.

For JavaScript, the most used AST specification is estree. My first AST version replicates estree:

rust
pub struct Program {
    pub node: Node,
    pub body: Vec<Statement>,
}

pub enum Statement {
    VariableDeclarationStatement(VariableDeclaration),
}

pub struct VariableDeclaration {
    pub node: Node,
    pub declarations: Vec<VariableDeclarator>,
}
```rust
pub enum Statement<'ast> {
    Expression(ExpressionNode<'ast>),
}
text
error[E0072]: recursive types `Enum` and `Variant` have infinite size
 --> crates/oxc_linter/src/lib.rs:1:1
  |
1 | enum Enum {
  | ^^^^^^^^^
2 |     Variant(Variant),
  |             ------- recursive without indirection
3 | }
4 | struct Variant {
  | ^^^^^^^^^^^^^^
5 |     field: Enum,
  |            ---- recursive without indirection
  |
help: insert some indirection (e.g., a `Box`, `Rc`, or `&`) to break the cycle
  |
2 ~     Variant(Box<Variant>),
3 | }
4 | struct Variant {
5 ~     field: Box<Enum>,
rust
enum Enum {
    A, // 0 byte payload
    B(String), // 24 byte payload
    C { first: String, last: String }, // 48 byte payload
}
```rust
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
#[test]
fn no_bloat_enum_sizes() {
    use std::mem::size_of;
    use crate::ast::*;
    assert_eq!(size_of::<Statement>(), 16);
    assert_eq!(size_of::<Expression>(), 16);
    assert_eq!(size_of::<Declaration>(), 16);
}
```rust
pub struct Node {
    pub start: usize,
    pub end: usize,
}
```rust
pub struct StringLiteral<'a> {
    pub value: &'a str,
}

pub struct Identifier<'a> {
    pub name: &'a str,
}
```rust
pub(crate) static DYNAMIC_SET: Lazy<Mutex<Set>> = Lazy::new(|| {
    Mutex::new({

// ... in another place
let ptr: std::ptr::NonNull<Entry> =
    DYNAMIC_SET.lock().insert(string_to_add, hash.g);
```rust
pub(crate) fn insert(&mut self, string: Cow<str>, hash: u32) -> NonNull<Entry> {
    let bucket_index = (hash & BUCKET_MASK) as usize;
    {
        let mut ptr: Option<&mut Box<Entry>> = self.buckets[bucket_index].as_mut();

        while let Some(entry) = ptr.take() {
            if entry.hash == hash && *entry.string == *string {
                if entry.ref_count.fetch_add(1, SeqCst) > 0 {
                    return NonNull::from(&mut **entry);
                }
                entry.ref_count.fetch_sub(1, SeqCst);
                break;
            }
            ptr = entry.next_in_bucket.as_mut();
        }
    }
    debug_assert!(mem::align_of::<Entry>() >= ENTRY_ALIGNMENT);
    let string = string.into_owned();
    let mut entry = Box::new(Entry {
        next_in_bucket: self.buckets[bucket_index].take(),
        hash,
        ref_count: AtomicIsize::new(1),
        string: string.into_boxed_str(),
    });
    let ptr = NonNull::from(&mut *entry);
    self.buckets[bucket_index] = Some(entry);

    ptr
}

https://github.com/servo/string-cache/pull/268

rust
enum Str {
    Static(&'static str),
    Inline(InlineReprensation),
    Heap(String),
}
```rust
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
#[test]
fn test_size() {
    use std::mem::size_of;
    assert_eq!(size_of::<String>(), size_of::<InlineReprensation>());
}
```rust
pub struct Token {
    pub kind: Kind,
}
```rust
pub enum Kind {
    // Keywords
    For,
    While,
    ...
    // Literals
    String(String),
    Num(f64),
    ...
}
```rust
pub struct Token<'a> {
    pub kind: Kind,
    pub value: TokenValue
}

pub enum TokenValue {
    None,
    String(String),
    Num(f64),
}
```rust
pub struct String {
    vec: Vec<u8>,
}

pub struct Vec {
    buf: RawVec<T, A>,
    len: usize,
}

pub struct RawVec {
    ptr: Unique<T>,
    cap: usize,
    alloc: A,
}
```rust
pub enum TokenValue<'a> {
    None,
    String(&'a str),
    Num(f64),
}
```rust
fn finish(&mut self, lexer: &Lexer<'alloc>) -> &'alloc str {
    match self.value.take() {
        Some(arena_string) => arena_string.into_bump_str(),
        None => &self.start[..self.start.len() - lexer.chars.as_str().len()],
    }
}
```rust
fn match_keyword(s: &str) -> Self {
    match s {
        "as" => As,
        "do" => Do,
        "if" => If,
        ...
        "constructor" => Constructor,
        _ => Ident,
    }
}
llvm
switch i64 %s.1, label %bb6 [
  i64 2, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit.i"
  i64 3, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit280.i"
  i64 4, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit325.i"
  i64 5, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit380.i"
  i64 6, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit450.i"
  i64 7, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit540.i"
  i64 8, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit590.i"
  i64 9, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit625.i"
  i64 10, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit655.i"
  i64 11, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit665.i"
], !dbg !191362
rust
pub trait Visit<'a>: Sized {
    // ... lots of visit functions

    fn visit_debugger_statement(&mut self, stmt: &'a DebuggerStatement) {
        // report error
    }
}
```rust
struct Node {
    parent: Option<Rc<Node>>,
}
```rust
struct Tree {
    nodes: Vec<Node>
}

struct Node {
    parent: Option<usize> // index into `nodes`
}
```rust
struct Node<'a> {
    kind: AstKind<'a>
}

enum AstKind<'a> {
    BlockStatement(&'a BlockStatement<'a>),
    // ...
    ArrayExpression(&'a ArrayExpression<'a>),
    // ...
    Class(&'a Class<'a>),
    // ...
}
```rust
pub trait Visit<'a> {
    fn enter_node(&mut self, _kind: AstKind<'a>) {}
    fn leave_node(&mut self, _kind: AstKind<'a>) {}

    fn visit_block_statement(&mut self, stmt: &'a BlockStatement<'a>) {
        let kind = AstKind::BlockStatement(stmt);
        self.enter_node(kind);
        self.visit_statements(&stmt.body);
        self.leave_node(kind);
    }
}

impl<'a> Visit<'a> for TreeBuilder<'a> {
    fn enter_node(&mut self, kind: AstKind<'a>) {
        self.push_ast_node(kind);
    }

    fn leave_node(&mut self, kind: AstKind<'a>) {
        self.pop_ast_node();
    }
}
```rust
for node in nodes {
    match node.get().kind {
        AstKind::DebuggerStatement(stmt) => {
        // report error
        }
        _ => {}
    }
}
```rust
let walk = Walk::new(&self.options);
rayon::spawn(move || {
    walk.iter().for_each(|path| {
        tx_path.send(path).unwrap();
    });
});

let linter = Arc::clone(&self.linter);
rayon::spawn(move || {
    while let Ok(path) = rx_path.recv() {
        let tx_error = tx_error.clone();
        let linter = Arc::clone(&linter);
        rayon::spawn(move || {
            if let Some(diagnostics) = Self::lint_path(&linter, &path) {
                tx_error.send(diagnostics).unwrap();
            }
            drop(tx_error);
        });
    }
});
```rust
use std::io::{self, Write};

let stdout = io::stdout(); // get the global stdout entity
let mut handle = io::BufWriter::new(stdout); // optional: wrap that handle in a buffer
writeln!(handle, "foo: {}", 42); // add `?` if you care about errors here
```rust
let stdout = io::stdout(); // get the global stdout entity
let mut handle = stdout.lock(); // acquire a lock on it
writeln!(handle, "foo: {}", 42); // add `?` if you care about errors here