Pursuit of Performance on Building a JavaScript Compiler
Originally posted on https://rustmagazine.org/issue-3/javascript-compiler/
On Performance
After two years of writing Rust, performance has become an ingrained discipline for me - it boils down to allocate less memory and use fewer CPU cycles.
However, achieving optimal performance can be difficult without the knowledge of the problem domain or awareness of potential solutions.
I will take you on my journey of performance and optimization in the following sections. My preferred method of learning is through a combination of research, trial, and error, so the following sections will be organized as such.
Parsing
Oxc is a standard compiler that includes an abstract syntax tree (AST), a lexer, and a recursive descent parser.
Abstract Syntax Tree (AST)
The first architectural design for a compiler is its AST.
All JavaScript tools work on the AST level, for example:
- A linter (e.g. ESLint) checks the AST for errors
- A formatter (e.g. prettier) prints the AST back to JavaScript text
- A minifier (e.g. terser) transforms the AST
- A bundler connects all import and export statements between ASTs from different files
It will be painful to build these tools if the AST is not user-friendly.
For JavaScript, the most used AST specification is estree. My first AST version replicates estree:
pub struct Program {
pub node: Node,
pub body: Vec<Statement>,
}
pub enum Statement {
VariableDeclarationStatement(VariableDeclaration),
}
pub struct VariableDeclaration {
pub node: Node,
pub declarations: Vec<VariableDeclarator>,
}
```rust
pub enum Statement<'ast> {
Expression(ExpressionNode<'ast>),
}error[E0072]: recursive types `Enum` and `Variant` have infinite size
--> crates/oxc_linter/src/lib.rs:1:1
|
1 | enum Enum {
| ^^^^^^^^^
2 | Variant(Variant),
| ------- recursive without indirection
3 | }
4 | struct Variant {
| ^^^^^^^^^^^^^^
5 | field: Enum,
| ---- recursive without indirection
|
help: insert some indirection (e.g., a `Box`, `Rc`, or `&`) to break the cycle
|
2 ~ Variant(Box<Variant>),
3 | }
4 | struct Variant {
5 ~ field: Box<Enum>,enum Enum {
A, // 0 byte payload
B(String), // 24 byte payload
C { first: String, last: String }, // 48 byte payload
}
```rust
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
#[test]
fn no_bloat_enum_sizes() {
use std::mem::size_of;
use crate::ast::*;
assert_eq!(size_of::<Statement>(), 16);
assert_eq!(size_of::<Expression>(), 16);
assert_eq!(size_of::<Declaration>(), 16);
}
```rust
pub struct Node {
pub start: usize,
pub end: usize,
}
```rust
pub struct StringLiteral<'a> {
pub value: &'a str,
}
pub struct Identifier<'a> {
pub name: &'a str,
}
```rust
pub(crate) static DYNAMIC_SET: Lazy<Mutex<Set>> = Lazy::new(|| {
Mutex::new({
// ... in another place
let ptr: std::ptr::NonNull<Entry> =
DYNAMIC_SET.lock().insert(string_to_add, hash.g);
```rust
pub(crate) fn insert(&mut self, string: Cow<str>, hash: u32) -> NonNull<Entry> {
let bucket_index = (hash & BUCKET_MASK) as usize;
{
let mut ptr: Option<&mut Box<Entry>> = self.buckets[bucket_index].as_mut();
while let Some(entry) = ptr.take() {
if entry.hash == hash && *entry.string == *string {
if entry.ref_count.fetch_add(1, SeqCst) > 0 {
return NonNull::from(&mut **entry);
}
entry.ref_count.fetch_sub(1, SeqCst);
break;
}
ptr = entry.next_in_bucket.as_mut();
}
}
debug_assert!(mem::align_of::<Entry>() >= ENTRY_ALIGNMENT);
let string = string.into_owned();
let mut entry = Box::new(Entry {
next_in_bucket: self.buckets[bucket_index].take(),
hash,
ref_count: AtomicIsize::new(1),
string: string.into_boxed_str(),
});
let ptr = NonNull::from(&mut *entry);
self.buckets[bucket_index] = Some(entry);
ptr
}https://github.com/servo/string-cache/pull/268
enum Str {
Static(&'static str),
Inline(InlineReprensation),
Heap(String),
}
```rust
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
#[test]
fn test_size() {
use std::mem::size_of;
assert_eq!(size_of::<String>(), size_of::<InlineReprensation>());
}
```rust
pub struct Token {
pub kind: Kind,
}
```rust
pub enum Kind {
// Keywords
For,
While,
...
// Literals
String(String),
Num(f64),
...
}
```rust
pub struct Token<'a> {
pub kind: Kind,
pub value: TokenValue
}
pub enum TokenValue {
None,
String(String),
Num(f64),
}
```rust
pub struct String {
vec: Vec<u8>,
}
pub struct Vec {
buf: RawVec<T, A>,
len: usize,
}
pub struct RawVec {
ptr: Unique<T>,
cap: usize,
alloc: A,
}
```rust
pub enum TokenValue<'a> {
None,
String(&'a str),
Num(f64),
}
```rust
fn finish(&mut self, lexer: &Lexer<'alloc>) -> &'alloc str {
match self.value.take() {
Some(arena_string) => arena_string.into_bump_str(),
None => &self.start[..self.start.len() - lexer.chars.as_str().len()],
}
}
```rust
fn match_keyword(s: &str) -> Self {
match s {
"as" => As,
"do" => Do,
"if" => If,
...
"constructor" => Constructor,
_ => Ident,
}
}switch i64 %s.1, label %bb6 [
i64 2, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit.i"
i64 3, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit280.i"
i64 4, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit325.i"
i64 5, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit380.i"
i64 6, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit450.i"
i64 7, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit540.i"
i64 8, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit590.i"
i64 9, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit625.i"
i64 10, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit655.i"
i64 11, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit665.i"
], !dbg !191362pub trait Visit<'a>: Sized {
// ... lots of visit functions
fn visit_debugger_statement(&mut self, stmt: &'a DebuggerStatement) {
// report error
}
}
```rust
struct Node {
parent: Option<Rc<Node>>,
}
```rust
struct Tree {
nodes: Vec<Node>
}
struct Node {
parent: Option<usize> // index into `nodes`
}
```rust
struct Node<'a> {
kind: AstKind<'a>
}
enum AstKind<'a> {
BlockStatement(&'a BlockStatement<'a>),
// ...
ArrayExpression(&'a ArrayExpression<'a>),
// ...
Class(&'a Class<'a>),
// ...
}
```rust
pub trait Visit<'a> {
fn enter_node(&mut self, _kind: AstKind<'a>) {}
fn leave_node(&mut self, _kind: AstKind<'a>) {}
fn visit_block_statement(&mut self, stmt: &'a BlockStatement<'a>) {
let kind = AstKind::BlockStatement(stmt);
self.enter_node(kind);
self.visit_statements(&stmt.body);
self.leave_node(kind);
}
}
impl<'a> Visit<'a> for TreeBuilder<'a> {
fn enter_node(&mut self, kind: AstKind<'a>) {
self.push_ast_node(kind);
}
fn leave_node(&mut self, kind: AstKind<'a>) {
self.pop_ast_node();
}
}
```rust
for node in nodes {
match node.get().kind {
AstKind::DebuggerStatement(stmt) => {
// report error
}
_ => {}
}
}
```rust
let walk = Walk::new(&self.options);
rayon::spawn(move || {
walk.iter().for_each(|path| {
tx_path.send(path).unwrap();
});
});
let linter = Arc::clone(&self.linter);
rayon::spawn(move || {
while let Ok(path) = rx_path.recv() {
let tx_error = tx_error.clone();
let linter = Arc::clone(&linter);
rayon::spawn(move || {
if let Some(diagnostics) = Self::lint_path(&linter, &path) {
tx_error.send(diagnostics).unwrap();
}
drop(tx_error);
});
}
});
```rust
use std::io::{self, Write};
let stdout = io::stdout(); // get the global stdout entity
let mut handle = io::BufWriter::new(stdout); // optional: wrap that handle in a buffer
writeln!(handle, "foo: {}", 42); // add `?` if you care about errors here
```rust
let stdout = io::stdout(); // get the global stdout entity
let mut handle = stdout.lock(); // acquire a lock on it
writeln!(handle, "foo: {}", 42); // add `?` if you care about errors here