Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 127 additions & 0 deletions src/jit/async_compiler.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
//! Background JIT compilation thread pool. Moves Cranelift work off the
//! interpreter's hot path so compilation never stalls execution.

use std::collections::HashSet;
use std::sync::{mpsc, Arc, Mutex};
use std::thread;

use crate::mips_exec::DecodedInstr;
use super::cache::{BlockTier, CompiledBlock};
use super::compiler::BlockCompiler;
use super::helpers::HelperPtrs;

/// Number of background compiler worker threads. NOTE: each worker holds the
/// shared request-queue `Mutex` while blocked in `recv()`, so values > 1 would
/// serialize on that lock — keep at 1 unless the receive loop is restructured.
const NUM_COMPILER_THREADS: usize = 1;

/// Why a block is being compiled. Carried through the request/result pair so
/// the receiving side knows how to handle the finished block.
pub enum CompileKind {
    /// New block compilation.
    New,
    /// Recompilation of a previously compiled block.
    Recompile,
    /// Compilation driven by a saved profile entry; `content_hash` is the
    /// FNV-1a hash of the instruction words the profile was recorded against,
    /// used to detect a stale entry (different code at the same address).
    ProfileReplay { content_hash: u32 },
}

/// Work item sent from the CPU thread to a background compiler thread.
pub struct CompileRequest {
    /// Instructions of the block as `(raw word, decoded form)` pairs.
    pub instrs: Vec<(u32, DecodedInstr)>,
    /// Virtual PC of the block's first instruction (becomes `virt_pc` in the result).
    pub block_pc: u64,
    /// Physical PC of the block's first instruction.
    pub phys_pc: u64,
    /// Tier to compile the block at.
    pub tier: BlockTier,
    /// Reason for this compilation; passed through unchanged to the result.
    pub kind: CompileKind,
}

/// Finished compilation delivered back to the CPU thread.
pub struct CompileResult {
    /// The compiled block; `phys_addr` has already been filled in by the worker.
    pub block: CompiledBlock,
    /// Physical PC of the block (mirrors `CompileRequest::phys_pc`).
    pub phys_pc: u64,
    /// Virtual PC of the block (mirrors `CompileRequest::block_pc`).
    pub virt_pc: u64,
    /// The originating request's kind, passed through unchanged.
    pub kind: CompileKind,
}

/// CPU-thread handle to the background compilation pool: submits requests,
/// polls for results, and joins the worker threads on shutdown.
pub struct AsyncCompiler {
    /// Request sender; taken (set to `None`) on shutdown, which disconnects
    /// the channel and makes the workers' `recv()` fail, exiting their loops.
    tx: Option<mpsc::Sender<CompileRequest>>,
    /// Bounded channel of finished compilations from the workers.
    rx: mpsc::Receiver<CompileResult>,
    /// Join handles for the spawned compiler threads.
    handles: Vec<thread::JoinHandle<()>>,
    /// `(phys_pc, virt_pc)` keys of requests currently in flight, used to
    /// deduplicate submissions of the same block.
    pub pending: HashSet<(u64, u64)>,
}

impl AsyncCompiler {
    /// Spawn the background compiler threads and wire up the request/result
    /// channels.
    ///
    /// * `helpers` — helper function pointers handed to each worker's
    ///   `BlockCompiler`.
    /// * `capture_ir` — when true, workers capture CLIF IR for each block
    ///   (verify diagnostics).
    pub fn new(helpers: HelperPtrs, capture_ir: bool) -> Self {
        let (req_tx, req_rx) = mpsc::channel::<CompileRequest>();
        // Bounded result channel: workers block on send rather than letting
        // finished blocks pile up faster than the CPU thread drains them.
        let (res_tx, res_rx) = mpsc::sync_channel::<CompileResult>(64);
        // mpsc::Receiver is single-consumer, so workers share it via a Mutex.
        // NOTE: a worker holds this lock while blocked in recv(), so with more
        // than one thread only one could wait at a time; fine while
        // NUM_COMPILER_THREADS == 1.
        let req_rx = Arc::new(Mutex::new(req_rx));

        let mut handles = Vec::with_capacity(NUM_COMPILER_THREADS);
        for i in 0..NUM_COMPILER_THREADS {
            let rx = Arc::clone(&req_rx);
            let tx = res_tx.clone();
            let h = helpers.clone();
            let handle = thread::Builder::new()
                .name(format!("jit-compiler-{}", i))
                .spawn(move || {
                    let mut compiler = BlockCompiler::new(&h);
                    compiler.capture_ir = capture_ir;
                    loop {
                        // Scope the lock to the recv() itself so the guard is
                        // released before the (long) compile runs.
                        let req = {
                            let guard = rx.lock().unwrap();
                            guard.recv()
                        };
                        match req {
                            Ok(req) => {
                                if let Some(mut block) =
                                    compiler.compile_block(&req.instrs, req.block_pc, req.tier)
                                {
                                    block.phys_addr = req.phys_pc;
                                    // Receiver may already be gone during
                                    // shutdown; a failed send is harmless.
                                    let _ = tx.send(CompileResult {
                                        block,
                                        phys_pc: req.phys_pc,
                                        virt_pc: req.block_pc,
                                        kind: req.kind,
                                    });
                                }
                            }
                            // All senders dropped => shutdown requested.
                            Err(_) => break,
                        }
                    }
                })
                .expect("failed to spawn JIT compiler thread");
            handles.push(handle);
        }

        eprintln!("JIT: {} background compiler threads", NUM_COMPILER_THREADS);

        Self {
            tx: Some(req_tx),
            rx: res_rx,
            handles,
            pending: HashSet::new(),
        }
    }

    /// Queue a block for background compilation, deduplicating by
    /// `(phys_pc, block_pc)` so the same block is never in flight twice.
    pub fn submit(&mut self, req: CompileRequest) {
        // After shutdown() there are no workers: bail before touching
        // `pending` so we never record an entry that can't complete.
        let tx = match &self.tx {
            Some(tx) => tx,
            None => return,
        };
        let key = (req.phys_pc, req.block_pc);
        // HashSet::insert returns false if the key was already present —
        // one lookup instead of contains() followed by insert().
        if !self.pending.insert(key) {
            return;
        }
        if tx.send(req).is_err() {
            // Workers died; unmark the key so it doesn't stay pending forever.
            self.pending.remove(&key);
        }
    }

    /// Non-blocking poll for a finished compilation. Clears the block's
    /// pending marker when a result arrives; returns `None` when the result
    /// queue is empty (or disconnected).
    pub fn try_recv(&mut self) -> Option<CompileResult> {
        let result = self.rx.try_recv().ok()?;
        self.pending.remove(&(result.phys_pc, result.virt_pc));
        Some(result)
    }

    /// Stop the workers and join them. Dropping the request sender
    /// disconnects the channel, so each worker's `recv()` errors and its loop
    /// exits. Idempotent: safe to call repeatedly and from `Drop`.
    pub fn shutdown(&mut self) {
        self.tx.take();
        for handle in self.handles.drain(..) {
            let _ = handle.join();
        }
    }
}

impl Drop for AsyncCompiler {
    /// Ensure the worker threads are stopped and joined even if `shutdown()`
    /// was never called explicitly. `shutdown()` is idempotent (`tx.take()` +
    /// draining `handles`), so an earlier explicit call is harmless.
    fn drop(&mut self) {
        self.shutdown();
    }
}
6 changes: 6 additions & 0 deletions src/jit/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ pub struct CompiledBlock {
/// FNV-1a hash of the raw instruction words; used to detect stale profile
/// entries when a different DSO is loaded at the same virtual address.
pub content_hash: u32,
/// Block ends with a branch-likely instruction (BEQL/BNEL/etc). Verify mode
/// needs this to adjust step count when the branch is not taken (delay slot
/// nullified = one fewer interpreter step).
pub has_branch_likely: bool,
/// Cranelift CLIF IR captured at compile time (only when IRIS_JIT_VERIFY=1).
pub clif_ir: Option<String>,
}

// Safety: CompiledBlock is only accessed from the CPU thread.
Expand Down
54 changes: 44 additions & 10 deletions src/jit/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pub struct BlockCompiler {
ctx: Context,
builder_ctx: FunctionBuilderContext,
func_id_counter: u32,
pub capture_ir: bool,
// Declared function IDs for memory helpers (registered as imports)
fn_read_u8: FuncId,
fn_read_u16: FuncId,
Expand Down Expand Up @@ -116,6 +117,7 @@ impl BlockCompiler {
jit_module,
builder_ctx: FunctionBuilderContext::new(),
func_id_counter: 0,
capture_ir: false,
fn_read_u8, fn_read_u16, fn_read_u32, fn_read_u64,
fn_write_u8, fn_write_u16, fn_write_u32, fn_write_u64,
fn_interp_step,
Expand Down Expand Up @@ -207,6 +209,7 @@ impl BlockCompiler {
// Emit IR for each instruction
let mut compiled_count = 0u32;
let mut branch_exit_pc: Option<Value> = None;
let mut has_branch_likely = false;

let mut idx = 0;
while idx < instrs.len() {
Expand Down Expand Up @@ -265,6 +268,7 @@ impl BlockCompiler {
break;
}
EmitResult::BranchLikely { taken, not_taken, cond } => {
has_branch_likely = true;
compiled_count += 1;
idx += 1;
if idx < instrs.len() {
Expand Down Expand Up @@ -347,13 +351,20 @@ impl BlockCompiler {
builder.ins().return_(&[]);
builder.finalize();

// Capture CLIF IR before define_function consumes it (for verify diagnostics)
let clif_ir = if self.capture_ir {
Some(format!("{}", self.ctx.func.display()))
} else {
None
};

// Compile to native code
self.jit_module.define_function(func_id, &mut self.ctx).unwrap();
let code_size = self.ctx.compiled_code().unwrap().code_info().total_size;
self.jit_module.clear_context(&mut self.ctx);
self.jit_module.finalize_definitions().unwrap();

let code_ptr = self.jit_module.get_finalized_function(func_id);
let code_size = 0u32; // JITModule doesn't expose size easily; not critical

let content_hash = hash_block_instrs(instrs);

Expand All @@ -362,20 +373,18 @@ impl BlockCompiler {
phys_addr: 0, // filled in by caller
virt_addr: block_pc,
len_mips: compiled_count,
len_native: code_size,
len_native: code_size as u32,
tier,
// Speculative blocks get snapshot/rollback on exception, providing
// self-healing: codegen errors cause exceptions → rollback to correct
// state → demotion after 3 failures → bad block replaced.
//
// Non-speculative is ONLY safe when the block contains stores, because
// rollback can't undo memory writes (RMW double-apply). Load-only blocks
// at any tier should always be speculative for the safety net.
// Speculative blocks get snapshot/rollback on exception. Store-
// containing blocks are non-speculative because the write log
// approach is incompatible with MMIO writes.
speculative: !block_has_stores(instrs),
hit_count: 0,
exception_count: 0,
stable_hits: 0,
content_hash,
has_branch_likely,
clif_ir,
})
}
}
Expand All @@ -388,6 +397,11 @@ fn block_has_stores(instrs: &[(u32, DecodedInstr)]) -> bool {
instrs.iter().any(|(_, d)| matches!(d.op as u32, OP_SB | OP_SH | OP_SW | OP_SD))
}

/// Count the store instructions (SB/SH/SW/SD) in a decoded block.
fn block_store_count(instrs: &[(u32, DecodedInstr)]) -> u32 {
    use crate::mips_isa::*;
    let mut stores = 0u32;
    for (_, decoded) in instrs {
        if matches!(decoded.op as u32, OP_SB | OP_SH | OP_SW | OP_SD) {
            stores += 1;
        }
    }
    stores
}

/// FNV-1a 32-bit hash of raw instruction words. Used to detect stale profile
/// entries: a different DSO loaded at the same virtual address will have the
/// same length but different instruction bytes.
Expand Down Expand Up @@ -981,6 +995,20 @@ fn flush_modified_gprs(
*modified = 0;
}

/// Reload guest GPRs r1..r31 from the `JitContext` into fresh SSA values,
/// replacing whatever values were cached in `gpr` before a helper call.
fn reload_all_gprs(
    builder: &mut FunctionBuilder,
    gpr: &mut [Value; 32],
    ctx_ptr: Value,
) {
    let flags = MemFlags::new();
    // Index 0 is skipped: slot 0 (the architectural zero register) is left
    // untouched, matching the 1..32 range of the original loop.
    for (i, slot) in gpr.iter_mut().enumerate().skip(1) {
        let offset = ir::immediates::Offset32::new(JitContext::gpr_offset(i));
        *slot = builder.ins().load(types::I64, flags, ctx_ptr, offset);
    }
}

// ─── Load/Store emitters ─────────────────────────────────────────────────────

/// Load width tag passed to emit_load so it applies the correct sign extension.
Expand Down Expand Up @@ -1040,6 +1068,10 @@ fn emit_load(
builder.seal_block(ok_block);
let val = builder.block_params(ok_block)[0];

// Reload ALL GPRs from ctx after helper call. This resets SSA live-value
// pressure so regalloc2 never sees accumulated diamonds from multiple helpers.
reload_all_gprs(builder, gpr, ctx_ptr);

// Apply correct sign/zero extension based on load width
gpr[rt] = match (width, sign_extend) {
(LoadWidth::Byte, true) => {
Expand Down Expand Up @@ -1071,7 +1103,7 @@ fn emit_store(
builder: &mut FunctionBuilder,
ctx_ptr: Value, exec_ptr: Value,
helper: FuncRef,
gpr: &[Value; 32],
gpr: &mut [Value; 32],
rs: usize, rt: usize,
d: &DecodedInstr,
instr_pc: u64,
Expand Down Expand Up @@ -1110,6 +1142,8 @@ fn emit_store(
builder.switch_to_block(ok_block);
builder.seal_block(ok_block);

reload_all_gprs(builder, gpr, ctx_ptr);

EmitResult::Ok
}

Expand Down
31 changes: 31 additions & 0 deletions src/jit/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,27 @@ pub const EXIT_EXCEPTION: u32 = 2;
pub const EXIT_INTERRUPT_CHECK: u32 = 3;
pub const EXIT_HALT: u32 = 4;

/// Max stores we can speculatively track per block. Exceeding this forces the
/// block to be non-speculative (disables rollback for stores past this limit).
pub const WRITE_LOG_CAP: usize = 128;

/// Single entry in the speculative store write log. Records the pre-store
/// value at `addr` so rollback can restore it if the block exceptions.
///
/// `#[repr(C)]` keeps the field layout stable; the struct lives inside the
/// `#[repr(C)]` `JitContext` that generated code accesses by raw offset.
#[repr(C)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct WriteLogEntry {
    /// Guest address the store overwrote.
    pub addr: u64,
    /// Value at `addr` before the store (restored on rollback).
    pub old_val: u64,
    /// Width of the store, in bytes.
    pub size: u8,
    /// Explicit padding so the struct has no implicit gaps under `#[repr(C)]`
    /// (total size 24 bytes).
    pub _pad: [u8; 7],
}

impl WriteLogEntry {
    /// Zeroed sentinel entry; `const` so it can initialize the fixed-size
    /// write-log array at compile time.
    pub const fn empty() -> Self {
        Self { addr: 0, old_val: 0, size: 0, _pad: [0; 7] }
    }
}

impl Default for WriteLogEntry {
    /// Delegates to [`WriteLogEntry::empty`] so non-const callers can use
    /// `Default` uniformly.
    fn default() -> Self {
        Self::empty()
    }
}

#[repr(C)]
pub struct JitContext {
// General purpose registers
Expand Down Expand Up @@ -58,6 +79,13 @@ pub struct JitContext {
// Exception status from failed memory access (set by helpers)
pub exception_status: u32,
_pad0: u32,

// Speculative store write log. Each entry records the pre-store value at
// an address. On block rollback (speculative exception), replay in reverse
// to restore memory. On normal exit, reset write_log_len to 0.
pub write_log_len: u32,
_pad1: u32,
pub write_log: [WriteLogEntry; WRITE_LOG_CAP],
}

impl JitContext {
Expand Down Expand Up @@ -86,6 +114,9 @@ impl JitContext {
executor_ptr: 0,
exception_status: 0,
_pad0: 0,
write_log_len: 0,
_pad1: 0,
write_log: [WriteLogEntry::empty(); WRITE_LOG_CAP],
}
}

Expand Down
Loading
Loading