// SPDX-License-Identifier: LCL-1.0 // Copyright (c) 2026 Markus Maiwald // Stewardship: Self Sovereign Society Foundation // // This file is part of the Nexus Commonwealth. // See legal/LICENSE_COMMONWEALTH.md for license terms. //! Rumpk HAL: AArch64 Entry Point (Sovereign Trap Architecture) //! //! This is the hardware floor for ARM64. Sets up exception vectors, //! GIC, Generic Timer, and PL011 UART before handing off to Nim. //! //! SAFETY: Runs in bare-metal EL1 with identity mapping (no MMU in M3.1). const std = @import("std"); const uart = @import("uart.zig"); const gic = @import("gic.zig"); const uart_input = @import("uart_input.zig"); // ========================================================= // L1 Kernel Logic (Nim FFI) // ========================================================= extern fn k_handle_syscall(nr: usize, a0: usize, a1: usize, a2: usize) usize; extern fn k_handle_exception(cause: usize, pc: usize, addr: usize) void; extern fn k_check_deferred_yield() void; extern fn kmain() void; extern fn NimMain() void; extern fn hal_surface_init() void; // ========================================================= // Trap Frame (34 registers * 8 = 272 bytes, 16-byte aligned = 288) // ========================================================= const TrapFrame = extern struct { // x0-x30 (31 GPRs) x: [31]usize, // SP_EL0 (user stack pointer) sp_el0: usize, // Exception Link Register (return address) elr_el1: usize, // Saved Program Status Register spsr_el1: usize, // ESR_EL1 (Exception Syndrome) esr_el1: usize, // FAR_EL1 (Fault Address) far_el1: usize, }; // ========================================================= // Exception Vector Table // ========================================================= // ARM64 requires 16 entries, each 128 bytes (32 instructions), aligned to 2048. // Layout: // [0x000] Current EL, SP0: Sync / IRQ / FIQ / SError // [0x200] Current EL, SPx: Sync / IRQ / FIQ / SError <- kernel traps // [0x400] Lower EL, AArch64: Sync / IRQ / FIQ / SError <- userland traps // [0x600] Lower EL, AArch32: Sync / IRQ / FIQ / SError <- unused // Vector table is built at runtime by install_vectors_asm() — no comptime needed. // ========================================================= // Vector Handlers (Assembly Trampolines) // ========================================================= // Shared context save/restore macro as inline asm. // Saves x0-x30, SP_EL0, ELR_EL1, SPSR_EL1, ESR_EL1, FAR_EL1 onto kernel stack. // Total frame: 36 * 8 = 288 bytes (16-byte aligned). fn save_context() callconv(.naked) void { asm volatile ( // Allocate trap frame (288 bytes = 36 * 8) \\ sub sp, sp, #288 // Save x0-x30 \\ stp x0, x1, [sp, #0] \\ stp x2, x3, [sp, #16] \\ stp x4, x5, [sp, #32] \\ stp x6, x7, [sp, #48] \\ stp x8, x9, [sp, #64] \\ stp x10, x11, [sp, #80] \\ stp x12, x13, [sp, #96] \\ stp x14, x15, [sp, #112] \\ stp x16, x17, [sp, #128] \\ stp x18, x19, [sp, #144] \\ stp x20, x21, [sp, #160] \\ stp x22, x23, [sp, #176] \\ stp x24, x25, [sp, #192] \\ stp x26, x27, [sp, #208] \\ stp x28, x29, [sp, #224] \\ str x30, [sp, #240] // Save SP_EL0 \\ mrs x0, sp_el0 \\ str x0, [sp, #248] // Save ELR_EL1 \\ mrs x0, elr_el1 \\ str x0, [sp, #256] // Save SPSR_EL1 \\ mrs x0, spsr_el1 \\ str x0, [sp, #264] // Save ESR_EL1 \\ mrs x0, esr_el1 \\ str x0, [sp, #272] // Save FAR_EL1 \\ mrs x0, far_el1 \\ str x0, [sp, #280] // x0 = frame pointer (sp) \\ mov x0, sp \\ ret ); } fn restore_context() callconv(.naked) void { asm volatile ( // Restore ELR_EL1 \\ ldr x0, [sp, #256] \\ msr elr_el1, x0 // Restore SPSR_EL1 \\ ldr x0, [sp, #264] \\ msr spsr_el1, x0 // Restore SP_EL0 \\ ldr x0, [sp, #248] \\ msr sp_el0, x0 // Restore x0-x30 \\ ldp x0, x1, [sp, #0] \\ ldp x2, x3, [sp, #16] \\ ldp x4, x5, [sp, #32] \\ ldp x6, x7, [sp, #48] \\ ldp x8, x9, [sp, #64] \\ ldp x10, x11, [sp, #80] \\ ldp x12, x13, [sp, #96] \\ ldp x14, x15, [sp, #112] \\ ldp x16, x17, [sp, #128] \\ ldp x18, x19, [sp, #144] \\ ldp x20, x21, [sp, #160] \\ ldp x22, x23, [sp, #176] \\ ldp x24, x25, [sp, #192] \\ ldp x26, x27, [sp, #208] \\ ldp x28, x29, [sp, #224] \\ ldr x30, [sp, #240] // Deallocate frame \\ add sp, sp, #288 \\ eret ); } // Sync exception from current EL (kernel) export fn vector_sync_handler() callconv(.naked) void { asm volatile ( \\ sub sp, sp, #288 \\ stp x0, x1, [sp, #0] \\ stp x2, x3, [sp, #16] \\ stp x4, x5, [sp, #32] \\ stp x6, x7, [sp, #48] \\ stp x8, x9, [sp, #64] \\ stp x10, x11, [sp, #80] \\ stp x12, x13, [sp, #96] \\ stp x14, x15, [sp, #112] \\ stp x16, x17, [sp, #128] \\ stp x18, x19, [sp, #144] \\ stp x20, x21, [sp, #160] \\ stp x22, x23, [sp, #176] \\ stp x24, x25, [sp, #192] \\ stp x26, x27, [sp, #208] \\ stp x28, x29, [sp, #224] \\ str x30, [sp, #240] \\ mrs x0, sp_el0 \\ str x0, [sp, #248] \\ mrs x0, elr_el1 \\ str x0, [sp, #256] \\ mrs x0, spsr_el1 \\ str x0, [sp, #264] \\ mrs x0, esr_el1 \\ str x0, [sp, #272] \\ mrs x0, far_el1 \\ str x0, [sp, #280] \\ mov x0, sp \\ bl rss_trap_handler \\ ldr x0, [sp, #256] \\ msr elr_el1, x0 \\ ldr x0, [sp, #264] \\ msr spsr_el1, x0 \\ ldr x0, [sp, #248] \\ msr sp_el0, x0 \\ ldp x0, x1, [sp, #0] \\ ldp x2, x3, [sp, #16] \\ ldp x4, x5, [sp, #32] \\ ldp x6, x7, [sp, #48] \\ ldp x8, x9, [sp, #64] \\ ldp x10, x11, [sp, #80] \\ ldp x12, x13, [sp, #96] \\ ldp x14, x15, [sp, #112] \\ ldp x16, x17, [sp, #128] \\ ldp x18, x19, [sp, #144] \\ ldp x20, x21, [sp, #160] \\ ldp x22, x23, [sp, #176] \\ ldp x24, x25, [sp, #192] \\ ldp x26, x27, [sp, #208] \\ ldp x28, x29, [sp, #224] \\ ldr x30, [sp, #240] \\ add sp, sp, #288 \\ eret ); } // IRQ from current EL (kernel) export fn vector_irq_handler() callconv(.naked) void { asm volatile ( \\ sub sp, sp, #288 \\ stp x0, x1, [sp, #0] \\ stp x2, x3, [sp, #16] \\ stp x4, x5, [sp, #32] \\ stp x6, x7, [sp, #48] \\ stp x8, x9, [sp, #64] \\ stp x10, x11, [sp, #80] \\ stp x12, x13, [sp, #96] \\ stp x14, x15, [sp, #112] \\ stp x16, x17, [sp, #128] \\ stp x18, x19, [sp, #144] \\ stp x20, x21, [sp, #160] \\ stp x22, x23, [sp, #176] \\ stp x24, x25, [sp, #192] \\ stp x26, x27, [sp, #208] \\ stp x28, x29, [sp, #224] \\ str x30, [sp, #240] \\ mrs x0, sp_el0 \\ str x0, [sp, #248] \\ mrs x0, elr_el1 \\ str x0, [sp, #256] \\ mrs x0, spsr_el1 \\ str x0, [sp, #264] \\ mrs x0, esr_el1 \\ str x0, [sp, #272] \\ mrs x0, far_el1 \\ str x0, [sp, #280] \\ mov x0, sp \\ bl rss_trap_handler \\ ldr x0, [sp, #256] \\ msr elr_el1, x0 \\ ldr x0, [sp, #264] \\ msr spsr_el1, x0 \\ ldr x0, [sp, #248] \\ msr sp_el0, x0 \\ ldp x0, x1, [sp, #0] \\ ldp x2, x3, [sp, #16] \\ ldp x4, x5, [sp, #32] \\ ldp x6, x7, [sp, #48] \\ ldp x8, x9, [sp, #64] \\ ldp x10, x11, [sp, #80] \\ ldp x12, x13, [sp, #96] \\ ldp x14, x15, [sp, #112] \\ ldp x16, x17, [sp, #128] \\ ldp x18, x19, [sp, #144] \\ ldp x20, x21, [sp, #160] \\ ldp x22, x23, [sp, #176] \\ ldp x24, x25, [sp, #192] \\ ldp x26, x27, [sp, #208] \\ ldp x28, x29, [sp, #224] \\ ldr x30, [sp, #240] \\ add sp, sp, #288 \\ eret ); } // Sync exception from lower EL (userland SVC) export fn vector_sync_lower() callconv(.naked) void { // Same save/restore/eret pattern asm volatile ( \\ sub sp, sp, #288 \\ stp x0, x1, [sp, #0] \\ stp x2, x3, [sp, #16] \\ stp x4, x5, [sp, #32] \\ stp x6, x7, [sp, #48] \\ stp x8, x9, [sp, #64] \\ stp x10, x11, [sp, #80] \\ stp x12, x13, [sp, #96] \\ stp x14, x15, [sp, #112] \\ stp x16, x17, [sp, #128] \\ stp x18, x19, [sp, #144] \\ stp x20, x21, [sp, #160] \\ stp x22, x23, [sp, #176] \\ stp x24, x25, [sp, #192] \\ stp x26, x27, [sp, #208] \\ stp x28, x29, [sp, #224] \\ str x30, [sp, #240] \\ mrs x0, sp_el0 \\ str x0, [sp, #248] \\ mrs x0, elr_el1 \\ str x0, [sp, #256] \\ mrs x0, spsr_el1 \\ str x0, [sp, #264] \\ mrs x0, esr_el1 \\ str x0, [sp, #272] \\ mrs x0, far_el1 \\ str x0, [sp, #280] \\ mov x0, sp \\ bl rss_trap_handler \\ ldr x0, [sp, #256] \\ msr elr_el1, x0 \\ ldr x0, [sp, #264] \\ msr spsr_el1, x0 \\ ldr x0, [sp, #248] \\ msr sp_el0, x0 \\ ldp x0, x1, [sp, #0] \\ ldp x2, x3, [sp, #16] \\ ldp x4, x5, [sp, #32] \\ ldp x6, x7, [sp, #48] \\ ldp x8, x9, [sp, #64] \\ ldp x10, x11, [sp, #80] \\ ldp x12, x13, [sp, #96] \\ ldp x14, x15, [sp, #112] \\ ldp x16, x17, [sp, #128] \\ ldp x18, x19, [sp, #144] \\ ldp x20, x21, [sp, #160] \\ ldp x22, x23, [sp, #176] \\ ldp x24, x25, [sp, #192] \\ ldp x26, x27, [sp, #208] \\ ldp x28, x29, [sp, #224] \\ ldr x30, [sp, #240] \\ add sp, sp, #288 \\ eret ); } // IRQ from lower EL (userland interrupted) export fn vector_irq_lower() callconv(.naked) void { asm volatile ( \\ sub sp, sp, #288 \\ stp x0, x1, [sp, #0] \\ stp x2, x3, [sp, #16] \\ stp x4, x5, [sp, #32] \\ stp x6, x7, [sp, #48] \\ stp x8, x9, [sp, #64] \\ stp x10, x11, [sp, #80] \\ stp x12, x13, [sp, #96] \\ stp x14, x15, [sp, #112] \\ stp x16, x17, [sp, #128] \\ stp x18, x19, [sp, #144] \\ stp x20, x21, [sp, #160] \\ stp x22, x23, [sp, #176] \\ stp x24, x25, [sp, #192] \\ stp x26, x27, [sp, #208] \\ stp x28, x29, [sp, #224] \\ str x30, [sp, #240] \\ mrs x0, sp_el0 \\ str x0, [sp, #248] \\ mrs x0, elr_el1 \\ str x0, [sp, #256] \\ mrs x0, spsr_el1 \\ str x0, [sp, #264] \\ mrs x0, esr_el1 \\ str x0, [sp, #272] \\ mrs x0, far_el1 \\ str x0, [sp, #280] \\ mov x0, sp \\ bl rss_trap_handler \\ ldr x0, [sp, #256] \\ msr elr_el1, x0 \\ ldr x0, [sp, #264] \\ msr spsr_el1, x0 \\ ldr x0, [sp, #248] \\ msr sp_el0, x0 \\ ldp x0, x1, [sp, #0] \\ ldp x2, x3, [sp, #16] \\ ldp x4, x5, [sp, #32] \\ ldp x6, x7, [sp, #48] \\ ldp x8, x9, [sp, #64] \\ ldp x10, x11, [sp, #80] \\ ldp x12, x13, [sp, #96] \\ ldp x14, x15, [sp, #112] \\ ldp x16, x17, [sp, #128] \\ ldp x18, x19, [sp, #144] \\ ldp x20, x21, [sp, #160] \\ ldp x22, x23, [sp, #176] \\ ldp x24, x25, [sp, #192] \\ ldp x26, x27, [sp, #208] \\ ldp x28, x29, [sp, #224] \\ ldr x30, [sp, #240] \\ add sp, sp, #288 \\ eret ); } // ========================================================= // Trap Handler (Zig Logic) // ========================================================= // ESR_EL1 Exception Class codes const EC_SVC64: u6 = 0x15; // SVC in AArch64 const EC_DATA_ABORT_LOWER: u6 = 0x24; const EC_DATA_ABORT_SAME: u6 = 0x25; const EC_INSN_ABORT_LOWER: u6 = 0x20; const EC_INSN_ABORT_SAME: u6 = 0x21; var trap_depth: usize = 0; export fn rss_trap_handler(frame: *TrapFrame) void { trap_depth += 1; if (trap_depth > 3) { uart.print("[Trap] Infinite Loop Detected. Halting.\n"); while (true) { asm volatile ("wfe"); } } defer trap_depth -= 1; const esr = frame.esr_el1; const ec: u6 = @truncate((esr >> 26) & 0x3F); // Determine if this came from IRQ vector or Sync vector // by checking if ESR indicates an interrupt (EC=0 from IRQ vector) // Actually, IRQ vectors call us too — but ESR won't have useful EC for IRQ. // We use a simple heuristic: if called from IRQ vector, EC will be 0 typically. // Better approach: check GIC for pending IRQs first. // Try to claim an IRQ — if one is pending, this is an interrupt const irq = gic.gic_claim(); if (!gic.is_spurious(irq)) { // Interrupt path if (irq == gic.TIMER_IRQ) { // Timer interrupt: acknowledge and disable until rescheduled timer_ack(); k_check_deferred_yield(); } else if (irq == gic.UART_IRQ) { uart_input.poll_input(); } else if (irq >= gic.VIRTIO_MMIO_IRQ_BASE and irq < gic.VIRTIO_MMIO_IRQ_BASE + 32) { // VirtIO MMIO device interrupt — poll net driver const virtio_net = @import("virtio_net.zig"); virtio_net.virtio_net_poll(); } gic.gic_complete(irq); return; } // Synchronous exception path if (ec == EC_SVC64) { // Syscall: x8 = number, x0-x2 = args (ARM64 convention) const nr = frame.x[8]; const a0 = frame.x[0]; const a1 = frame.x[1]; const a2 = frame.x[2]; // Advance PC past SVC instruction frame.elr_el1 += 4; const ret = k_handle_syscall(nr, a0, a1, a2); frame.x[0] = ret; } else if (ec == EC_DATA_ABORT_LOWER or ec == EC_DATA_ABORT_SAME or ec == EC_INSN_ABORT_LOWER or ec == EC_INSN_ABORT_SAME) { uart.print("\n[Trap] Abort! EC:"); uart.print_hex(@as(usize, ec)); uart.print(" PC:"); uart.print_hex(frame.elr_el1); uart.print(" FAR:"); uart.print_hex(frame.far_el1); uart.print("\n"); k_handle_exception(@as(usize, ec), frame.elr_el1, frame.far_el1); while (true) { asm volatile ("wfe"); } } else { uart.print("\n[Trap] Unhandled EC:"); uart.print_hex(@as(usize, ec)); uart.print(" ESR:"); uart.print_hex(esr); uart.print(" PC:"); uart.print_hex(frame.elr_el1); uart.print("\n"); } } // ========================================================= // ARM Generic Timer // ========================================================= var cntfrq: u64 = 0; // Timer frequency (read at init) var ns_per_tick_x16: u64 = 0; // (1e9 / freq) * 16 for fixed-point fn timer_init() void { // Read timer frequency cntfrq = asm volatile ("mrs %[ret], cntfrq_el0" : [ret] "=r" (-> u64), ); if (cntfrq == 0) { // Fallback: QEMU virt typically uses 62.5 MHz cntfrq = 62_500_000; } // Precompute ns_per_tick * 16 for fixed-point math // ns_per_tick = 1_000_000_000 / cntfrq // We use * 16 to avoid floating point: (1e9 * 16) / cntfrq ns_per_tick_x16 = (1_000_000_000 * 16) / cntfrq; // Disable timer initially asm volatile ("msr cntp_ctl_el0, %[val]" : : [val] "r" (@as(u64, 0)), ); } fn timer_ack() void { // Disable timer (mask) to prevent re-firing until rescheduled asm volatile ("msr cntp_ctl_el0, %[val]" : : [val] "r" (@as(u64, 0x2)), // IMASK=1, ENABLE=0 ); } export fn rumpk_timer_now_ns() u64 { const cnt: u64 = asm volatile ("mrs %[ret], cntpct_el0" : [ret] "=r" (-> u64), ); // Convert to nanoseconds using precomputed fixed-point // ns = cnt * ns_per_tick = cnt * (ns_per_tick_x16 / 16) return (cnt * ns_per_tick_x16) >> 4; } export fn rumpk_timer_set_ns(interval_ns: u64) void { if (interval_ns == std.math.maxInt(u64)) { // Disable timer asm volatile ("msr cntp_ctl_el0, %[val]" : : [val] "r" (@as(u64, 0x2)), // IMASK=1 ); return; } // Convert ns to ticks: ticks = ns * cntfrq / 1e9 const ticks = (interval_ns * cntfrq) / 1_000_000_000; // Set countdown value and enable asm volatile ("msr cntp_tval_el0, %[val]" : : [val] "r" (ticks), ); asm volatile ("msr cntp_ctl_el0, %[val]" : : [val] "r" (@as(u64, 0x1)), // ENABLE=1, IMASK=0 ); } // ========================================================= // Identity Map (MMU Setup) // ========================================================= // ARM64 without MMU treats all memory as Device-nGnRnE, which requires // strict alignment. We set up a minimal identity map with: // MAIR index 0: Device-nGnRnE (0x00) — for MMIO // MAIR index 1: Normal Write-Back Cacheable (0xFF) — for RAM // Using 1GB block descriptors at Level 1 (only need L0 + L1 tables). // Page table storage (must be 4096-byte aligned) // 39-bit VA (T0SZ=25) starts walk at L1 — no L0 needed var l1_table: [512]u64 align(4096) = [_]u64{0} ** 512; fn setup_identity_map() void { // MAIR_EL1: index 0 = Device-nGnRnE, index 1 = Normal WB Cacheable const MAIR_VAL: u64 = 0xFF_00; // attr1=0xFF (Normal WB), attr0=0x00 (Device) asm volatile ("msr mair_el1, %[val]" : : [val] "r" (MAIR_VAL), ); // TCR_EL1: 4KB granule, 36-bit PA, T0SZ=25 (39-bit VA = 512GB) // IPS=0b010 (40-bit PA), TG0=0b00 (4KB), SH0=0b11 (Inner Shareable), // ORGN0=0b01 (WB Cacheable), IRGN0=0b01 (WB Cacheable), T0SZ=25 const TCR_VAL: u64 = (0b010 << 32) | // IPS: 40-bit PA (0b00 << 14) | // TG0: 4KB granule (0b11 << 12) | // SH0: Inner Shareable (0b01 << 10) | // ORGN0: Write-Back Cacheable (0b01 << 8) | // IRGN0: Write-Back Cacheable 25; // T0SZ: 39-bit VA space asm volatile ("msr tcr_el1, %[val]" : : [val] "r" (TCR_VAL), ); // With T0SZ=25, VA is 39 bits → translation starts at L1 (no L0 needed). // L1 entry [38:30] = 9 bits → 512 entries, each 1GB block. // TTBR0_EL1 points directly at the L1 table. // Block descriptor: addr[47:30] | AF | SH | AP | AttrIdx | Block(0b01) const BLOCK_DEVICE: u64 = (1 << 10) | // AF (Access Flag) (0b00 << 8) | // SH: Non-shareable (Device) (0b00 << 6) | // AP: EL1 RW (0b00 << 2) | // AttrIdx: 0 (Device-nGnRnE) 0x1; // Block descriptor const BLOCK_NORMAL: u64 = (1 << 10) | // AF (Access Flag) (0b11 << 8) | // SH: Inner Shareable (0b00 << 6) | // AP: EL1 RW (0b01 << 2) | // AttrIdx: 1 (Normal Cacheable) 0x1; // Block descriptor // GB 0 (0x00000000-0x3FFFFFFF): Device (UART, GIC, etc.) l1_table[0] = (0x00000000) | BLOCK_DEVICE; // GB 1 (0x40000000-0x7FFFFFFF): Normal RAM (QEMU virt RAM) l1_table[1] = (0x40000000) | BLOCK_NORMAL; // GB 2 (0x80000000-0xBFFFFFFF): Device (high MMIO) l1_table[2] = (0x80000000) | BLOCK_DEVICE; // GB 3 (0xC0000000-0xFFFFFFFF): Device l1_table[3] = (0xC0000000) | BLOCK_DEVICE; // Set TTBR0_EL1 to point at L1 table directly (39-bit VA starts at L1) const l1_addr = @intFromPtr(&l1_table); asm volatile ("msr ttbr0_el1, %[val]" : : [val] "r" (l1_addr), ); // Invalidate TLB asm volatile ("tlbi vmalle1"); asm volatile ("dsb sy"); asm volatile ("isb"); // Enable MMU + caches in SCTLR_EL1 var sctlr: u64 = 0; asm volatile ("mrs %[out], sctlr_el1" : [out] "=r" (sctlr), ); sctlr |= (1 << 0); // M: Enable MMU sctlr |= (1 << 2); // C: Enable data cache sctlr |= (1 << 12); // I: Enable instruction cache sctlr &= ~@as(u64, 1 << 1); // A: Disable alignment check asm volatile ("msr sctlr_el1, %[val]" : : [val] "r" (sctlr), ); asm volatile ("isb"); } // ========================================================= // Entry Point // ========================================================= // SAFETY(Stack): Memory is immediately used by _start before any read. export var stack_bytes: [64 * 1024]u8 align(16) = undefined; export fn aarch64_init() void { // 1. Initialize UART (PL011) uart.init(); uart.print("[Rumpk L0] aarch64_init reached\n"); // Set up identity-mapped page tables so RAM has Normal memory type. // Without MMU, ARM64 uses Device memory which requires strict alignment. setup_identity_map(); uart.print("[Rumpk L0] Identity map + MMU enabled\n"); // 2. Initialize GIC gic.gic_init(); gic.gic_enable_timer_irq(); uart.print("[Rumpk L0] GICv2 initialized\n"); // 3. Initialize Generic Timer timer_init(); uart.print("[Rumpk L0] Generic Timer initialized (freq="); uart.print_hex(cntfrq); uart.print(")\n"); // 4. Install exception vectors // We write the vector table with proper branch instructions at runtime install_vectors_asm(); uart.print("[Rumpk L0] Exception vectors installed\n"); // 5. Enable IRQs (clear DAIF.I bit) asm volatile ("msr daifclr, #0x2"); // Clear IRQ mask uart.print("[Rumpk ARM64] Handing off to Nim L1...\n"); // 6. Initialize Nim runtime and enter kernel NimMain(); kmain(); rumpk_halt(); } /// Install exception vectors using runtime assembly /// This writes proper branch instructions into the vector table fn install_vectors_asm() void { // Set VBAR_EL1 to point at our vector handler functions // We use a simpler approach: write a small vector table in a static buffer // with branch instructions to our Zig handler functions. // The vector table entries need to branch to our handlers. // ARM64 exception vectors: each entry is 128 bytes (0x80). // We write `b ` at the start of each entry. // For the entries we care about: // 0x200: Current EL SPx Sync -> vector_sync_handler // 0x280: Current EL SPx IRQ -> vector_irq_handler // 0x400: Lower EL Sync -> vector_sync_lower // 0x480: Lower EL IRQ -> vector_irq_lower // We need the vector table to be 2048-byte aligned. // Use our static vector_table_runtime buffer. const table_addr = @intFromPtr(&vector_table_runtime); // Fill with WFE (halt) as default const wfe_insn: u32 = 0xD503205F; // WFE var i: usize = 0; while (i < 2048) : (i += 4) { const ptr: *volatile u32 = @ptrFromInt(table_addr + i); ptr.* = wfe_insn; } // Write branch instructions to our handlers write_branch_to(table_addr + 0x200, @intFromPtr(&vector_sync_handler)); write_branch_to(table_addr + 0x280, @intFromPtr(&vector_irq_handler)); write_branch_to(table_addr + 0x400, @intFromPtr(&vector_sync_lower)); write_branch_to(table_addr + 0x480, @intFromPtr(&vector_irq_lower)); // Set VBAR_EL1 asm volatile ("msr vbar_el1, %[vbar]" : : [vbar] "r" (table_addr), ); asm volatile ("isb"); } /// Runtime-writable vector table (2048 bytes, 2048-byte aligned) var vector_table_runtime: [2048]u8 align(2048) = [_]u8{0} ** 2048; /// Write a branch instruction at `from` that jumps to `target` fn write_branch_to(from: usize, target: usize) void { // ARM64 B instruction: 0x14000000 | (imm26) // imm26 is a signed offset in 4-byte units const offset_bytes: i64 = @as(i64, @intCast(target)) - @as(i64, @intCast(from)); const offset_words: i32 = @intCast(@divExact(offset_bytes, 4)); const imm26: u32 = @as(u32, @bitCast(offset_words)) & 0x03FFFFFF; const insn: u32 = 0x14000000 | imm26; const ptr: *volatile u32 = @ptrFromInt(from); ptr.* = insn; } // ========================================================= // HAL Exports (Contract with L1 Nim Kernel) // ========================================================= export fn hal_console_write(ptr: [*]const u8, len: usize) void { uart.write_bytes(ptr[0..len]); } export fn console_read() c_int { if (uart_input.read_byte()) |b| { return @as(c_int, b); } return -1; } export fn console_poll() void { uart_input.poll_input(); } export fn debug_uart_lsr() u8 { return uart.get_lsr(); } export fn uart_print_hex(value: u64) void { uart.print_hex(value); } export fn uart_print_hex8(value: u8) void { uart.print_hex8(value); } export fn hal_io_init() void { uart.init(); hal_surface_init(); // Initialize VirtIO block storage (MMIO transport) const virtio_block = @import("virtio_block.zig"); virtio_block.init(); } export fn hal_panic(msg: [*:0]const u8) callconv(.c) noreturn { uart.print("[HAL PANIC] "); uart.print(std.mem.span(msg)); uart.print("\n"); rumpk_halt(); } export fn rumpk_halt() noreturn { uart.print("[Rumpk ARM64] Halting.\n"); while (true) { asm volatile ("wfe"); } } export fn hal_kexec(entry: u64, dtb: u64) noreturn { _ = entry; _ = dtb; uart.print("[HAL] kexec not implemented on ARM64\n"); rumpk_halt(); } // ========================================================= // Page Table Infrastructure (M3.3 — 4KB Granule, 39-bit VA) // ========================================================= // The identity map above uses 1GB block descriptors for early boot. // For user isolation we need 4KB page granularity (L1→L2→L3 walk). const PAGE_SIZE: u64 = 4096; const PAGE_SHIFT: u6 = 12; const ENTRIES_PER_TABLE: usize = 512; // ARM64 descriptor bits const DESC_VALID: u64 = 1 << 0; const DESC_TABLE: u64 = 0b11; // L1/L2 table pointer const DESC_PAGE: u64 = 0b11; // L3 page descriptor const DESC_AF: u64 = 1 << 10; // Access Flag const DESC_SH_ISH: u64 = 0b11 << 8; // Inner Shareable const DESC_AP_RW_EL1: u64 = 0b00 << 6; // EL1 RW, EL0 no access const DESC_AP_RW_ALL: u64 = 0b01 << 6; // EL1+EL0 RW const DESC_UXN: u64 = @as(u64, 1) << 54; // Unprivileged Execute Never const DESC_PXN: u64 = @as(u64, 1) << 53; // Privileged Execute Never const ATTR_DEVICE: u64 = 0b00 << 2; // AttrIdx=0 (Device-nGnRnE) const ATTR_NORMAL: u64 = 0b01 << 2; // AttrIdx=1 (Normal WB Cacheable) const DRAM_BASE: u64 = 0x40000000; // Bump allocator for page tables (8MB pool) var pt_alloc_base: u64 = 0; var pt_alloc_offset: u64 = 0; const PT_POOL_SIZE: u64 = 8 * 1024 * 1024; fn pt_init_allocator(base: u64) void { pt_alloc_base = base; pt_alloc_offset = 0; } /// Allocate one zeroed 4KB-aligned page table fn pt_alloc() ?[*]u64 { if (pt_alloc_offset + PAGE_SIZE > PT_POOL_SIZE) { uart.print("[MM] Page table pool exhausted!\n"); return null; } const addr = pt_alloc_base + pt_alloc_offset; pt_alloc_offset += PAGE_SIZE; // Zero all 512 entries const table: [*]volatile u64 = @ptrFromInt(addr); for (0..ENTRIES_PER_TABLE) |i| { table[i] = 0; } return @ptrFromInt(addr); } /// Map a single 4KB page: walk L1→L2→L3, allocating intermediate tables fn map_page(root: [*]u64, va: u64, pa: u64, attrs: u64) void { // 39-bit VA with 4KB granule: // L1 index = VA[38:30] (9 bits) // L2 index = VA[29:21] (9 bits) // L3 index = VA[20:12] (9 bits) const l1_idx = (va >> 30) & 0x1FF; const l2_idx = (va >> 21) & 0x1FF; const l3_idx = (va >> 12) & 0x1FF; // L1 → L2 table const l1_entry = root[l1_idx]; const l2_table: [*]u64 = if (l1_entry & DESC_VALID != 0) @ptrFromInt(l1_entry & 0x0000FFFFFFFFF000) else blk: { const new_l2 = pt_alloc() orelse return; root[l1_idx] = @intFromPtr(new_l2) | DESC_TABLE; break :blk new_l2; }; // L2 → L3 table const l2_entry = l2_table[l2_idx]; const l3_table: [*]u64 = if (l2_entry & DESC_VALID != 0) @ptrFromInt(l2_entry & 0x0000FFFFFFFFF000) else blk: { const new_l3 = pt_alloc() orelse return; l2_table[l2_idx] = @intFromPtr(new_l3) | DESC_TABLE; break :blk new_l3; }; // L3 page descriptor: pa[47:12] | attrs | DESC_PAGE (0b11) l3_table[l3_idx] = (pa & 0x0000FFFFFFFFF000) | attrs | DESC_PAGE; } /// Map a range of pages (va and pa must be page-aligned) fn map_range(root: [*]u64, va_start: u64, pa_start: u64, size: u64, attrs: u64) void { var offset: u64 = 0; while (offset < size) : (offset += PAGE_SIZE) { map_page(root, va_start + offset, pa_start + offset, attrs); } } // ========================================================= // HAL Userland Entry (EL1 → EL0 via eret) // ========================================================= export fn hal_enter_userland(entry: u64, systable: u64, sp: u64) callconv(.c) void { // SPSR_EL1 = 0 → return to EL0t (M[3:0]=0b0000), DAIF clear (IRQs enabled) const spsr: u64 = 0; asm volatile ( \\ msr spsr_el1, %[spsr] \\ msr elr_el1, %[entry] \\ msr sp_el0, %[sp] \\ mov x0, %[systable] \\ eret : : [spsr] "r" (spsr), [entry] "r" (entry), [sp] "r" (sp), [systable] "r" (systable), ); } // ========================================================= // Memory Management (M3.3 — Full Page Tables) // ========================================================= extern fn kprint(s: [*:0]const u8) void; extern fn kprint_hex(n: u64) void; var kernel_ttbr0: u64 = 0; export fn mm_init() callconv(.c) void { // Page table pool at DRAM_BASE + 240MB (same offset as RISC-V) pt_init_allocator(DRAM_BASE + 240 * 1024 * 1024); } export fn mm_enable_kernel_paging() callconv(.c) void { // Identity map is already set up by setup_identity_map() using 1GB blocks. // Store current TTBR0 for later restore after worker map switches. asm volatile ("mrs %[out], ttbr0_el1" : [out] "=r" (kernel_ttbr0), ); } export fn mm_get_kernel_satp() callconv(.c) u64 { return kernel_ttbr0; } export fn mm_create_worker_map( stack_base: u64, stack_size: u64, packet_addr: u64, phys_base: u64, region_size: u64, ) callconv(.c) u64 { const root = pt_alloc() orelse return 0; kprint("[MM] Cellular Map: phys_base="); kprint_hex(phys_base); kprint(" size="); kprint_hex(region_size); kprint("\n"); // Kernel attributes: EL1 RW, Normal cacheable, no EL0 access const kern_attrs = DESC_AF | DESC_SH_ISH | DESC_AP_RW_EL1 | ATTR_NORMAL; // User attributes: EL1+EL0 RW, Normal cacheable const user_attrs = DESC_AF | DESC_SH_ISH | DESC_AP_RW_ALL | ATTR_NORMAL; // Device attributes: EL1 only, Device memory const dev_attrs = DESC_AF | DESC_AP_RW_EL1 | ATTR_DEVICE; // Shared attributes: EL1+EL0 RW, Normal cacheable (for SysTable/ION rings) const shared_attrs = DESC_AF | DESC_SH_ISH | DESC_AP_RW_ALL | ATTR_NORMAL | DESC_UXN; // 1. Kernel memory (0x40000000–0x48000000 = 128MB): EL1 only // Allows kernel trap handlers to execute while worker map is active map_range(root, DRAM_BASE, DRAM_BASE, 128 * 1024 * 1024, kern_attrs); // 2. User cell (identity mapped): EL0 accessible // Init: VA 0x48000000 → PA 0x48000000 (64MB) // Child: VA 0x48000000 → PA phys_base (varies) const user_va_base = DRAM_BASE + 128 * 1024 * 1024; // 0x48000000 map_range(root, user_va_base, phys_base, region_size, user_attrs); // 3. MMIO devices: EL1 only (kernel handles I/O) map_range(root, 0x09000000, 0x09000000, PAGE_SIZE, dev_attrs); // PL011 UART map_range(root, 0x08000000, 0x08000000, 0x20000, dev_attrs); // GICv2 map_range(root, 0x0a000000, 0x0a000000, 0x200 * 32, dev_attrs); // VirtIO MMIO // 4. SysTable + ION rings: EL0 RW (256KB = 64 pages) map_range(root, packet_addr, packet_addr, 64 * PAGE_SIZE, shared_attrs); // 5. Optional kernel stack mapping (if stack_base != 0) if (stack_base != 0) { map_range(root, stack_base, stack_base, stack_size, user_attrs); } kprint("[MM] Worker map created successfully\n"); // Return TTBR0 value (physical address of root table) // ARM64 TTBR has no mode bits like RISC-V SATP — just the address return @intFromPtr(root); } export fn mm_activate_satp(satp_val: u64) callconv(.c) void { asm volatile ("msr ttbr0_el1, %[val]" : : [val] "r" (satp_val), ); asm volatile ("isb"); asm volatile ("tlbi vmalle1"); asm volatile ("dsb sy"); asm volatile ("isb"); } export fn mm_debug_check_va(va: u64) callconv(.c) void { kprint("[MM] Inspecting VA: "); kprint_hex(va); kprint("\n"); // Read current TTBR0 var ttbr0: u64 = 0; asm volatile ("mrs %[out], ttbr0_el1" : [out] "=r" (ttbr0), ); const root: [*]const u64 = @ptrFromInt(ttbr0 & 0x0000FFFFFFFFF000); const l1_idx = (va >> 30) & 0x1FF; const l1_entry = root[l1_idx]; kprint(" L1["); kprint_hex(l1_idx); kprint("]: "); kprint_hex(l1_entry); if (l1_entry & DESC_VALID == 0) { kprint(" (Invalid)\n"); return; } if (l1_entry & 0b10 == 0) { kprint(" (Block)\n"); return; } kprint(" (Table)\n"); const l2: [*]const u64 = @ptrFromInt(l1_entry & 0x0000FFFFFFFFF000); const l2_idx = (va >> 21) & 0x1FF; const l2_entry = l2[l2_idx]; kprint(" L2["); kprint_hex(l2_idx); kprint("]: "); kprint_hex(l2_entry); if (l2_entry & DESC_VALID == 0) { kprint(" (Invalid)\n"); return; } if (l2_entry & 0b10 == 0) { kprint(" (Block)\n"); return; } kprint(" (Table)\n"); const l3: [*]const u64 = @ptrFromInt(l2_entry & 0x0000FFFFFFFFF000); const l3_idx = (va >> 12) & 0x1FF; const l3_entry = l3[l3_idx]; kprint(" L3["); kprint_hex(l3_idx); kprint("]: "); kprint_hex(l3_entry); kprint("\n"); } // VirtIO drivers now provided by virtio_net.zig and virtio_block.zig via abi.zig imports