// rumpk/hal/entry_aarch64.zig

// SPDX-License-Identifier: LCL-1.0
// Copyright (c) 2026 Markus Maiwald
// Stewardship: Self Sovereign Society Foundation
//
// This file is part of the Nexus Commonwealth.
// See legal/LICENSE_COMMONWEALTH.md for license terms.
//! Rumpk HAL: AArch64 Entry Point (Sovereign Trap Architecture)
//!
//! This is the hardware floor for ARM64. Sets up exception vectors,
//! GIC, Generic Timer, and PL011 UART before handing off to Nim.
//!
//! SAFETY: Runs in bare-metal EL1 with identity mapping (no MMU in M3.1).
const std = @import("std");
const uart = @import("uart.zig");
const gic = @import("gic.zig");
const uart_input = @import("uart_input.zig");
// =========================================================
// L1 Kernel Logic (Nim FFI)
// =========================================================
// Syscall dispatcher in the Nim L1 kernel; returns the value placed in x0.
extern fn k_handle_syscall(nr: usize, a0: usize, a1: usize, a2: usize) usize;
// Fault reporting hook: (exception class, faulting PC, fault address).
extern fn k_handle_exception(cause: usize, pc: usize, addr: usize) void;
// Lets the kernel perform a scheduler yield that was deferred from IRQ context.
extern fn k_check_deferred_yield() void;
// Nim kernel entry point (called from aarch64_init after HAL bring-up).
extern fn kmain() void;
// Nim runtime initialization; called before kmain in aarch64_init.
extern fn NimMain() void;
// Platform surface init provided elsewhere in the HAL (called by hal_io_init).
extern fn hal_surface_init() void;
// =========================================================
// Trap Frame (36 slots * 8 = 288 bytes, 16-byte aligned)
// =========================================================
/// Register state captured on the kernel stack by the vector trampolines.
/// Field offsets MUST match the stp/ldp offsets hard-coded in the assembly
/// handlers below (x: 0..240, sp_el0: 248, elr: 256, spsr: 264, esr: 272,
/// far: 280).
const TrapFrame = extern struct {
// x0-x30 (31 GPRs), frame offsets 0..240
x: [31]usize,
// SP_EL0 (user stack pointer), offset 248
sp_el0: usize,
// Exception Link Register (return address), offset 256
elr_el1: usize,
// Saved Program Status Register, offset 264
spsr_el1: usize,
// ESR_EL1 (Exception Syndrome), offset 272
esr_el1: usize,
// FAR_EL1 (Fault Address), offset 280
far_el1: usize,
};
// =========================================================
// Exception Vector Table
// =========================================================
// ARM64 requires 16 entries, each 128 bytes (32 instructions), aligned to 2048.
// Layout:
// [0x000] Current EL, SP0: Sync / IRQ / FIQ / SError
// [0x200] Current EL, SPx: Sync / IRQ / FIQ / SError <- kernel traps
// [0x400] Lower EL, AArch64: Sync / IRQ / FIQ / SError <- userland traps
// [0x600] Lower EL, AArch32: Sync / IRQ / FIQ / SError <- unused
// Vector table is built at runtime by install_vectors_asm() — no comptime needed.
// =========================================================
// Vector Handlers (Assembly Trampolines)
// =========================================================
// Shared context save/restore macro as inline asm.
// Saves x0-x30, SP_EL0, ELR_EL1, SPSR_EL1, ESR_EL1, FAR_EL1 onto kernel stack.
// Total frame: 36 * 8 = 288 bytes (16-byte aligned).
/// Save full CPU context into a freshly allocated 288-byte TrapFrame on the
/// kernel stack and return with x0 = frame pointer.
/// NOTE(review): the vector trampolines below inline this same sequence and
/// do not call it; confirm this helper is referenced elsewhere or remove it.
fn save_context() callconv(.naked) void {
asm volatile (
// Allocate trap frame (288 bytes = 36 * 8)
\\ sub sp, sp, #288
// Save x0-x30
\\ stp x0, x1, [sp, #0]
\\ stp x2, x3, [sp, #16]
\\ stp x4, x5, [sp, #32]
\\ stp x6, x7, [sp, #48]
\\ stp x8, x9, [sp, #64]
\\ stp x10, x11, [sp, #80]
\\ stp x12, x13, [sp, #96]
\\ stp x14, x15, [sp, #112]
\\ stp x16, x17, [sp, #128]
\\ stp x18, x19, [sp, #144]
\\ stp x20, x21, [sp, #160]
\\ stp x22, x23, [sp, #176]
\\ stp x24, x25, [sp, #192]
\\ stp x26, x27, [sp, #208]
\\ stp x28, x29, [sp, #224]
\\ str x30, [sp, #240]
// Save SP_EL0
\\ mrs x0, sp_el0
\\ str x0, [sp, #248]
// Save ELR_EL1
\\ mrs x0, elr_el1
\\ str x0, [sp, #256]
// Save SPSR_EL1
\\ mrs x0, spsr_el1
\\ str x0, [sp, #264]
// Save ESR_EL1
\\ mrs x0, esr_el1
\\ str x0, [sp, #272]
// Save FAR_EL1
\\ mrs x0, far_el1
\\ str x0, [sp, #280]
// x0 = frame pointer (sp)
\\ mov x0, sp
\\ ret
);
}
/// Restore full CPU context from the TrapFrame at sp, pop the frame, and
/// return from the exception via `eret` (this is an exception return, not a
/// subroutine return — it never comes back to the caller).
/// NOTE(review): like save_context, this appears unreferenced by the inlined
/// trampolines below; confirm it is used elsewhere or remove it.
fn restore_context() callconv(.naked) void {
asm volatile (
// Restore ELR_EL1
\\ ldr x0, [sp, #256]
\\ msr elr_el1, x0
// Restore SPSR_EL1
\\ ldr x0, [sp, #264]
\\ msr spsr_el1, x0
// Restore SP_EL0
\\ ldr x0, [sp, #248]
\\ msr sp_el0, x0
// Restore x0-x30
\\ ldp x0, x1, [sp, #0]
\\ ldp x2, x3, [sp, #16]
\\ ldp x4, x5, [sp, #32]
\\ ldp x6, x7, [sp, #48]
\\ ldp x8, x9, [sp, #64]
\\ ldp x10, x11, [sp, #80]
\\ ldp x12, x13, [sp, #96]
\\ ldp x14, x15, [sp, #112]
\\ ldp x16, x17, [sp, #128]
\\ ldp x18, x19, [sp, #144]
\\ ldp x20, x21, [sp, #160]
\\ ldp x22, x23, [sp, #176]
\\ ldp x24, x25, [sp, #192]
\\ ldp x26, x27, [sp, #208]
\\ ldp x28, x29, [sp, #224]
\\ ldr x30, [sp, #240]
// Deallocate frame
\\ add sp, sp, #288
\\ eret
);
}
// Sync exception from current EL (kernel)
/// Naked vector trampoline: synchronous exception taken at EL1 (SPx).
/// Builds the 288-byte TrapFrame, calls rss_trap_handler(x0 = frame),
/// restores state (the handler may have modified ELR/SPSR/x0), and eret's.
export fn vector_sync_handler() callconv(.naked) void {
asm volatile (
// Allocate trap frame (288 bytes = 36 * 8).
\\ sub sp, sp, #288
// Save x0-x30.
\\ stp x0, x1, [sp, #0]
\\ stp x2, x3, [sp, #16]
\\ stp x4, x5, [sp, #32]
\\ stp x6, x7, [sp, #48]
\\ stp x8, x9, [sp, #64]
\\ stp x10, x11, [sp, #80]
\\ stp x12, x13, [sp, #96]
\\ stp x14, x15, [sp, #112]
\\ stp x16, x17, [sp, #128]
\\ stp x18, x19, [sp, #144]
\\ stp x20, x21, [sp, #160]
\\ stp x22, x23, [sp, #176]
\\ stp x24, x25, [sp, #192]
\\ stp x26, x27, [sp, #208]
\\ stp x28, x29, [sp, #224]
\\ str x30, [sp, #240]
// Save SP_EL0, ELR_EL1, SPSR_EL1, ESR_EL1, FAR_EL1 (offsets 248..280).
\\ mrs x0, sp_el0
\\ str x0, [sp, #248]
\\ mrs x0, elr_el1
\\ str x0, [sp, #256]
\\ mrs x0, spsr_el1
\\ str x0, [sp, #264]
\\ mrs x0, esr_el1
\\ str x0, [sp, #272]
\\ mrs x0, far_el1
\\ str x0, [sp, #280]
// Call the Zig handler with x0 = &TrapFrame.
\\ mov x0, sp
\\ bl rss_trap_handler
// Restore ELR/SPSR/SP_EL0 first (handler may have updated them).
\\ ldr x0, [sp, #256]
\\ msr elr_el1, x0
\\ ldr x0, [sp, #264]
\\ msr spsr_el1, x0
\\ ldr x0, [sp, #248]
\\ msr sp_el0, x0
// Restore x0-x30, pop the frame, return from exception.
\\ ldp x0, x1, [sp, #0]
\\ ldp x2, x3, [sp, #16]
\\ ldp x4, x5, [sp, #32]
\\ ldp x6, x7, [sp, #48]
\\ ldp x8, x9, [sp, #64]
\\ ldp x10, x11, [sp, #80]
\\ ldp x12, x13, [sp, #96]
\\ ldp x14, x15, [sp, #112]
\\ ldp x16, x17, [sp, #128]
\\ ldp x18, x19, [sp, #144]
\\ ldp x20, x21, [sp, #160]
\\ ldp x22, x23, [sp, #176]
\\ ldp x24, x25, [sp, #192]
\\ ldp x26, x27, [sp, #208]
\\ ldp x28, x29, [sp, #224]
\\ ldr x30, [sp, #240]
\\ add sp, sp, #288
\\ eret
);
}
// IRQ from current EL (kernel)
/// Naked vector trampoline: IRQ taken while running at EL1 (SPx).
/// Identical save/call/restore sequence as vector_sync_handler;
/// rss_trap_handler distinguishes IRQ vs sync by claiming the GIC.
export fn vector_irq_handler() callconv(.naked) void {
asm volatile (
// Allocate trap frame and save x0-x30.
\\ sub sp, sp, #288
\\ stp x0, x1, [sp, #0]
\\ stp x2, x3, [sp, #16]
\\ stp x4, x5, [sp, #32]
\\ stp x6, x7, [sp, #48]
\\ stp x8, x9, [sp, #64]
\\ stp x10, x11, [sp, #80]
\\ stp x12, x13, [sp, #96]
\\ stp x14, x15, [sp, #112]
\\ stp x16, x17, [sp, #128]
\\ stp x18, x19, [sp, #144]
\\ stp x20, x21, [sp, #160]
\\ stp x22, x23, [sp, #176]
\\ stp x24, x25, [sp, #192]
\\ stp x26, x27, [sp, #208]
\\ stp x28, x29, [sp, #224]
\\ str x30, [sp, #240]
// Save SP_EL0 / ELR / SPSR / ESR / FAR.
\\ mrs x0, sp_el0
\\ str x0, [sp, #248]
\\ mrs x0, elr_el1
\\ str x0, [sp, #256]
\\ mrs x0, spsr_el1
\\ str x0, [sp, #264]
\\ mrs x0, esr_el1
\\ str x0, [sp, #272]
\\ mrs x0, far_el1
\\ str x0, [sp, #280]
// Call the Zig handler with x0 = &TrapFrame.
\\ mov x0, sp
\\ bl rss_trap_handler
// Restore system registers, then GPRs, then eret.
\\ ldr x0, [sp, #256]
\\ msr elr_el1, x0
\\ ldr x0, [sp, #264]
\\ msr spsr_el1, x0
\\ ldr x0, [sp, #248]
\\ msr sp_el0, x0
\\ ldp x0, x1, [sp, #0]
\\ ldp x2, x3, [sp, #16]
\\ ldp x4, x5, [sp, #32]
\\ ldp x6, x7, [sp, #48]
\\ ldp x8, x9, [sp, #64]
\\ ldp x10, x11, [sp, #80]
\\ ldp x12, x13, [sp, #96]
\\ ldp x14, x15, [sp, #112]
\\ ldp x16, x17, [sp, #128]
\\ ldp x18, x19, [sp, #144]
\\ ldp x20, x21, [sp, #160]
\\ ldp x22, x23, [sp, #176]
\\ ldp x24, x25, [sp, #192]
\\ ldp x26, x27, [sp, #208]
\\ ldp x28, x29, [sp, #224]
\\ ldr x30, [sp, #240]
\\ add sp, sp, #288
\\ eret
);
}
// Sync exception from lower EL (userland SVC)
/// Naked vector trampoline: synchronous exception from EL0 (AArch64),
/// i.e. userland SVC/aborts. Same save/call/restore pattern as the
/// current-EL handlers; dispatch happens in rss_trap_handler.
export fn vector_sync_lower() callconv(.naked) void {
// Same save/restore/eret pattern
asm volatile (
// Allocate trap frame and save x0-x30.
\\ sub sp, sp, #288
\\ stp x0, x1, [sp, #0]
\\ stp x2, x3, [sp, #16]
\\ stp x4, x5, [sp, #32]
\\ stp x6, x7, [sp, #48]
\\ stp x8, x9, [sp, #64]
\\ stp x10, x11, [sp, #80]
\\ stp x12, x13, [sp, #96]
\\ stp x14, x15, [sp, #112]
\\ stp x16, x17, [sp, #128]
\\ stp x18, x19, [sp, #144]
\\ stp x20, x21, [sp, #160]
\\ stp x22, x23, [sp, #176]
\\ stp x24, x25, [sp, #192]
\\ stp x26, x27, [sp, #208]
\\ stp x28, x29, [sp, #224]
\\ str x30, [sp, #240]
// Save the user stack pointer and exception syndrome state.
\\ mrs x0, sp_el0
\\ str x0, [sp, #248]
\\ mrs x0, elr_el1
\\ str x0, [sp, #256]
\\ mrs x0, spsr_el1
\\ str x0, [sp, #264]
\\ mrs x0, esr_el1
\\ str x0, [sp, #272]
\\ mrs x0, far_el1
\\ str x0, [sp, #280]
// Call the Zig handler with x0 = &TrapFrame.
\\ mov x0, sp
\\ bl rss_trap_handler
// Restore system registers, then GPRs, then return to EL0.
\\ ldr x0, [sp, #256]
\\ msr elr_el1, x0
\\ ldr x0, [sp, #264]
\\ msr spsr_el1, x0
\\ ldr x0, [sp, #248]
\\ msr sp_el0, x0
\\ ldp x0, x1, [sp, #0]
\\ ldp x2, x3, [sp, #16]
\\ ldp x4, x5, [sp, #32]
\\ ldp x6, x7, [sp, #48]
\\ ldp x8, x9, [sp, #64]
\\ ldp x10, x11, [sp, #80]
\\ ldp x12, x13, [sp, #96]
\\ ldp x14, x15, [sp, #112]
\\ ldp x16, x17, [sp, #128]
\\ ldp x18, x19, [sp, #144]
\\ ldp x20, x21, [sp, #160]
\\ ldp x22, x23, [sp, #176]
\\ ldp x24, x25, [sp, #192]
\\ ldp x26, x27, [sp, #208]
\\ ldp x28, x29, [sp, #224]
\\ ldr x30, [sp, #240]
\\ add sp, sp, #288
\\ eret
);
}
// IRQ from lower EL (userland interrupted)
/// Naked vector trampoline: IRQ that interrupted EL0 execution.
/// Same save/call/restore pattern; rss_trap_handler claims the GIC and
/// dispatches timer/UART/VirtIO interrupts before we eret back to EL0.
export fn vector_irq_lower() callconv(.naked) void {
asm volatile (
// Allocate trap frame and save x0-x30.
\\ sub sp, sp, #288
\\ stp x0, x1, [sp, #0]
\\ stp x2, x3, [sp, #16]
\\ stp x4, x5, [sp, #32]
\\ stp x6, x7, [sp, #48]
\\ stp x8, x9, [sp, #64]
\\ stp x10, x11, [sp, #80]
\\ stp x12, x13, [sp, #96]
\\ stp x14, x15, [sp, #112]
\\ stp x16, x17, [sp, #128]
\\ stp x18, x19, [sp, #144]
\\ stp x20, x21, [sp, #160]
\\ stp x22, x23, [sp, #176]
\\ stp x24, x25, [sp, #192]
\\ stp x26, x27, [sp, #208]
\\ stp x28, x29, [sp, #224]
\\ str x30, [sp, #240]
// Save the user stack pointer and exception syndrome state.
\\ mrs x0, sp_el0
\\ str x0, [sp, #248]
\\ mrs x0, elr_el1
\\ str x0, [sp, #256]
\\ mrs x0, spsr_el1
\\ str x0, [sp, #264]
\\ mrs x0, esr_el1
\\ str x0, [sp, #272]
\\ mrs x0, far_el1
\\ str x0, [sp, #280]
// Call the Zig handler with x0 = &TrapFrame.
\\ mov x0, sp
\\ bl rss_trap_handler
// Restore system registers, then GPRs, then return to EL0.
\\ ldr x0, [sp, #256]
\\ msr elr_el1, x0
\\ ldr x0, [sp, #264]
\\ msr spsr_el1, x0
\\ ldr x0, [sp, #248]
\\ msr sp_el0, x0
\\ ldp x0, x1, [sp, #0]
\\ ldp x2, x3, [sp, #16]
\\ ldp x4, x5, [sp, #32]
\\ ldp x6, x7, [sp, #48]
\\ ldp x8, x9, [sp, #64]
\\ ldp x10, x11, [sp, #80]
\\ ldp x12, x13, [sp, #96]
\\ ldp x14, x15, [sp, #112]
\\ ldp x16, x17, [sp, #128]
\\ ldp x18, x19, [sp, #144]
\\ ldp x20, x21, [sp, #160]
\\ ldp x22, x23, [sp, #176]
\\ ldp x24, x25, [sp, #192]
\\ ldp x26, x27, [sp, #208]
\\ ldp x28, x29, [sp, #224]
\\ ldr x30, [sp, #240]
\\ add sp, sp, #288
\\ eret
);
}
// =========================================================
// Trap Handler (Zig Logic)
// =========================================================
// ESR_EL1 Exception Class codes (ESR_EL1[31:26])
const EC_SVC64: u6 = 0x15; // SVC in AArch64
const EC_DATA_ABORT_LOWER: u6 = 0x24; // data abort from lower EL (EL0)
const EC_DATA_ABORT_SAME: u6 = 0x25; // data abort from same EL (EL1)
const EC_INSN_ABORT_LOWER: u6 = 0x20; // instruction abort from lower EL
const EC_INSN_ABORT_SAME: u6 = 0x21; // instruction abort from same EL
// Re-entrancy depth counter; rss_trap_handler uses it to detect trap storms.
var trap_depth: usize = 0;
/// Unified trap handler called by every vector trampoline.
/// Dispatches interrupts (by claiming the GIC) and synchronous exceptions
/// (by decoding ESR_EL1.EC). `frame` points at the TrapFrame built on the
/// kernel stack; writes to it are restored into registers on eret.
export fn rss_trap_handler(frame: *TrapFrame) void {
    // Re-entrancy guard: a fault inside this handler recurses through the
    // sync vector; cap the depth and halt instead of looping forever.
    trap_depth += 1;
    if (trap_depth > 3) {
        uart.print("[Trap] Infinite Loop Detected. Halting.\n");
        while (true) {
            asm volatile ("wfe");
        }
    }
    defer trap_depth -= 1;
    const esr = frame.esr_el1;
    const ec: u6 = @truncate((esr >> 26) & 0x3F);
    // Both the Sync and IRQ vectors funnel here, and ESR is stale for IRQs.
    // So ask the GIC first: if claiming returns a real interrupt ID, treat
    // this trap as an interrupt and dispatch it.
    const irq = gic.gic_claim();
    if (!gic.is_spurious(irq)) {
        // Interrupt path
        if (irq == gic.TIMER_IRQ) {
            // Timer interrupt: mask it until the scheduler re-arms it,
            // then let the kernel run any deferred yield.
            timer_ack();
            k_check_deferred_yield();
        } else if (irq == gic.UART_IRQ) {
            uart_input.poll_input();
        } else if (irq >= gic.VIRTIO_MMIO_IRQ_BASE and irq < gic.VIRTIO_MMIO_IRQ_BASE + 32) {
            // VirtIO MMIO device interrupt — poll net driver
            const virtio_net = @import("virtio_net.zig");
            virtio_net.virtio_net_poll();
        }
        gic.gic_complete(irq);
        return;
    }
    // Synchronous exception path
    if (ec == EC_SVC64) {
        // Syscall: x8 = number, x0-x2 = args (AArch64 convention)
        const nr = frame.x[8];
        const a0 = frame.x[0];
        const a1 = frame.x[1];
        const a2 = frame.x[2];
        // BUGFIX: do NOT advance ELR_EL1 here. Unlike RISC-V (where sepc
        // must be bumped past the ecall), AArch64 hardware already sets
        // ELR_EL1 to the instruction AFTER the SVC (the architecture's
        // "preferred return address" for exception-generating
        // instructions). Adding 4 skipped one userland instruction after
        // every syscall.
        const ret = k_handle_syscall(nr, a0, a1, a2);
        frame.x[0] = ret;
    } else if (ec == EC_DATA_ABORT_LOWER or ec == EC_DATA_ABORT_SAME or
        ec == EC_INSN_ABORT_LOWER or ec == EC_INSN_ABORT_SAME)
    {
        // Abort: report the fault and halt (no recovery path in M3.1).
        uart.print("\n[Trap] Abort! EC:");
        uart.print_hex(@as(usize, ec));
        uart.print(" PC:");
        uart.print_hex(frame.elr_el1);
        uart.print(" FAR:");
        uart.print_hex(frame.far_el1);
        uart.print("\n");
        k_handle_exception(@as(usize, ec), frame.elr_el1, frame.far_el1);
        while (true) {
            asm volatile ("wfe");
        }
    } else {
        // Unknown exception class: log and return (best effort).
        uart.print("\n[Trap] Unhandled EC:");
        uart.print_hex(@as(usize, ec));
        uart.print(" ESR:");
        uart.print_hex(esr);
        uart.print(" PC:");
        uart.print_hex(frame.elr_el1);
        uart.print("\n");
    }
}
// =========================================================
// ARM Generic Timer
// =========================================================
var cntfrq: u64 = 0; // Timer frequency in Hz (read from CNTFRQ_EL0 at init)
var ns_per_tick_x16: u64 = 0; // (1e9 / freq) * 16 for fixed-point
/// Read the Generic Timer frequency and precompute the fixed-point
/// ns-per-tick factor used by rumpk_timer_now_ns. Leaves the EL1
/// physical timer disabled until rumpk_timer_set_ns arms it.
fn timer_init() void {
// Read timer frequency
cntfrq = asm volatile ("mrs %[ret], cntfrq_el0"
: [ret] "=r" (-> u64),
);
if (cntfrq == 0) {
// Fallback: QEMU virt typically uses 62.5 MHz
cntfrq = 62_500_000;
}
// Precompute ns_per_tick * 16 for fixed-point math
// ns_per_tick = 1_000_000_000 / cntfrq
// We use * 16 to avoid floating point: (1e9 * 16) / cntfrq
ns_per_tick_x16 = (1_000_000_000 * 16) / cntfrq;
// Disable timer initially
asm volatile ("msr cntp_ctl_el0, %[val]"
:
: [val] "r" (@as(u64, 0)),
);
}
/// Acknowledge a timer interrupt by masking the EL1 physical timer
/// (CNTP_CTL_EL0: IMASK=1, ENABLE=0) so it does not re-fire until the
/// scheduler re-arms it via rumpk_timer_set_ns.
fn timer_ack() void {
// Disable timer (mask) to prevent re-firing until rescheduled
asm volatile ("msr cntp_ctl_el0, %[val]"
:
: [val] "r" (@as(u64, 0x2)), // IMASK=1, ENABLE=0
);
}
/// Monotonic time in nanoseconds derived from CNTPCT_EL0.
/// Uses the fixed-point factor precomputed in timer_init.
export fn rumpk_timer_now_ns() u64 {
    const cnt: u64 = asm volatile ("mrs %[ret], cntpct_el0"
        : [ret] "=r" (-> u64),
    );
    // Widen to u128: `cnt * ns_per_tick_x16` can overflow u64 for large
    // counter values (e.g. x16 factor 256 at 62.5 MHz overflows at 2^56
    // ticks), which would trap in safe builds. ns = cnt * (x16 / 16).
    const ns_x16 = @as(u128, cnt) * ns_per_tick_x16;
    return @truncate(ns_x16 >> 4);
}
/// Arm the EL1 physical timer to fire after `interval_ns` nanoseconds.
/// Passing maxInt(u64) disables (masks) the timer instead.
export fn rumpk_timer_set_ns(interval_ns: u64) void {
    if (interval_ns == std.math.maxInt(u64)) {
        // Disable timer: IMASK=1, ENABLE=0.
        asm volatile ("msr cntp_ctl_el0, %[val]"
            :
            : [val] "r" (@as(u64, 0x2)), // IMASK=1
        );
        return;
    }
    // ticks = ns * cntfrq / 1e9, widened to u128: the u64 product
    // interval_ns * cntfrq overflows for intervals beyond ~295 s at
    // 62.5 MHz, trapping in safe builds / wrapping in ReleaseFast.
    // Saturate rather than trap if the quotient still exceeds u64.
    const ticks_wide = (@as(u128, interval_ns) * cntfrq) / 1_000_000_000;
    const ticks = std.math.cast(u64, ticks_wide) orelse std.math.maxInt(u64);
    // NOTE(review): CNTP_TVAL_EL0 is architecturally a signed 32-bit
    // countdown; confirm callers keep intervals below 2^31 ticks.
    asm volatile ("msr cntp_tval_el0, %[val]"
        :
        : [val] "r" (ticks),
    );
    // Enable the timer: ENABLE=1, IMASK=0.
    asm volatile ("msr cntp_ctl_el0, %[val]"
        :
        : [val] "r" (@as(u64, 0x1)), // ENABLE=1, IMASK=0
    );
}
// =========================================================
// Identity Map (MMU Setup)
// =========================================================
// ARM64 without MMU treats all memory as Device-nGnRnE, which requires
// strict alignment. We set up a minimal identity map with:
// MAIR index 0: Device-nGnRnE (0x00) — for MMIO
// MAIR index 1: Normal Write-Back Cacheable (0xFF) — for RAM
// Using 1GB block descriptors at Level 1 (only need the L1 table).
// Page table storage (must be 4096-byte aligned)
// 39-bit VA (T0SZ=25) starts walk at L1 — no L0 needed
var l1_table: [512]u64 align(4096) = [_]u64{0} ** 512;
/// Build the boot identity map (four 1GB blocks covering 0..4GB), program
/// MAIR/TCR/TTBR0, then enable the MMU and caches in SCTLR_EL1.
fn setup_identity_map() void {
// MAIR_EL1: index 0 = Device-nGnRnE, index 1 = Normal WB Cacheable
const MAIR_VAL: u64 = 0xFF_00; // attr1=0xFF (Normal WB), attr0=0x00 (Device)
asm volatile ("msr mair_el1, %[val]"
:
: [val] "r" (MAIR_VAL),
);
// TCR_EL1: 4KB granule, 40-bit PA, T0SZ=25 (39-bit VA = 512GB)
// IPS=0b010 (40-bit PA), TG0=0b00 (4KB), SH0=0b11 (Inner Shareable),
// ORGN0=0b01 (WB Cacheable), IRGN0=0b01 (WB Cacheable), T0SZ=25
const TCR_VAL: u64 = (0b010 << 32) | // IPS: 40-bit PA
(0b00 << 14) | // TG0: 4KB granule
(0b11 << 12) | // SH0: Inner Shareable
(0b01 << 10) | // ORGN0: Write-Back Cacheable
(0b01 << 8) | // IRGN0: Write-Back Cacheable
25; // T0SZ: 39-bit VA space
asm volatile ("msr tcr_el1, %[val]"
:
: [val] "r" (TCR_VAL),
);
// With T0SZ=25, VA is 39 bits → translation starts at L1 (no L0 needed).
// L1 entry [38:30] = 9 bits → 512 entries, each 1GB block.
// TTBR0_EL1 points directly at the L1 table.
// Block descriptor: addr[47:30] | AF | SH | AP | AttrIdx | Block(0b01)
const BLOCK_DEVICE: u64 = (1 << 10) | // AF (Access Flag)
(0b00 << 8) | // SH: Non-shareable (Device)
(0b00 << 6) | // AP: EL1 RW
(0b00 << 2) | // AttrIdx: 0 (Device-nGnRnE)
0x1; // Block descriptor
const BLOCK_NORMAL: u64 = (1 << 10) | // AF (Access Flag)
(0b11 << 8) | // SH: Inner Shareable
(0b00 << 6) | // AP: EL1 RW
(0b01 << 2) | // AttrIdx: 1 (Normal Cacheable)
0x1; // Block descriptor
// GB 0 (0x00000000-0x3FFFFFFF): Device (UART, GIC, etc.)
l1_table[0] = (0x00000000) | BLOCK_DEVICE;
// GB 1 (0x40000000-0x7FFFFFFF): Normal RAM (QEMU virt RAM)
l1_table[1] = (0x40000000) | BLOCK_NORMAL;
// GB 2 (0x80000000-0xBFFFFFFF): Device (high MMIO)
l1_table[2] = (0x80000000) | BLOCK_DEVICE;
// GB 3 (0xC0000000-0xFFFFFFFF): Device
l1_table[3] = (0xC0000000) | BLOCK_DEVICE;
// Set TTBR0_EL1 to point at L1 table directly (39-bit VA starts at L1)
const l1_addr = @intFromPtr(&l1_table);
asm volatile ("msr ttbr0_el1, %[val]"
:
: [val] "r" (l1_addr),
);
// Invalidate TLB
asm volatile ("tlbi vmalle1");
asm volatile ("dsb sy");
asm volatile ("isb");
// Enable MMU + caches in SCTLR_EL1
var sctlr: u64 = 0;
asm volatile ("mrs %[out], sctlr_el1"
: [out] "=r" (sctlr),
);
sctlr |= (1 << 0); // M: Enable MMU
sctlr |= (1 << 2); // C: Enable data cache
sctlr |= (1 << 12); // I: Enable instruction cache
sctlr &= ~@as(u64, 1 << 1); // A: Disable alignment check
asm volatile ("msr sctlr_el1, %[val]"
:
: [val] "r" (sctlr),
);
asm volatile ("isb");
}
// =========================================================
// Entry Point
// =========================================================
// SAFETY(Stack): Memory is immediately used by _start before any read.
// 64KB boot/kernel stack; `export` so startup code outside this file can
// reference the symbol.
export var stack_bytes: [64 * 1024]u8 align(16) = undefined;
/// HAL bring-up for AArch64: UART, identity-mapped MMU, GICv2, Generic
/// Timer, exception vectors, IRQ unmask — then hand off to the Nim L1
/// kernel (NimMain + kmain). Halts if kmain ever returns.
export fn aarch64_init() void {
// 1. Initialize UART (PL011)
uart.init();
uart.print("[Rumpk L0] aarch64_init reached\n");
// Set up identity-mapped page tables so RAM has Normal memory type.
// Without MMU, ARM64 uses Device memory which requires strict alignment.
setup_identity_map();
uart.print("[Rumpk L0] Identity map + MMU enabled\n");
// 2. Initialize GIC
gic.gic_init();
gic.gic_enable_timer_irq();
uart.print("[Rumpk L0] GICv2 initialized\n");
// 3. Initialize Generic Timer
timer_init();
uart.print("[Rumpk L0] Generic Timer initialized (freq=");
uart.print_hex(cntfrq);
uart.print(")\n");
// 4. Install exception vectors
// We write the vector table with proper branch instructions at runtime
install_vectors_asm();
uart.print("[Rumpk L0] Exception vectors installed\n");
// 5. Enable IRQs (clear DAIF.I bit)
asm volatile ("msr daifclr, #0x2"); // Clear IRQ mask
uart.print("[Rumpk ARM64] Handing off to Nim L1...\n");
// 6. Initialize Nim runtime and enter kernel
NimMain();
kmain();
rumpk_halt();
}
/// Install exception vectors at runtime: fill the 2KB table with WFE,
/// write `b <handler>` at the four entries we service, perform the cache
/// maintenance required after writing instructions, then point VBAR_EL1
/// at the table.
fn install_vectors_asm() void {
    // ARM64 vector table: 16 entries x 128 bytes (0x80), 2048-byte aligned.
    // Entries we care about:
    //   0x200: Current EL SPx Sync -> vector_sync_handler
    //   0x280: Current EL SPx IRQ  -> vector_irq_handler
    //   0x400: Lower EL Sync       -> vector_sync_lower
    //   0x480: Lower EL IRQ        -> vector_irq_lower
    const table_addr = @intFromPtr(&vector_table_runtime);
    // Fill with WFE (halt) as default for every unhandled vector.
    const wfe_insn: u32 = 0xD503205F; // WFE
    var i: usize = 0;
    while (i < 2048) : (i += 4) {
        const ptr: *volatile u32 = @ptrFromInt(table_addr + i);
        ptr.* = wfe_insn;
    }
    // Write branch instructions to our handlers.
    write_branch_to(table_addr + 0x200, @intFromPtr(&vector_sync_handler));
    write_branch_to(table_addr + 0x280, @intFromPtr(&vector_irq_handler));
    write_branch_to(table_addr + 0x400, @intFromPtr(&vector_sync_lower));
    write_branch_to(table_addr + 0x480, @intFromPtr(&vector_irq_lower));
    // BUGFIX: we just wrote instructions through the D-cache with the
    // MMU/caches enabled (setup_identity_map runs first). Clean the
    // D-cache to PoU and invalidate the I-cache for the table so
    // instruction fetch sees the new code — QEMU is coherent, real cores
    // are not. Assumes cache lines of at least 64 bytes; a smaller line
    // size would need a CTR_EL0-derived stride.
    var line = table_addr;
    while (line < table_addr + 2048) : (line += 64) {
        asm volatile ("dc cvau, %[addr]"
            :
            : [addr] "r" (line),
        );
    }
    asm volatile ("dsb ish");
    line = table_addr;
    while (line < table_addr + 2048) : (line += 64) {
        asm volatile ("ic ivau, %[addr]"
            :
            : [addr] "r" (line),
        );
    }
    asm volatile ("dsb ish");
    asm volatile ("isb");
    // Set VBAR_EL1 and synchronize.
    asm volatile ("msr vbar_el1, %[vbar]"
        :
        : [vbar] "r" (table_addr),
    );
    asm volatile ("isb");
}
/// Runtime-writable vector table (2048 bytes, 2048-byte aligned, as required
/// for VBAR_EL1). Populated by install_vectors_asm before VBAR_EL1 is set.
var vector_table_runtime: [2048]u8 align(2048) = [_]u8{0} ** 2048;
/// Write an ARM64 `B <target>` instruction at `from`.
/// Encoding: 0x14000000 | imm26, where imm26 is a signed word offset,
/// giving a reach of +/-128MB. Both addresses must be 4-byte aligned.
fn write_branch_to(from: usize, target: usize) void {
    const offset_bytes: i64 = @as(i64, @intCast(target)) - @as(i64, @intCast(from));
    // divExact traps if either address is not 4-byte aligned.
    const offset_words: i32 = @intCast(@divExact(offset_bytes, 4));
    // Guard the encodable range: silently truncating a too-far offset
    // would produce a branch to the wrong address.
    std.debug.assert(offset_words >= -(1 << 25) and offset_words < (1 << 25));
    const imm26: u32 = @as(u32, @bitCast(offset_words)) & 0x03FFFFFF;
    const insn: u32 = 0x14000000 | imm26;
    const ptr: *volatile u32 = @ptrFromInt(from);
    ptr.* = insn;
}
// =========================================================
// HAL Exports (Contract with L1 Nim Kernel)
// =========================================================
/// Write `len` bytes starting at `ptr` to the PL011 console.
export fn hal_console_write(ptr: [*]const u8, len: usize) void {
uart.write_bytes(ptr[0..len]);
}
/// Non-blocking console read: the next available byte, or -1 if none.
export fn console_read() c_int {
if (uart_input.read_byte()) |b| {
return @as(c_int, b);
}
return -1;
}
/// Poll the UART for pending input (same path the UART IRQ uses).
export fn console_poll() void {
uart_input.poll_input();
}
/// Debug: expose the UART line-status register to the Nim side.
export fn debug_uart_lsr() u8 {
return uart.get_lsr();
}
/// Print a 64-bit value as hex on the console (debug aid for Nim side).
export fn uart_print_hex(value: u64) void {
uart.print_hex(value);
}
/// Print an 8-bit value as hex on the console.
export fn uart_print_hex8(value: u8) void {
uart.print_hex8(value);
}
/// I/O bring-up requested by the L1 kernel: UART, platform surface, and
/// VirtIO block storage. Note uart.init() may run a second time here
/// (aarch64_init already called it).
export fn hal_io_init() void {
uart.init();
hal_surface_init();
// Initialize VirtIO block storage (MMIO transport)
const virtio_block = @import("virtio_block.zig");
virtio_block.init();
}
/// Fatal error from the L1 kernel: print the NUL-terminated message and halt.
export fn hal_panic(msg: [*:0]const u8) callconv(.c) noreturn {
uart.print("[HAL PANIC] ");
uart.print(std.mem.span(msg));
uart.print("\n");
rumpk_halt();
}
/// Park the CPU forever in a low-power WFE loop.
export fn rumpk_halt() noreturn {
uart.print("[Rumpk ARM64] Halting.\n");
while (true) {
asm volatile ("wfe");
}
}
/// kexec stub: not implemented on ARM64; logs and halts.
export fn hal_kexec(entry: u64, dtb: u64) noreturn {
_ = entry;
_ = dtb;
uart.print("[HAL] kexec not implemented on ARM64\n");
rumpk_halt();
}
// =========================================================
// Page Table Infrastructure (M3.3 — 4KB Granule, 39-bit VA)
// =========================================================
// The identity map above uses 1GB block descriptors for early boot.
// For user isolation we need 4KB page granularity (L1→L2→L3 walk).
const PAGE_SIZE: u64 = 4096;
const PAGE_SHIFT: u6 = 12;
const ENTRIES_PER_TABLE: usize = 512;
// ARM64 descriptor bits (stage-1, VMSAv8-64)
const DESC_VALID: u64 = 1 << 0;
const DESC_TABLE: u64 = 0b11; // L1/L2 table pointer
const DESC_PAGE: u64 = 0b11; // L3 page descriptor
const DESC_AF: u64 = 1 << 10; // Access Flag
const DESC_SH_ISH: u64 = 0b11 << 8; // Inner Shareable
const DESC_AP_RW_EL1: u64 = 0b00 << 6; // EL1 RW, EL0 no access
const DESC_AP_RW_ALL: u64 = 0b01 << 6; // EL1+EL0 RW
const DESC_UXN: u64 = @as(u64, 1) << 54; // Unprivileged Execute Never
const DESC_PXN: u64 = @as(u64, 1) << 53; // Privileged Execute Never
const ATTR_DEVICE: u64 = 0b00 << 2; // AttrIdx=0 (Device-nGnRnE)
const ATTR_NORMAL: u64 = 0b01 << 2; // AttrIdx=1 (Normal WB Cacheable)
// Start of RAM on the QEMU virt machine (see identity map above).
const DRAM_BASE: u64 = 0x40000000;
// Bump allocator for page tables (8MB pool); base set by pt_init_allocator.
var pt_alloc_base: u64 = 0;
var pt_alloc_offset: u64 = 0;
const PT_POOL_SIZE: u64 = 8 * 1024 * 1024;
/// Point the page-table bump allocator at `base` (physical address of an
/// 8MB pool) and reset its offset. Must be called before pt_alloc.
fn pt_init_allocator(base: u64) void {
pt_alloc_base = base;
pt_alloc_offset = 0;
}
/// Allocate one zeroed 4KB-aligned page table from the bump pool.
/// Returns null when the pool is exhausted or not yet initialized.
fn pt_alloc() ?[*]u64 {
    // Guard against use before pt_init_allocator(): a base of 0 would make
    // us zero out physical page 0 and hand it back as a table.
    if (pt_alloc_base == 0) {
        uart.print("[MM] pt_alloc before pt_init_allocator!\n");
        return null;
    }
    if (pt_alloc_offset + PAGE_SIZE > PT_POOL_SIZE) {
        uart.print("[MM] Page table pool exhausted!\n");
        return null;
    }
    const addr = pt_alloc_base + pt_alloc_offset;
    pt_alloc_offset += PAGE_SIZE;
    // Zero all 512 entries so every slot starts as an invalid descriptor.
    const table: [*]volatile u64 = @ptrFromInt(addr);
    for (0..ENTRIES_PER_TABLE) |i| {
        table[i] = 0;
    }
    return @ptrFromInt(addr);
}
/// Map a single 4KB page: walk L1→L2→L3, allocating intermediate tables.
/// Silently drops the mapping if the table pool is exhausted (pt_alloc
/// prints the error). Does not handle remapping over an existing block
/// descriptor — assumes entries are either invalid or table pointers.
fn map_page(root: [*]u64, va: u64, pa: u64, attrs: u64) void {
// 39-bit VA with 4KB granule:
// L1 index = VA[38:30] (9 bits)
// L2 index = VA[29:21] (9 bits)
// L3 index = VA[20:12] (9 bits)
const l1_idx = (va >> 30) & 0x1FF;
const l2_idx = (va >> 21) & 0x1FF;
const l3_idx = (va >> 12) & 0x1FF;
// L1 → L2 table (allocate and install a table descriptor if absent)
const l1_entry = root[l1_idx];
const l2_table: [*]u64 = if (l1_entry & DESC_VALID != 0)
@ptrFromInt(l1_entry & 0x0000FFFFFFFFF000)
else blk: {
const new_l2 = pt_alloc() orelse return;
root[l1_idx] = @intFromPtr(new_l2) | DESC_TABLE;
break :blk new_l2;
};
// L2 → L3 table
const l2_entry = l2_table[l2_idx];
const l3_table: [*]u64 = if (l2_entry & DESC_VALID != 0)
@ptrFromInt(l2_entry & 0x0000FFFFFFFFF000)
else blk: {
const new_l3 = pt_alloc() orelse return;
l2_table[l2_idx] = @intFromPtr(new_l3) | DESC_TABLE;
break :blk new_l3;
};
// L3 page descriptor: pa[47:12] | attrs | DESC_PAGE (0b11)
l3_table[l3_idx] = (pa & 0x0000FFFFFFFFF000) | attrs | DESC_PAGE;
}
/// Map a contiguous range of 4KB pages (va_start and pa_start must be
/// page-aligned). A partial trailing page is rounded up to a whole page.
fn map_range(root: [*]u64, va_start: u64, pa_start: u64, size: u64, attrs: u64) void {
    const full_pages = size / PAGE_SIZE;
    const page_count = full_pages + @intFromBool(size % PAGE_SIZE != 0);
    var page_idx: u64 = 0;
    while (page_idx < page_count) : (page_idx += 1) {
        const delta = page_idx * PAGE_SIZE;
        map_page(root, va_start + delta, pa_start + delta, attrs);
    }
}
// =========================================================
// HAL Userland Entry (EL1 → EL0 via eret)
// =========================================================
/// Drop from EL1 to EL0: set the return state (ELR/SPSR/SP_EL0), pass the
/// systable pointer in x0, and eret. Despite the `void` return type this
/// never returns to the caller; control resumes at `entry` in EL0.
export fn hal_enter_userland(entry: u64, systable: u64, sp: u64) callconv(.c) void {
// SPSR_EL1 = 0 → return to EL0t (M[3:0]=0b0000), DAIF clear (IRQs enabled)
const spsr: u64 = 0;
asm volatile (
\\ msr spsr_el1, %[spsr]
\\ msr elr_el1, %[entry]
\\ msr sp_el0, %[sp]
\\ mov x0, %[systable]
\\ eret
:
: [spsr] "r" (spsr),
[entry] "r" (entry),
[sp] "r" (sp),
[systable] "r" (systable),
);
}
// =========================================================
// Memory Management (M3.3 — Full Page Tables)
// =========================================================
// Console helpers provided by the L1 Nim kernel.
extern fn kprint(s: [*:0]const u8) void;
extern fn kprint_hex(n: u64) void;
// TTBR0 value of the kernel's own map, captured by mm_enable_kernel_paging.
var kernel_ttbr0: u64 = 0;
/// Initialize the page-table pool allocator.
export fn mm_init() callconv(.c) void {
// Page table pool at DRAM_BASE + 240MB (same offset as RISC-V)
pt_init_allocator(DRAM_BASE + 240 * 1024 * 1024);
}
/// Record the kernel's current TTBR0 so it can be restored after a worker
/// map has been active. (The identity map itself was installed earlier by
/// setup_identity_map using 1GB blocks.)
export fn mm_enable_kernel_paging() callconv(.c) void {
// Identity map is already set up by setup_identity_map() using 1GB blocks.
// Store current TTBR0 for later restore after worker map switches.
asm volatile ("mrs %[out], ttbr0_el1"
: [out] "=r" (kernel_ttbr0),
);
}
/// Return the saved kernel TTBR0 ("satp" name kept for RISC-V API parity).
export fn mm_get_kernel_satp() callconv(.c) u64 {
return kernel_ttbr0;
}
/// Build a 4KB-granule address space for a worker cell and return its
/// TTBR0 value (physical address of the root table), or 0 on allocation
/// failure of the root. Layout: kernel RAM (EL1-only), the worker's region
/// at the fixed user VA base, EL1-only MMIO windows, and the shared
/// SysTable/ION pages (EL0 RW, non-executable).
export fn mm_create_worker_map(
stack_base: u64,
stack_size: u64,
packet_addr: u64,
phys_base: u64,
region_size: u64,
) callconv(.c) u64 {
const root = pt_alloc() orelse return 0;
kprint("[MM] Cellular Map: phys_base=");
kprint_hex(phys_base);
kprint(" size=");
kprint_hex(region_size);
kprint("\n");
// Kernel attributes: EL1 RW, Normal cacheable, no EL0 access
const kern_attrs = DESC_AF | DESC_SH_ISH | DESC_AP_RW_EL1 | ATTR_NORMAL;
// User attributes: EL1+EL0 RW, Normal cacheable
const user_attrs = DESC_AF | DESC_SH_ISH | DESC_AP_RW_ALL | ATTR_NORMAL;
// Device attributes: EL1 only, Device memory
const dev_attrs = DESC_AF | DESC_AP_RW_EL1 | ATTR_DEVICE;
// Shared attributes: EL1+EL0 RW, Normal cacheable (for SysTable/ION rings)
const shared_attrs = DESC_AF | DESC_SH_ISH | DESC_AP_RW_ALL | ATTR_NORMAL | DESC_UXN;
// 1. Kernel memory (0x40000000-0x48000000 = 128MB): EL1 only
// Allows kernel trap handlers to execute while worker map is active
map_range(root, DRAM_BASE, DRAM_BASE, 128 * 1024 * 1024, kern_attrs);
// 2. User cell (identity mapped): EL0 accessible
// Init: VA 0x48000000 → PA 0x48000000 (64MB)
// Child: VA 0x48000000 → PA phys_base (varies)
const user_va_base = DRAM_BASE + 128 * 1024 * 1024; // 0x48000000
map_range(root, user_va_base, phys_base, region_size, user_attrs);
// 3. MMIO devices: EL1 only (kernel handles I/O)
map_range(root, 0x09000000, 0x09000000, PAGE_SIZE, dev_attrs); // PL011 UART
map_range(root, 0x08000000, 0x08000000, 0x20000, dev_attrs); // GICv2
map_range(root, 0x0a000000, 0x0a000000, 0x200 * 32, dev_attrs); // VirtIO MMIO
// 4. SysTable + ION rings: EL0 RW (256KB = 64 pages)
map_range(root, packet_addr, packet_addr, 64 * PAGE_SIZE, shared_attrs);
// 5. Optional kernel stack mapping (if stack_base != 0)
if (stack_base != 0) {
map_range(root, stack_base, stack_base, stack_size, user_attrs);
}
kprint("[MM] Worker map created successfully\n");
// Return TTBR0 value (physical address of root table)
// ARM64 TTBR has no mode bits like RISC-V SATP — just the address
return @intFromPtr(root);
}
/// Switch address space: load `satp_val` (a TTBR0 value from
/// mm_create_worker_map or mm_get_kernel_satp) into TTBR0_EL1 and flush
/// the TLB. ("satp" name kept for RISC-V API parity.)
export fn mm_activate_satp(satp_val: u64) callconv(.c) void {
asm volatile ("msr ttbr0_el1, %[val]"
:
: [val] "r" (satp_val),
);
asm volatile ("isb");
asm volatile ("tlbi vmalle1");
asm volatile ("dsb sy");
asm volatile ("isb");
}
/// Debug aid: manually walk the currently-active translation tables
/// (TTBR0_EL1) for `va` and print each level's descriptor, stopping at
/// the first invalid entry or block descriptor.
export fn mm_debug_check_va(va: u64) callconv(.c) void {
kprint("[MM] Inspecting VA: ");
kprint_hex(va);
kprint("\n");
// Read current TTBR0
var ttbr0: u64 = 0;
asm volatile ("mrs %[out], ttbr0_el1"
: [out] "=r" (ttbr0),
);
// Mask off non-address TTBR bits (ASID/CnP) to get the root table.
const root: [*]const u64 = @ptrFromInt(ttbr0 & 0x0000FFFFFFFFF000);
const l1_idx = (va >> 30) & 0x1FF;
const l1_entry = root[l1_idx];
kprint(" L1[");
kprint_hex(l1_idx);
kprint("]: ");
kprint_hex(l1_entry);
if (l1_entry & DESC_VALID == 0) {
kprint(" (Invalid)\n");
return;
}
// Bit 1 distinguishes table (1) from block (0) at L1/L2.
if (l1_entry & 0b10 == 0) {
kprint(" (Block)\n");
return;
}
kprint(" (Table)\n");
const l2: [*]const u64 = @ptrFromInt(l1_entry & 0x0000FFFFFFFFF000);
const l2_idx = (va >> 21) & 0x1FF;
const l2_entry = l2[l2_idx];
kprint(" L2[");
kprint_hex(l2_idx);
kprint("]: ");
kprint_hex(l2_entry);
if (l2_entry & DESC_VALID == 0) {
kprint(" (Invalid)\n");
return;
}
if (l2_entry & 0b10 == 0) {
kprint(" (Block)\n");
return;
}
kprint(" (Table)\n");
const l3: [*]const u64 = @ptrFromInt(l2_entry & 0x0000FFFFFFFFF000);
const l3_idx = (va >> 12) & 0x1FF;
const l3_entry = l3[l3_idx];
kprint(" L3[");
kprint_hex(l3_idx);
kprint("]: ");
kprint_hex(l3_entry);
kprint("\n");
}
// VirtIO drivers now provided by virtio_net.zig and virtio_block.zig via abi.zig imports