nip/src/nimpak/benchmark.nim

301 lines
10 KiB
Nim

# SPDX-License-Identifier: LSL-1.0
# Copyright (c) 2026 Markus Maiwald
# Stewardship: Self Sovereign Society Foundation
#
# This file is part of the Nexus Sovereign Core.
# See legal/LICENSE_SOVEREIGN.md for license terms.
## NimPak Performance Benchmarking
##
## Comprehensive benchmarks for the NimPak package manager.
## Task 43: Performance benchmarking.
import std/[os, strutils, strformat, times, random, json, stats, sequtils]
import cas
type
  BenchmarkResult* = object
    ## Timing statistics collected for a single benchmark.
    name*: string           ## Label shown in reports
    iterations*: int        ## Iteration count the statistics refer to
    totalTime*: float       ## Sum of all samples, in seconds
    avgTime*: float         ## Mean time per operation, in ms
    minTime*: float         ## Fastest sample, in ms
    maxTime*: float         ## Slowest sample, in ms
    stdDev*: float          ## Standard deviation of the samples, in ms
    opsPerSec*: float       ## Operations per second
    bytesProcessed*: int64  ## Total number of bytes processed
    throughputMBps*: float  ## Throughput in MB/s

  BenchmarkSuite* = object
    ## A named group of benchmark results plus the suite's start and end
    ## timestamps.
    name*: string
    results*: seq[BenchmarkResult]
    startTime*, endTime*: DateTime
# ############################################################################
# Benchmark Utilities
# ############################################################################
proc calculateStats*(times: seq[float], iterations: int): BenchmarkResult =
  ## Summarise per-iteration timings into a `BenchmarkResult`.
  ##
  ## `times` holds one sample per measured run, in milliseconds.
  ## `iterations` is stored as-is and used as the numerator of `opsPerSec`.
  ## `name`, `bytesProcessed` and `throughputMBps` are left at their zero
  ## defaults for the caller to fill in.
  ##
  ## When `times` is empty every statistic stays at `0.0`.
  result.iterations = iterations
  if times.len == 0:
    # `min`/`max` over an openArray read element 0 and would raise on an
    # empty seq, so bail out with the zero-initialised statistics.
    return

  result.totalTime = times.foldl(a + b, 0.0) / 1000.0 # ms -> seconds
  result.avgTime = mean(times)
  result.minTime = min(times)
  result.maxTime = max(times)
  # A single sample has no spread.
  result.stdDev =
    if times.len > 1: standardDeviation(times)
    else: 0.0
  # Guard against a zero total (all samples below timer resolution).
  result.opsPerSec =
    if result.totalTime > 0: float(iterations) / result.totalTime
    else: 0.0
proc formatBenchmarkResult*(r: BenchmarkResult): string =
  ## Render `r` as a multi-line, human-readable summary.
  ##
  ## Times are printed with three decimals and ops/sec with none. A
  ## throughput line is appended only when `r.bytesProcessed > 0`.
  result = fmt"""
{r.name}:
Iterations: {r.iterations}
Total time: {r.totalTime:.3f}s
Avg time: {r.avgTime:.3f}ms
Min time: {r.minTime:.3f}ms
Max time: {r.maxTime:.3f}ms
Std dev: {r.stdDev:.3f}ms
Ops/sec: {r.opsPerSec:.0f}"""
  if r.bytesProcessed > 0:
    # A newline directly after an opening `"""` is not part of the string,
    # so the line break must be an explicit escape; without it this text
    # ends up on the same line as "Ops/sec".
    result.add &"\nThroughput: {r.throughputMBps:.2f} MB/s"
# ############################################################################
# CAS Benchmarks
# ############################################################################
proc benchmarkCasStore*(casManager: var CasManager, dataSize: int, iterations: int): BenchmarkResult =
  ## Time `iterations` calls to `casManager.storeObject`, each with a freshly
  ## generated random payload of `dataSize` bytes.
  ##
  ## Only the `storeObject` call is inside the timed region; payload
  ## generation is excluded. The value returned by `storeObject` is
  ## discarded, so this benchmark does not notice a store that reports
  ## failure through its return value.
  # Seed the global RNG once per run. Re-seeding on every iteration is wasted
  # work and, with a time-derived seed, risks repeating payloads.
  randomize()

  var samples = newSeqOfCap[float](max(iterations, 0))
  for _ in 1..iterations:
    var payload = newSeq[byte](dataSize)
    for b in payload.mitems:
      b = byte(rand(255))

    let started = epochTime()
    discard casManager.storeObject(payload)
    let finished = epochTime()
    samples.add((finished - started) * 1000.0) # seconds -> ms

  result = calculateStats(samples, iterations)
  result.name = fmt"CAS Store ({dataSize} bytes)"
  result.bytesProcessed = int64(dataSize) * int64(iterations)
  if result.totalTime > 0:
    result.throughputMBps = float(result.bytesProcessed) / (result.totalTime * 1024 * 1024)
proc benchmarkCasRetrieve*(casManager: var CasManager, dataSize: int,
                           iterations: int): BenchmarkResult =
  ## Time repeated `retrieveObject` calls against one stored object.
  ##
  ## A deterministic payload of `dataSize` bytes (byte value = index mod 256)
  ## is stored once up front; each of the `iterations` samples then measures
  ## a single retrieval of that object by its hash. The retrieved value is
  ## discarded.
  var payload = newSeq[byte](dataSize)
  for idx, slot in payload.mpairs:
    slot = byte(idx mod 256)
  let stored = casManager.storeObject(payload)
  let storedHash = stored.get().hash

  var samples: seq[float]
  for _ in 1..iterations:
    let t0 = epochTime()
    discard casManager.retrieveObject(storedHash)
    let t1 = epochTime()
    samples.add((t1 - t0) * 1000.0) # seconds -> ms

  result = calculateStats(samples, iterations)
  result.name = fmt"CAS Retrieve ({dataSize} bytes)"
  result.bytesProcessed = int64(dataSize) * int64(iterations)
  if result.totalTime > 0:
    result.throughputMBps = float(result.bytesProcessed) / (result.totalTime * 1024 * 1024)
proc benchmarkCasExists*(casManager: var CasManager, iterations: int): BenchmarkResult =
  ## Time `objectExists` lookups, alternating between the hash of an object
  ## stored at the start of the run and a fixed hash string that is not
  ## stored by this proc.
  ##
  ## The first sample checks the stored hash, the second the other one, and
  ## so on. Lookup results are discarded.
  let
    stored = casManager.storeObject(@[byte(1), byte(2), byte(3)])
    presentHash = stored.get().hash
    absentHash = "xxh3-nonexistent0000000000000000"

  var samples: seq[float]
  for i in 0..<iterations:
    let t0 = epochTime()
    # Even indices probe the stored hash, odd ones the absent hash.
    if i mod 2 == 0:
      discard casManager.objectExists(presentHash)
    else:
      discard casManager.objectExists(absentHash)
    let t1 = epochTime()
    samples.add((t1 - t0) * 1000.0) # seconds -> ms

  result = calculateStats(samples, iterations)
  result.name = "CAS Exists Check"
proc benchmarkCasHash*(dataSize: int, iterations: int): BenchmarkResult =
  ## Time `calculateXxh3` on an in-memory buffer, without touching storage.
  ##
  ## The same deterministic `dataSize`-byte buffer (byte value = index
  ## mod 256) is hashed `iterations` times; the hash value is discarded.
  var input = newSeq[byte](dataSize)
  for idx, slot in input.mpairs:
    slot = byte(idx mod 256)

  var samples: seq[float]
  for _ in 1..iterations:
    let t0 = epochTime()
    discard calculateXxh3(input)
    let t1 = epochTime()
    samples.add((t1 - t0) * 1000.0) # seconds -> ms

  result = calculateStats(samples, iterations)
  result.name = fmt"Hash Calculation ({dataSize} bytes)"
  result.bytesProcessed = int64(dataSize) * int64(iterations)
  if result.totalTime > 0:
    result.throughputMBps = float(result.bytesProcessed) / (result.totalTime * 1024 * 1024)
# ############################################################################
# Deduplication Benchmarks
# ############################################################################
proc benchmarkDeduplication*(casManager: var CasManager, chunkSize: int,
                             duplicateRatio: float, iterations: int): BenchmarkResult =
  ## Time `iterations` calls to `casManager.storeObject` over a workload in
  ## which roughly `duplicateRatio` of the chunks repeat an earlier chunk.
  ##
  ## The distinct chunks come first, followed by randomly chosen copies of
  ## them. At least one distinct chunk is always generated. Only the
  ## `storeObject` call is timed and its return value is discarded.
  ##
  ## Distinctness relies on an index tag written into the first
  ## `min(chunkSize, sizeof(int))` bytes, so very small chunk sizes can still
  ## collide.
  randomize()

  # Round instead of truncating: 100 * (1.0 - 0.9) is 9.999..., which must
  # count as 10 distinct chunks, not 9.
  let uniqueCount = max(1, toInt(float(iterations) * (1.0 - duplicateRatio)))

  # Per-call salt mixed into every chunk tag, so that chunks generated by an
  # earlier call are not byte-identical to this call's "distinct" chunks.
  # NOTE(review): assumes the store deduplicates by content - confirm.
  let salt = rand(int.high)

  var chunks = newSeqOfCap[seq[byte]](max(iterations, uniqueCount))
  for i in 0..<uniqueCount:
    var chunk = newSeq[byte](chunkSize)
    for j in 0..<chunkSize:
      chunk[j] = byte((i + j) mod 256)
    # The rolling pattern alone repeats every 256 chunks; stamp the salted
    # index (little-endian) over the leading bytes to keep chunks distinct.
    let tag = i xor salt
    for k in 0..<min(chunkSize, sizeof(int)):
      chunk[k] = byte((tag shr (8 * k)) and 0xFF)
    chunks.add(chunk)

  # Pad the workload up to `iterations` with copies of the distinct chunks.
  while chunks.len < iterations:
    let duplicate = chunks[rand(uniqueCount - 1)]
    chunks.add(duplicate)

  var samples = newSeqOfCap[float](max(iterations, 0))
  for i in 0..<iterations:
    let started = epochTime()
    discard casManager.storeObject(chunks[i])
    let finished = epochTime()
    samples.add((finished - started) * 1000.0) # seconds -> ms

  result = calculateStats(samples, iterations)
  result.name = fmt"Deduplication ({duplicateRatio*100:.0f}% duplicates)"
  result.bytesProcessed = int64(chunkSize) * int64(iterations)
  # Keep in step with the other benchmarks: the formatter prints a throughput
  # line whenever bytesProcessed > 0, so it must not be left at zero.
  if result.totalTime > 0:
    result.throughputMBps = float(result.bytesProcessed) / (result.totalTime * 1024 * 1024)
# ############################################################################
# Benchmark Suite Runner
# ############################################################################
proc runCasBenchmarks*(casRoot: string, quick: bool = false): BenchmarkSuite =
  ## Run every CAS benchmark in sequence, printing each result as it is
  ## produced and returning all of them in a `BenchmarkSuite`.
  ##
  ## A `CasManager` is created from `casRoot` and `casRoot / "system"` and
  ## shared by all storage benchmarks. With `quick = true` the iteration
  ## counts drop from 100/1000 to 10/100 (storage/hash).
  proc record(suite: var BenchmarkSuite, measured: BenchmarkResult) =
    ## Append `measured` to `suite` and print its formatted summary.
    suite.results.add(measured)
    echo formatBenchmarkResult(measured)

  const
    hashSizes = [1024, 4096, 65536, 1048576]
    storageSizes = [1024, 4096, 65536]
    duplicateRatios = [0.0, 0.5, 0.9]

  result.name = "CAS Performance Benchmarks"
  result.startTime = now()

  var casManager = initCasManager(casRoot, casRoot / "system")

  # Quick mode runs a tenth of the normal iteration counts.
  let (baseIters, hashIters) = if quick: (10, 100) else: (100, 1000)
  let separator = "=".repeat(60)

  echo "Running CAS Performance Benchmarks..."
  echo separator

  echo "\n📊 Hash Calculation Benchmarks:"
  for byteCount in hashSizes:
    result.record(benchmarkCasHash(byteCount, hashIters))

  echo "\n📦 Store Benchmarks:"
  for byteCount in storageSizes:
    result.record(benchmarkCasStore(casManager, byteCount, baseIters))

  echo "\n📤 Retrieve Benchmarks:"
  for byteCount in storageSizes:
    result.record(benchmarkCasRetrieve(casManager, byteCount, baseIters))

  echo "\n🔍 Existence Check Benchmark:"
  result.record(benchmarkCasExists(casManager, baseIters * 10))

  echo "\n🔄 Deduplication Benchmarks:"
  for dupRatio in duplicateRatios:
    result.record(benchmarkDeduplication(casManager, 4096, dupRatio, baseIters))

  result.endTime = now()
  echo "\n" & separator
  let elapsed = result.endTime - result.startTime
  echo fmt"Total benchmark time: {elapsed.inSeconds}s"
proc generateBenchmarkReport*(suite: BenchmarkSuite): string =
  ## Serialise `suite` as a pretty-printed JSON document.
  ##
  ## Top-level keys are `suite`, `startTime`, `endTime` and `results`; the
  ## latter holds one object per entry of `suite.results`, in order.
  ##
  ## Both timestamps are converted to UTC before formatting, because the
  ## format string appends a literal `Z` (the UTC designator).
  const isoUtcFormat = "yyyy-MM-dd'T'HH:mm:ss'Z'"
  let
    startedAt = suite.startTime.utc.format(isoUtcFormat)
    endedAt = suite.endTime.utc.format(isoUtcFormat)

  let report = %*{
    "suite": suite.name,
    "startTime": startedAt,
    "endTime": endedAt,
    "results": []
  }
  for r in suite.results:
    report["results"].add(%*{
      "name": r.name,
      "iterations": r.iterations,
      "totalTime": r.totalTime,
      "avgTimeMs": r.avgTime,
      "minTimeMs": r.minTime,
      "maxTimeMs": r.maxTime,
      "stdDevMs": r.stdDev,
      "opsPerSec": r.opsPerSec,
      "bytesProcessed": r.bytesProcessed,
      "throughputMBps": r.throughputMBps
    })
  result = report.pretty()
# ############################################################################
# Comparison with Other Systems (Placeholder)
# ############################################################################
proc compareBenchmarks*(nipResults: BenchmarkResult,
                        flatpakTime: float, snapTime: float,
                        dockerTime: float): string =
  ## Build a text table comparing `nipResults.avgTime` with caller-supplied
  ## timings (in ms) for Flatpak, Snap and Docker.
  ##
  ## The last column is `nipResults.avgTime` divided by the respective
  ## system's time; the NIP row is fixed at `1.00x`. The divisors are used
  ## as given, with no check for zero.
  let
    nipAvg = nipResults.avgTime
    flatpakRatio = nipAvg / flatpakTime
    snapRatio = nipAvg / snapTime
    dockerRatio = nipAvg / dockerTime
  result = fmt"""
Comparison for: {nipResults.name}
| System | Time (ms) | Speedup vs NIP |
|-----------|-----------|----------------|
| NIP | {nipAvg:>9.2f} | 1.00x |
| Flatpak | {flatpakTime:>9.2f} | {flatpakRatio:.2f}x |
| Snap | {snapTime:>9.2f} | {snapRatio:.2f}x |
| Docker | {dockerTime:>9.2f} | {dockerRatio:.2f}x |
"""