# nip/src/nimpak/benchmark.nim

# SPDX-License-Identifier: LSL-1.0
# Copyright (c) 2026 Markus Maiwald
# Stewardship: Self Sovereign Society Foundation
#
# This file is part of the Nexus Sovereign Core.
# See legal/LICENSE_SOVEREIGN.md for license terms.

## NimPak Performance Benchmarking
##
## Comprehensive benchmarks for the NimPak package manager.
## Task 43: Performance benchmarking.

import std/[os, strutils, strformat, times, random, json, stats, sequtils]
import cas

type
  BenchmarkResult* = object
    ## Aggregated timing statistics for one benchmark run.
    ## Per-operation figures are in milliseconds; `totalTime` is in seconds.
    name*: string          ## Human-readable label of the benchmark
    iterations*: int       ## Number of timed operations requested
    totalTime*: float      ## Total time in seconds
    avgTime*: float        ## Average time per operation in ms
    minTime*: float        ## Minimum time in ms
    maxTime*: float        ## Maximum time in ms
    stdDev*: float         ## Standard deviation in ms
    opsPerSec*: float      ## Operations per second
    bytesProcessed*: int64 ## Total bytes processed
    throughputMBps*: float ## Throughput in MB/s (the benchmark procs in this
                           ## file divide by 1024 * 1024 bytes per MB)

  BenchmarkSuite* = object
    ## A named collection of benchmark results plus the wall-clock window
    ## in which the suite ran.
    name*: string                  ## Suite title
    results*: seq[BenchmarkResult] ## One entry per executed benchmark
    startTime*: DateTime           ## Captured when the suite starts
    endTime*: DateTime             ## Captured after the last benchmark

# ############################################################################
# Benchmark Utilities
# ############################################################################

proc calculateStats*(times: seq[float], iterations: int): BenchmarkResult =
  ## Summarise per-operation timings into a `BenchmarkResult`.
  ##
  ## * `times` - one sample per operation, in milliseconds.
  ## * `iterations` - number of operations the samples represent; used for
  ##   the ops/sec figure.
  ##
  ## Returns a result with `iterations`, `totalTime` (seconds), `avgTime`,
  ## `minTime`, `maxTime`, `stdDev` (all ms) and `opsPerSec` filled in.
  ## `name`, `bytesProcessed` and `throughputMBps` are left for the caller.
  ## When `times` is empty every statistic stays at zero.
  result.iterations = iterations

  # `min`/`max` over an openArray read the first element, so an empty sample
  # set (e.g. a benchmark invoked with zero iterations) would raise. `result`
  # is zero-initialised, which is the sensible answer for "nothing measured".
  if times.len == 0:
    return

  result.totalTime = times.foldl(a + b, 0.0) / 1000.0  # ms -> seconds
  result.avgTime = mean(times)
  result.minTime = min(times)
  result.maxTime = max(times)
  # A single sample has no spread.
  result.stdDev =
    if times.len > 1: standardDeviation(times)
    else: 0.0
  # Guard against a zero total (all samples below timer resolution).
  result.opsPerSec =
    if result.totalTime > 0: float(iterations) / result.totalTime
    else: 0.0

proc formatBenchmarkResult*(r: BenchmarkResult): string =
  ## Render a benchmark result as an indented, multi-line text block.
  ##
  ## The first line is the benchmark name followed by a colon; the following
  ## lines list iterations, total/avg/min/max time, standard deviation and
  ## ops/sec. A final `Throughput` line is appended only when
  ## `r.bytesProcessed` is positive. The returned string has no trailing
  ## newline.
  result = fmt"""
{r.name}:
  Iterations: {r.iterations}
  Total time: {r.totalTime:.3f}s
  Avg time:   {r.avgTime:.3f}ms
  Min time:   {r.minTime:.3f}ms
  Max time:   {r.maxTime:.3f}ms
  Std dev:    {r.stdDev:.3f}ms
  Ops/sec:    {r.opsPerSec:.0f}"""

  if r.bytesProcessed > 0:
    # A newline directly after an opening `"""` is not part of the literal,
    # so the line break has to be added explicitly; otherwise the throughput
    # text ends up glued to the end of the "Ops/sec" line.
    result.add "\n"
    result.add fmt"  Throughput: {r.throughputMBps:.2f} MB/s"

# ############################################################################
# CAS Benchmarks
# ############################################################################

proc benchmarkCasStore*(casManager: var CasManager, dataSize: int, iterations: int): BenchmarkResult =
  ## Time `casManager.storeObject` on freshly generated random payloads.
  ##
  ## * `casManager` - store under test; it is written to on every iteration.
  ## * `dataSize` - payload size in bytes.
  ## * `iterations` - number of store calls to time.
  ##
  ## Only the `storeObject` call is inside the timed region; payload
  ## generation is excluded. The outcome of each store is discarded.
  ## Returns the timing statistics plus bytes processed and throughput.
  var times: seq[float] = @[]

  # Seed the global generator once. Re-seeding on every iteration is
  # loop-invariant work and may restart the generator from the same seed when
  # the seed source has not advanced, which would produce repeated payloads.
  randomize()

  for _ in 1..iterations:
    var testData = newSeq[byte](dataSize)
    for b in testData.mitems:
      b = byte(rand(255))

    let startTime = epochTime()
    discard casManager.storeObject(testData)
    let endTime = epochTime()

    times.add((endTime - startTime) * 1000.0)  # seconds -> ms

  result = calculateStats(times, iterations)
  result.name = fmt"CAS Store ({dataSize} bytes)"
  result.bytesProcessed = int64(dataSize) * int64(iterations)
  if result.totalTime > 0:
    result.throughputMBps = float(result.bytesProcessed) / (result.totalTime * 1024 * 1024)

proc benchmarkCasRetrieve*(casManager: var CasManager, dataSize: int, iterations: int): BenchmarkResult =
  ## Time `casManager.retrieveObject` for a single pre-stored payload.
  ##
  ## A `dataSize`-byte payload holding the repeating pattern 0, 1, ..., 255 is
  ## stored once up front; its hash is then fetched `iterations` times, with
  ## only the retrieve call inside the timed region.
  var payload = newSeq[byte](dataSize)
  for idx, slot in payload.mpairs:
    slot = byte(idx mod 256)

  let stored = casManager.storeObject(payload)
  let storedHash = stored.get().hash

  var samplesMs: seq[float]
  for _ in 1..iterations:
    let t0 = epochTime()
    discard casManager.retrieveObject(storedHash)
    let t1 = epochTime()
    samplesMs.add((t1 - t0) * 1000.0)

  result = calculateStats(samplesMs, iterations)
  result.name = fmt"CAS Retrieve ({dataSize} bytes)"
  result.bytesProcessed = int64(dataSize) * int64(iterations)
  if result.totalTime > 0:
    let denominator = result.totalTime * 1024 * 1024
    result.throughputMBps = float(result.bytesProcessed) / denominator

proc benchmarkCasExists*(casManager: var CasManager, iterations: int): BenchmarkResult =
  ## Time `casManager.objectExists`, alternating between a hash that was
  ## just stored and a hash that was not.
  ##
  ## Odd-numbered rounds (starting with the first) probe the stored hash,
  ## even-numbered rounds probe the fabricated one. No byte counts or
  ## throughput are recorded for this benchmark.
  const missingHash = "xxh3-nonexistent0000000000000000"

  let seedObject = @[byte(1), byte(2), byte(3)]
  let stored = casManager.storeObject(seedObject)
  let presentHash = stored.get().hash

  var samplesMs: seq[float]
  for round in 1..iterations:
    let probePresent = round mod 2 == 1
    let t0 = epochTime()
    if probePresent:
      discard casManager.objectExists(presentHash)
    else:
      discard casManager.objectExists(missingHash)
    let t1 = epochTime()
    samplesMs.add((t1 - t0) * 1000.0)

  result = calculateStats(samplesMs, iterations)
  result.name = "CAS Exists Check"

proc benchmarkCasHash*(dataSize: int, iterations: int): BenchmarkResult =
  ## Time `calculateXxh3` on an in-memory buffer; nothing is stored.
  ##
  ## The buffer is `dataSize` bytes of the repeating pattern 0, 1, ..., 255
  ## and is hashed `iterations` times. Returns timing statistics plus bytes
  ## processed and throughput.
  var payload = newSeq[byte](dataSize)
  for idx, slot in payload.mpairs:
    slot = byte(idx mod 256)

  var samplesMs: seq[float]
  for _ in 1..iterations:
    let t0 = epochTime()
    discard calculateXxh3(payload)
    let t1 = epochTime()
    samplesMs.add((t1 - t0) * 1000.0)

  result = calculateStats(samplesMs, iterations)
  result.name = fmt"Hash Calculation ({dataSize} bytes)"
  result.bytesProcessed = int64(dataSize) * int64(iterations)
  if result.totalTime > 0:
    let denominator = result.totalTime * 1024 * 1024
    result.throughputMBps = float(result.bytesProcessed) / denominator

# ############################################################################
# Deduplication Benchmarks
# ############################################################################

proc benchmarkDeduplication*(casManager: var CasManager, chunkSize: int,
                             duplicateRatio: float, iterations: int): BenchmarkResult =
  ## Time `casManager.storeObject` on a workload containing repeated chunks.
  ##
  ## * `casManager` - store under test; it is written to on every iteration.
  ## * `chunkSize` - size of every chunk in bytes.
  ## * `duplicateRatio` - fraction of the `iterations` chunks that repeat an
  ##   earlier chunk (0.0 = all distinct). At least one distinct chunk is
  ##   always generated.
  ## * `iterations` - number of store calls to time.
  ##
  ## Distinct chunks are stored first, followed by the repeats. Returns the
  ## timing statistics plus bytes submitted and throughput.
  ##
  ## NOTE(review): chunk contents are deterministic, so calling this more
  ## than once on the same `casManager` presumably re-submits chunks an
  ## earlier call already stored - confirm whether that is intended.

  # Round to nearest: plain truncation turns e.g. 100 * (1.0 - 0.9), which is
  # slightly below 10 in floating point, into 9.
  let wanted = int(float(iterations) * (1.0 - duplicateRatio) + 0.5)
  # Never more distinct chunks than will be stored, never fewer than one.
  let uniqueChunks = clamp(wanted, 1, max(1, iterations))

  var chunks: seq[seq[byte]] = @[]

  # Generate the distinct chunks.
  for i in 0..<uniqueChunks:
    var chunk = newSeq[byte](chunkSize)
    for j in 0..<chunkSize:
      chunk[j] = byte((i + j) mod 256)
    # The fill pattern alone repeats every 256 chunks; stamping the chunk
    # index into the leading bytes keeps chunks distinct beyond that.
    for k in 0..<min(chunkSize, sizeof(int)):
      chunk[k] = byte((i shr (8 * k)) and 0xFF)
    chunks.add(chunk)

  # Pad the workload with randomly chosen repeats of the distinct chunks.
  randomize()
  while chunks.len < iterations:
    chunks.add(chunks[rand(uniqueChunks - 1)])

  var times: seq[float] = @[]

  for i in 0..<iterations:
    let startTime = epochTime()
    discard casManager.storeObject(chunks[i])
    let endTime = epochTime()

    times.add((endTime - startTime) * 1000.0)  # seconds -> ms

  result = calculateStats(times, iterations)
  result.name = fmt"Deduplication ({duplicateRatio*100:.0f}% duplicates)"
  result.bytesProcessed = int64(chunkSize) * int64(iterations)
  # Report throughput like the other byte-counting benchmarks; without it the
  # formatter prints a misleading "0.00 MB/s" because bytesProcessed is set.
  if result.totalTime > 0:
    result.throughputMBps = float(result.bytesProcessed) / (result.totalTime * 1024 * 1024)

# ############################################################################
# Benchmark Suite Runner
# ############################################################################

proc runCasBenchmarks*(casRoot: string, quick: bool = false): BenchmarkSuite =
  ## Execute every CAS benchmark in sequence, echoing each result as it
  ## completes, and return them collected in a `BenchmarkSuite`.
  ##
  ## * `casRoot` - directory handed to `initCasManager` (the second argument
  ##   is `casRoot / "system"`).
  ## * `quick` - when true, run a tenth of the iterations.
  const
    hashSizes = [1024, 4096, 65536, 1048576]
    storageSizes = [1024, 4096, 65536]
    duplicateRatios = [0.0, 0.5, 0.9]
    dedupChunkSize = 4096
    rulerWidth = 60

  result.name = "CAS Performance Benchmarks"
  result.startTime = now()

  var casManager = initCasManager(casRoot, casRoot / "system")

  # Iteration budgets: (store/retrieve/dedup, hashing).
  let (baseIters, hashIters) =
    if quick: (10, 100)
    else: (100, 1000)
  let ruler = repeat("=", rulerWidth)

  echo "Running CAS Performance Benchmarks..."
  echo ruler

  # Hashing only, no storage involved.
  echo "\n📊 Hash Calculation Benchmarks:"
  for size in hashSizes:
    let outcome = benchmarkCasHash(size, hashIters)
    result.results.add(outcome)
    echo formatBenchmarkResult(outcome)

  # Writing objects into the store.
  echo "\n📦 Store Benchmarks:"
  for size in storageSizes:
    let outcome = benchmarkCasStore(casManager, size, baseIters)
    result.results.add(outcome)
    echo formatBenchmarkResult(outcome)

  # Reading objects back.
  echo "\n📤 Retrieve Benchmarks:"
  for size in storageSizes:
    let outcome = benchmarkCasRetrieve(casManager, size, baseIters)
    result.results.add(outcome)
    echo formatBenchmarkResult(outcome)

  # Existence probes get ten times the base iteration count.
  echo "\n🔍 Existence Check Benchmark:"
  let existsOutcome = benchmarkCasExists(casManager, baseIters * 10)
  result.results.add(existsOutcome)
  echo formatBenchmarkResult(existsOutcome)

  # Store workloads with increasing shares of repeated chunks.
  echo "\n🔄 Deduplication Benchmarks:"
  for ratio in duplicateRatios:
    let outcome = benchmarkDeduplication(casManager, dedupChunkSize, ratio, baseIters)
    result.results.add(outcome)
    echo formatBenchmarkResult(outcome)

  result.endTime = now()
  let elapsed = result.endTime - result.startTime

  echo "\n" & ruler
  echo fmt"Total benchmark time: {elapsed.inSeconds}s"

proc generateBenchmarkReport*(suite: BenchmarkSuite): string =
  ## Serialise a benchmark suite to pretty-printed JSON.
  ##
  ## The document has the keys `suite`, `startTime`, `endTime` and `results`;
  ## each entry of `results` carries the fields of one `BenchmarkResult`.
  ## Timestamps are written in UTC as `yyyy-MM-ddTHH:mm:ssZ`.
  const isoUtcLayout = "yyyy-MM-dd'T'HH:mm:ss'Z'"

  # The layout hard-codes a trailing 'Z' (UTC designator), so the values must
  # actually be in UTC. Suite timestamps produced with `now()` are local
  # time; convert before formatting so the label is truthful.
  let startedAt = suite.startTime.utc().format(isoUtcLayout)
  let finishedAt = suite.endTime.utc().format(isoUtcLayout)

  var report = %*{
    "suite": suite.name,
    "startTime": startedAt,
    "endTime": finishedAt,
    "results": []
  }

  for r in suite.results:
    report["results"].add(%*{
      "name": r.name,
      "iterations": r.iterations,
      "totalTime": r.totalTime,
      "avgTimeMs": r.avgTime,
      "minTimeMs": r.minTime,
      "maxTimeMs": r.maxTime,
      "stdDevMs": r.stdDev,
      "opsPerSec": r.opsPerSec,
      "bytesProcessed": r.bytesProcessed,
      "throughputMBps": r.throughputMBps
    })

  result = report.pretty()

# ############################################################################
# Comparison with Other Systems (Placeholder)
# ############################################################################

proc compareBenchmarks*(nipResults: BenchmarkResult,
                        flatpakTime: float, snapTime: float, dockerTime: float): string =
  ## Build a Markdown-style table comparing NIP's average time with the
  ## supplied Flatpak, Snap and Docker timings (all in milliseconds).
  ##
  ## The last column is NIP's average time divided by the other system's
  ## time; the NIP row is fixed at `1.00x`. The returned text ends with a
  ## newline.
  let nipAvg = nipResults.avgTime

  var lines = @[
    fmt"Comparison for: {nipResults.name}",
    "",
    "| System    | Time (ms) | Speedup vs NIP |",
    "|-----------|-----------|----------------|",
    fmt"| NIP       | {nipAvg:>9.2f} | 1.00x          |"
  ]

  # One row per competing system, padded to the column widths above.
  for entry in [("Flatpak", flatpakTime), ("Snap", snapTime), ("Docker", dockerTime)]:
    let label = entry[0]
    let otherTime = entry[1]
    lines.add fmt"| {label:<9} | {otherTime:>9.2f} | {nipAvg/otherTime:.2f}x          |"

  result = lines.join("\n") & "\n"