# nip/src/nimpak/chunks.nim — 624 lines, 20 KiB, Nim

# SPDX-License-Identifier: LSL-1.0
# Copyright (c) 2026 Markus Maiwald
# Stewardship: Self Sovereign Society Foundation
#
# This file is part of the Nexus Sovereign Core.
# See legal/LICENSE_SOVEREIGN.md for license terms.
## NCA Content-Addressable Chunks Format Handler
##
## This module implements the NCA (Nexus Content-Addressable) chunk format for
## Merkle-tree based content-addressable storage. NCA chunks provide efficient
## deduplication, integrity verification, and optional compression.
##
## Format: .nca (Nexus Content-Addressable chunk)
## - BLAKE3 Merkle tree hash support for chunk verification
## - Optional zstd compression with hash verification on uncompressed content
## - Chunk-level deduplication and retrieval system
## - Merkle proof generation and verification for integrity
import std/[os, json, times, strutils, sequtils, tables, options, algorithm]
import ./types_fixed
import ./formats
import ./cas
type
  NcaError* = object of NimPakError
    ## Error raised by NCA chunk operations.
    chunkHash*: string ## Hash of the chunk involved, or "" when unknown

  ChunkValidationResult* = object
    ## Outcome of validating a single NCA chunk (see validateNcaChunk).
    valid*: bool ## False when any hard error was recorded
    errors*: seq[ValidationError] ## Failures that make the chunk unusable
    warnings*: seq[string] ## Non-fatal observations (size, algorithms, ...)

  MerkleTree* = object
    ## Merkle tree for chunk verification
    leaves*: seq[string] ## Leaf hashes
    nodes*: seq[seq[string]] ## Internal node hashes by level (leaves first, root level last)
    root*: string ## Root hash

const
  NCA_MAGIC* = "NCA\x01" ## Magic bytes for NCA format
  MAX_CHUNK_SIZE* = 1024 * 1024 ## 1MB maximum chunk size
  MIN_CHUNK_SIZE* = 1024 ## 1KB minimum chunk size (smaller chunks only warn)
# =============================================================================
# Merkle Tree Implementation
# =============================================================================
proc calculateMerkleHash(left: string, right: string): string =
  ## Hash the concatenation of two child-node hashes to form their parent.
  let joined = left & right
  result = calculateBlake3(joined.toOpenArrayByte(0, joined.len - 1).toSeq())
proc buildMerkleTree*(hashes: seq[string]): MerkleTree =
  ## Build a Merkle tree bottom-up from the given leaf hashes.
  ##
  ## Every processed level (leaves first) is recorded in `nodes`, with the
  ## final single-node root level appended last. An unpaired (odd) node is
  ## promoted to the next level unchanged, without re-hashing.
  ## An empty input yields an empty tree with root == "".
  if hashes.len == 0:
    return MerkleTree(leaves: @[], nodes: @[], root: "")
  result = MerkleTree(leaves: hashes, nodes: @[])
  var level = hashes
  while level.len > 1:
    result.nodes.add(level)
    var parents: seq[string] = @[]
    var i = 0
    while i < level.len:
      if i + 1 < level.len:
        # Full pair: hash the two children into their parent.
        parents.add(calculateMerkleHash(level[i], level[i + 1]))
      else:
        # Odd node with no sibling: promote it unchanged.
        parents.add(level[i])
      i += 2
    level = parents
  # The single remaining node is the root.
  result.root = level[0]
  result.nodes.add(level)
proc generateMerkleProof*(tree: MerkleTree, leafIndex: int): MerkleProof =
  ## Generate a Merkle inclusion proof for the leaf at `leafIndex`.
  ##
  ## The proof records, level by level, the sibling hash and its index
  ## within that level. Levels where the node was promoted without a
  ## sibling contribute no proof entry. Returns an empty proof for an
  ## out-of-range index.
  # ROBUSTNESS: also reject negative indices; previously only the upper
  # bound was checked, so a negative index walked the tree with nonsense
  # sibling positions.
  if leafIndex < 0 or leafIndex >= tree.leaves.len:
    return MerkleProof(path: @[], indices: @[])
  var proof = MerkleProof(path: @[], indices: @[])
  var currentIndex = leafIndex
  # Traverse from the leaf level up to (but excluding) the root level.
  for level in 0..<tree.nodes.len - 1:
    let levelNodes = tree.nodes[level]
    # Sibling is the other member of the pair at this level.
    let siblingIndex = if currentIndex mod 2 == 0: currentIndex + 1
                       else: currentIndex - 1
    if siblingIndex < levelNodes.len:
      proof.path.add(levelNodes[siblingIndex])
      proof.indices.add(siblingIndex)
    currentIndex = currentIndex div 2
  return proof
proc verifyMerkleProof*(proof: MerkleProof, leafHash: string, rootHash: string): bool =
  ## Verify a Merkle inclusion proof against the expected root hash.
  ##
  ## Recomputes the hash chain from leaf to root. The side of the current
  ## node within each pair is derived from the stored sibling index: an
  ## odd sibling index means the sibling is the right child, so the
  ## current node is the left child (see generateMerkleProof).
  if proof.path.len == 0:
    # Single-leaf tree (or empty proof): the leaf must itself be the root.
    return leafHash == rootHash
  var currentHash = leafHash
  for i, siblingHash in proof.path:
    # BUG FIX: the previous code assumed currentIndex == 0 at the first
    # step, so any proof for a leaf at an odd index combined the hashes in
    # the wrong order and always failed. Parity of the recorded sibling
    # index determines the order for every step.
    if proof.indices[i] mod 2 == 1:
      # Sibling is on the right; current node is the left child.
      currentHash = calculateMerkleHash(currentHash, siblingHash)
    else:
      # Sibling is on the left; current node is the right child.
      currentHash = calculateMerkleHash(siblingHash, currentHash)
  return currentHash == rootHash
# =============================================================================
# NCA Chunk Creation and Management
# =============================================================================
proc createNcaChunk*(hash: string, data: seq[byte], compressed: bool = true): NcaChunk =
  ## Construct an NCA chunk with default crypto algorithms (BLAKE3 hashing,
  ## Ed25519 signatures, format version 1.0) and an empty Merkle proof.
  let emptyProof = MerkleProof(path: @[], indices: @[])
  let algorithms = CryptoAlgorithms(
    hashAlgorithm: "BLAKE3",
    signatureAlgorithm: "Ed25519",
    version: "1.0"
  )
  result = NcaChunk(
    hash: hash,
    data: data,
    compressed: compressed,
    merkleProof: emptyProof,
    format: NcaChunk,
    cryptoAlgorithms: algorithms
  )
proc createNcaChunkFromData*(data: seq[byte], compressed: bool = true): Result[NcaChunk, NcaError] =
  ## Create an NCA chunk from raw data, computing its BLAKE3 hash.
  ##
  ## Returns InvalidMetadata for empty data or data larger than
  ## MAX_CHUNK_SIZE; UnknownError for unexpected runtime failures.
  # BUG FIX: the signature was garbled ("seq[byte]g, ompressed") and did
  # not compile; restored the intended (data, compressed) parameters.
  try:
    if data.len == 0:
      return err[NcaChunk, NcaError](NcaError(
        code: InvalidMetadata,
        msg: "Cannot create chunk from empty data",
        chunkHash: ""
      ))
    if data.len > MAX_CHUNK_SIZE:
      return err[NcaChunk, NcaError](NcaError(
        code: InvalidMetadata,
        msg: "Chunk size exceeds maximum: " & $data.len & " > " & $MAX_CHUNK_SIZE,
        chunkHash: ""
      ))
    # Content address: BLAKE3 over the raw (uncompressed) payload.
    let hash = calculateBlake3(data)
    return ok[NcaChunk, NcaError](createNcaChunk(hash, data, compressed))
  except CatchableError as e:
    # Narrowed from `except Exception` so Defects (programmer bugs) are
    # not silently converted into error results.
    return err[NcaChunk, NcaError](NcaError(
      code: UnknownError,
      msg: "Failed to create NCA chunk: " & e.msg,
      chunkHash: ""
    ))
# =============================================================================
# NCA Binary Format Serialization
# =============================================================================
proc serializeNcaChunk*(chunk: NcaChunk): seq[byte] =
  ## Serialize an NCA chunk to its binary layout:
  ## magic(4) | version(1) | flags(1) | algoLen(1) + algo |
  ## hashLen(2, BE) + hash | dataLen(4, BE) + data |
  ## proofLen(2, BE) + Merkle proof as JSON.
  # Uses the implicit `result` instead of the previous `var result`, which
  # shadowed it.
  result = @[]
  # Magic bytes
  result.add(NCA_MAGIC.toOpenArrayByte(0, NCA_MAGIC.len - 1).toSeq())
  # Version (1 byte)
  result.add(0x01'u8)
  # Flags (1 byte): bit 0 = compressed
  var flags: uint8 = 0
  if chunk.compressed:
    flags = flags or 0x01
  result.add(flags)
  # Hash algorithm: 1-byte length prefix, then the name bytes
  let hashAlgo = chunk.cryptoAlgorithms.hashAlgorithm
  result.add(hashAlgo.len.uint8)
  result.add(hashAlgo.toOpenArrayByte(0, hashAlgo.len - 1).toSeq())
  # Hash: 2-byte big-endian length prefix, then the hash bytes.
  # BUG FIX: this binding was garbled ("let hasnk.hash...") and did not
  # compile; restored the intended hashBytes assignment.
  let hashBytes = chunk.hash.toOpenArrayByte(0, chunk.hash.len - 1).toSeq()
  result.add((hashBytes.len shr 8).uint8)    # High byte
  result.add((hashBytes.len and 0xFF).uint8) # Low byte
  result.add(hashBytes)
  # Data: 4-byte big-endian length prefix, then the payload
  let dataLen = chunk.data.len
  result.add((dataLen shr 24).uint8)
  result.add((dataLen shr 16).uint8)
  result.add((dataLen shr 8).uint8)
  result.add((dataLen and 0xFF).uint8)
  result.add(chunk.data)
  # Merkle proof: JSON object with 2-byte big-endian length prefix
  let proofJson = %*{
    "path": chunk.merkleProof.path,
    "indices": chunk.merkleProof.indices
  }
  let proofStr = $proofJson
  let proofData = proofStr.toOpenArrayByte(0, proofStr.len - 1).toSeq()
  result.add((proofData.len shr 8).uint8)    # High byte
  result.add((proofData.len and 0xFF).uint8) # Low byte
  result.add(proofData)
proc deserializeNcaChunk*(data: seq[byte]): Result[NcaChunk, NcaError] =
  ## Deserialize an NCA chunk from its binary layout (see serializeNcaChunk).
  ## All multi-byte length prefixes are big-endian. Every field is
  ## bounds-checked before it is read, so malformed or truncated input
  ## yields an InvalidMetadata error rather than an index defect.
  try:
    # Minimum size: magic(4) + version(1) + flags(1).
    if data.len < NCA_MAGIC.len + 2:
      return err[NcaChunk, NcaError](NcaError(
        code: InvalidMetadata,
        msg: "Invalid NCA chunk: too small",
        chunkHash: ""
      ))
    var offset = 0
    # Magic bytes.
    # NOTE(review): cast[string] on a byte slice relies on string/seq
    # layout compatibility; kept as-is to match the module's convention.
    let magic = cast[string](data[offset..<offset + NCA_MAGIC.len])
    if magic != NCA_MAGIC:
      return err[NcaChunk, NcaError](NcaError(
        code: InvalidMetadata,
        msg: "Invalid NCA chunk: bad magic bytes",
        chunkHash: ""
      ))
    offset += NCA_MAGIC.len
    # Version (only 0x01 is supported)
    let version = data[offset]
    if version != 0x01:
      return err[NcaChunk, NcaError](NcaError(
        code: InvalidMetadata,
        msg: "Unsupported NCA chunk version: " & $version,
        chunkHash: ""
      ))
    offset += 1
    # Flags: bit 0 = compressed
    let flags = data[offset]
    let compressed = (flags and 0x01) != 0
    offset += 1
    # Hash algorithm (1-byte length prefix).
    # BUG FIX: the length byte was previously read without a bounds check,
    # so a 6-byte input raised IndexDefect instead of returning an error.
    if offset >= data.len:
      return err[NcaChunk, NcaError](NcaError(
        code: InvalidMetadata,
        msg: "Invalid NCA chunk: truncated hash algorithm length",
        chunkHash: ""
      ))
    let hashAlgoLen = data[offset].int
    offset += 1
    if offset + hashAlgoLen > data.len:
      return err[NcaChunk, NcaError](NcaError(
        code: InvalidMetadata,
        msg: "Invalid NCA chunk: truncated hash algorithm",
        chunkHash: ""
      ))
    let hashAlgo = cast[string](data[offset..<offset + hashAlgoLen])
    offset += hashAlgoLen
    # Hash (2-byte big-endian length prefix)
    if offset + 2 > data.len:
      return err[NcaChunk, NcaError](NcaError(
        code: InvalidMetadata,
        msg: "Invalid NCA chunk: truncated hash length",
        chunkHash: ""
      ))
    let hashLen = (data[offset].int shl 8) or data[offset + 1].int
    offset += 2
    if offset + hashLen > data.len:
      return err[NcaChunk, NcaError](NcaError(
        code: InvalidMetadata,
        msg: "Invalid NCA chunk: truncated hash",
        chunkHash: ""
      ))
    let hash = cast[string](data[offset..<offset + hashLen])
    offset += hashLen
    # Data payload (4-byte big-endian length prefix)
    if offset + 4 > data.len:
      return err[NcaChunk, NcaError](NcaError(
        code: InvalidMetadata,
        msg: "Invalid NCA chunk: truncated data length",
        chunkHash: ""
      ))
    let dataLen = (data[offset].int shl 24) or (data[offset + 1].int shl 16) or
      (data[offset + 2].int shl 8) or data[offset + 3].int
    offset += 4
    if offset + dataLen > data.len:
      return err[NcaChunk, NcaError](NcaError(
        code: InvalidMetadata,
        msg: "Invalid NCA chunk: truncated data",
        chunkHash: ""
      ))
    let chunkData = data[offset..<offset + dataLen]
    offset += dataLen
    # Merkle proof (2-byte big-endian length prefix, JSON payload)
    if offset + 2 > data.len:
      return err[NcaChunk, NcaError](NcaError(
        code: InvalidMetadata,
        msg: "Invalid NCA chunk: truncated proof length",
        chunkHash: ""
      ))
    let proofLen = (data[offset].int shl 8) or data[offset + 1].int
    offset += 2
    if offset + proofLen > data.len:
      return err[NcaChunk, NcaError](NcaError(
        code: InvalidMetadata,
        msg: "Invalid NCA chunk: truncated proof",
        chunkHash: ""
      ))
    var merkleProof = MerkleProof(path: @[], indices: @[])
    if proofLen > 0:
      let proofData = cast[string](data[offset..<offset + proofLen])
      try:
        let proofJson = parseJson(proofData)
        merkleProof.path = proofJson["path"].getElems().mapIt(it.getStr())
        merkleProof.indices = proofJson["indices"].getElems().mapIt(it.getInt())
      except KeyError, ValueError:
        # ROBUSTNESS: also covers missing keys (KeyError) and wrong JSON
        # node kinds (JsonKindError is a ValueError, as is
        # JsonParsingError), which previously fell through to the generic
        # UnknownError handler.
        return err[NcaChunk, NcaError](NcaError(
          code: InvalidMetadata,
          msg: "Invalid NCA chunk: malformed Merkle proof",
          chunkHash: hash
        ))
    let chunk = NcaChunk(
      hash: hash,
      data: chunkData,
      compressed: compressed,
      merkleProof: merkleProof,
      format: NcaChunk,
      cryptoAlgorithms: CryptoAlgorithms(
        hashAlgorithm: hashAlgo,
        signatureAlgorithm: "Ed25519",
        version: "1.0"
      )
    )
    return ok[NcaChunk, NcaError](chunk)
  except CatchableError as e:
    # Narrowed from `except Exception` so Defects are not swallowed.
    return err[NcaChunk, NcaError](NcaError(
      code: UnknownError,
      msg: "Failed to deserialize NCA chunk: " & e.msg,
      chunkHash: ""
    ))
# =============================================================================
# NCA File Operations
# =============================================================================
proc saveNcaChunk*(chunk: NcaChunk, filePath: string): Result[void, NcaError] =
  ## Save an NCA chunk to `filePath` in binary format.
  ##
  ## Appends the ".nca" extension when missing and creates the parent
  ## directory as needed. Returns FileWriteError on any I/O failure.
  try:
    let serialized = serializeNcaChunk(chunk)
    # Ensure the file has the correct .nca extension
    let finalPath = if filePath.endsWith(".nca"): filePath else: filePath & ".nca"
    # Ensure parent directory exists.
    # ROBUSTNESS: a bare filename has parentDir == "" and createDir("")
    # raises OSError, so skip directory creation in that case.
    let parentDir = finalPath.parentDir()
    if parentDir.len > 0 and not dirExists(parentDir):
      createDir(parentDir)
    writeFile(finalPath, cast[string](serialized))
    return ok[void, NcaError]()
  except IOError, OSError:
    # BUG FIX: createDir raises OSError, which previously escaped the
    # `except IOError` handler instead of becoming an error result.
    return err[void, NcaError](NcaError(
      code: FileWriteError,
      msg: "Failed to save NCA chunk: " & getCurrentExceptionMsg(),
      chunkHash: chunk.hash
    ))
proc loadNcaChunk*(filePath: string): Result[NcaChunk, NcaError] =
  ## Load and deserialize an NCA chunk from `filePath`.
  ## Returns PackageNotFound when the file does not exist and
  ## FileReadError on read failures.
  try:
    if not fileExists(filePath):
      return err[NcaChunk, NcaError](NcaError(
        code: PackageNotFound,
        msg: "NCA chunk file not found: " & filePath,
        chunkHash: ""
      ))
    # Read the raw bytes and hand them to the binary decoder.
    let raw = readFile(filePath)
    return deserializeNcaChunk(raw.toOpenArrayByte(0, raw.len - 1).toSeq())
  except IOError as e:
    return err[NcaChunk, NcaError](NcaError(
      code: FileReadError,
      msg: "Failed to load NCA chunk: " & e.msg,
      chunkHash: ""
    ))
# =============================================================================
# Chunk Validation
# =============================================================================
proc validateNcaChunk*(chunk: NcaChunk): ChunkValidationResult =
  ## Validate an NCA chunk's format and content.
  ##
  ## Hard errors (set valid = false): empty hash, oversized data, BLAKE3
  ## hash mismatch, inconsistent Merkle proof structure.
  ## Warnings only: empty payload, very small payload, non-quantum-resistant
  ## algorithms.
  # Uses the implicit `result` instead of the previous `var result`, which
  # shadowed it.
  result = ChunkValidationResult(valid: true, errors: @[], warnings: @[])
  # Validate hash presence
  if chunk.hash.len == 0:
    result.errors.add(ValidationError(
      field: "hash",
      message: "Chunk hash cannot be empty",
      suggestions: @["Calculate chunk hash"]
    ))
    result.valid = false
  # Validate data size
  if chunk.data.len == 0:
    result.warnings.add("Chunk contains no data")
  if chunk.data.len > MAX_CHUNK_SIZE:
    result.errors.add(ValidationError(
      field: "data",
      message: "Chunk size exceeds maximum: " & $chunk.data.len,
      suggestions: @["Split into smaller chunks"]
    ))
    result.valid = false
  if chunk.data.len < MIN_CHUNK_SIZE and chunk.data.len > 0:
    result.warnings.add("Chunk size is very small: " & $chunk.data.len & " bytes")
  # Validate hash integrity: stored hash must match the recomputed BLAKE3
  let calculatedHash = calculateBlake3(chunk.data)
  if calculatedHash != chunk.hash:
    result.errors.add(ValidationError(
      field: "hash",
      message: "Hash mismatch: calculated " & calculatedHash & " != stored " & chunk.hash,
      suggestions: @["Recalculate hash", "Check data integrity"]
    ))
    result.valid = false
  # Validate Merkle proof structure: path and indices are parallel seqs
  if chunk.merkleProof.path.len != chunk.merkleProof.indices.len:
    result.errors.add(ValidationError(
      field: "merkleProof",
      message: "Merkle proof path and indices length mismatch",
      suggestions: @["Regenerate Merkle proof"]
    ))
    result.valid = false
  # Validate cryptographic algorithms (advisory only)
  if not isQuantumResistant(chunk.cryptoAlgorithms):
    result.warnings.add("Using non-quantum-resistant algorithms: " &
      chunk.cryptoAlgorithms.hashAlgorithm)
# =============================================================================
# Chunk Deduplication and Retrieval
# =============================================================================
proc storeNcaChunkInCas*(chunk: NcaChunk, cas: CasManager): Result[string, NcaError] =
  ## Serialize the chunk and persist it in content-addressable storage,
  ## returning the CAS object hash on success.
  try:
    let payload = serializeNcaChunk(chunk)
    let stored = cas.storeObject(payload)
    if stored.isErr:
      return err[string, NcaError](NcaError(
        code: CasError,
        msg: "Failed to store chunk in CAS: " & stored.getError().msg,
        chunkHash: chunk.hash
      ))
    return ok[string, NcaError](stored.get().hash)
  except Exception as e:
    return err[string, NcaError](NcaError(
      code: UnknownError,
      msg: "Failed to store NCA chunk: " & e.msg,
      chunkHash: chunk.hash
    ))
proc retrieveNcaChunkFromCas*(hash: string, cas: CasManager): Result[NcaChunk, NcaError] =
  ## Fetch a serialized chunk from content-addressable storage by hash
  ## and decode it back into an NcaChunk.
  try:
    let fetched = cas.retrieveObject(hash)
    if fetched.isErr:
      return err[NcaChunk, NcaError](NcaError(
        code: CasError,
        msg: "Failed to retrieve chunk from CAS: " & fetched.getError().msg,
        chunkHash: hash
      ))
    return deserializeNcaChunk(fetched.get())
  except Exception as e:
    return err[NcaChunk, NcaError](NcaError(
      code: UnknownError,
      msg: "Failed to retrieve NCA chunk: " & e.msg,
      chunkHash: hash
    ))
# =============================================================================
# Large File Chunking
# =============================================================================
proc chunkLargeFile*(filePath: string, chunkSize: int = 64 * 1024): Result[seq[NcaChunk], NcaError] =
  ## Split a file into NCA chunks of at most `chunkSize` bytes and attach
  ## a Merkle inclusion proof (over all chunk hashes) to each chunk.
  ##
  ## Returns InvalidMetadata for an unusable `chunkSize`, PackageNotFound
  ## for a missing file, and FileReadError on read failures.
  try:
    # ROBUSTNESS: previously chunkSize was unvalidated — a non-positive
    # value misbehaved and a value above MAX_CHUNK_SIZE guaranteed a
    # per-chunk failure later in createNcaChunkFromData.
    if chunkSize <= 0 or chunkSize > MAX_CHUNK_SIZE:
      return err[seq[NcaChunk], NcaError](NcaError(
        code: InvalidMetadata,
        msg: "Invalid chunk size: " & $chunkSize & " (must be 1.." & $MAX_CHUNK_SIZE & ")",
        chunkHash: ""
      ))
    if not fileExists(filePath):
      return err[seq[NcaChunk], NcaError](NcaError(
        code: PackageNotFound,
        msg: "File not found: " & filePath,
        chunkHash: ""
      ))
    let file = open(filePath, fmRead)
    defer: file.close()
    var chunks: seq[NcaChunk] = @[]
    var buffer = newSeq[byte](chunkSize)
    var chunkHashes: seq[string] = @[]
    # Read the file chunkSize bytes at a time; the final chunk may be
    # shorter.
    while true:
      let bytesRead = file.readBytes(buffer, 0, chunkSize)
      if bytesRead == 0:
        break # EOF
      let chunkResult = createNcaChunkFromData(buffer[0..<bytesRead], compressed = true)
      if chunkResult.isErr:
        return err[seq[NcaChunk], NcaError](chunkResult.getError())
      let chunk = chunkResult.get()
      chunks.add(chunk)
      chunkHashes.add(chunk.hash)
    # Build one Merkle tree over all chunk hashes, then attach each
    # chunk's inclusion proof so it can be verified independently.
    let merkleTree = buildMerkleTree(chunkHashes)
    for i, chunk in chunks.mpairs:
      chunk.merkleProof = generateMerkleProof(merkleTree, i)
    return ok[seq[NcaChunk], NcaError](chunks)
  except IOError as e:
    return err[seq[NcaChunk], NcaError](NcaError(
      code: FileReadError,
      msg: "Failed to chunk file: " & e.msg,
      chunkHash: ""
    ))
proc reconstructFileFromChunks*(chunks: seq[NcaChunk], outputPath: string,
    rootHash: string = ""): Result[void, NcaError] =
  ## Concatenate chunk payloads (in the given order) into `outputPath`,
  ## optionally verifying each chunk's Merkle proof against `rootHash`
  ## first. Returns ChecksumMismatch on proof failure and FileWriteError
  ## on I/O failure.
  try:
    # Verify Merkle proofs when a root hash is provided.
    if rootHash.len > 0:
      for chunk in chunks:
        if not verifyMerkleProof(chunk.merkleProof, chunk.hash, rootHash):
          return err[void, NcaError](NcaError(
            code: ChecksumMismatch,
            msg: "Merkle proof verification failed for chunk: " & chunk.hash,
            chunkHash: chunk.hash
          ))
    # Ensure parent directory exists (skip for a bare filename, whose
    # parentDir is "" — createDir("") would raise OSError).
    let parentDir = outputPath.parentDir()
    if parentDir.len > 0 and not dirExists(parentDir):
      createDir(parentDir)
    let outputFile = open(outputPath, fmWrite)
    defer: outputFile.close()
    # Write chunk payloads in order
    for chunk in chunks:
      if chunk.data.len > 0:
        # BUG FIX: previously wrote from `chunk.data.unsafeAddr`, which is
        # the address of the seq itself rather than its element buffer, so
        # the output file contained garbage. Write from the first element
        # and check for short writes.
        let written = outputFile.writeBuffer(chunk.data[0].unsafeAddr, chunk.data.len)
        if written != chunk.data.len:
          return err[void, NcaError](NcaError(
            code: FileWriteError,
            msg: "Short write while reconstructing file: " & outputPath,
            chunkHash: chunk.hash
          ))
    return ok[void, NcaError]()
  except IOError as e:
    return err[void, NcaError](NcaError(
      code: FileWriteError,
      msg: "Failed to reconstruct file: " & e.msg,
      chunkHash: ""
    ))
# =============================================================================
# Utility Functions
# =============================================================================
proc getNcaInfo*(chunk: NcaChunk): string =
  ## Render a human-readable, multi-line summary of the chunk.
  result =
    "NCA Chunk: " & chunk.hash & "\n" &
    "Size: " & $chunk.data.len & " bytes\n" &
    "Compressed: " & $chunk.compressed & "\n" &
    "Algorithm: " & chunk.cryptoAlgorithms.hashAlgorithm & "\n" &
    "Merkle Proof: " & $chunk.merkleProof.path.len & " nodes\n"
proc calculateBlake3*(data: seq[byte]): string =
  ## Calculate BLAKE3 hash - imported from CAS module
  ## NOTE(review): this re-export shadows the imported `cas.calculateBlake3`;
  ## calls earlier in this module appear before this declaration — confirm
  ## they resolve to the intended symbol (Nim requires declare-before-use
  ## for same-module procs).
  cas.calculateBlake3(data)
proc calculateBlake2b*(data: seq[byte]): string =
  ## Calculate BLAKE2b hash - imported from CAS module
  ## NOTE(review): thin re-export of `cas.calculateBlake2b`; appears unused
  ## within this module — verify external callers before removing.
  cas.calculateBlake2b(data)