131 lines
3.8 KiB
Nim
131 lines
3.8 KiB
Nim
import unittest, os, strutils, tables, sets
|
|
import ../src/nimpak/cas
|
|
import ../src/nip/types
|
|
|
|
suite "Cross-Format Deduplication Metrics Tests":
  # Every test starts from a fresh CAS rooted in a per-process temp directory
  # that already holds three distinct 3-byte chunks. The tests differ only in
  # which (format, package) pairs reference which chunk.

  var
    cas: CasManager
    testRoot = getTempDir() / ("nip_dedup_test_" & $getCurrentProcessId())

    # Fixture payloads, 3 bytes apiece.
    chunk1 = @[byte 1, 2, 3]
    chunk2 = @[byte 4, 5, 6]
    chunk3 = @[byte 7, 8, 9]

    # Hashes reported by the CAS for the payloads above; assigned in setup.
    hash1, hash2, hash3: string

  setup:
    createDir(testRoot)
    cas = initCasManager(testRoot)

    # Store all three payloads first, then record the hash of each result.
    let
      stored1 = cas.storeObject(chunk1)
      stored2 = cas.storeObject(chunk2)
      stored3 = cas.storeObject(chunk3)

    hash1 = stored1.get().hash
    hash2 = stored2.get().hash
    hash3 = stored3.get().hash

  teardown:
    # Drop the on-disk store so the next test starts clean.
    removeDir(testRoot)

  test "Basic Deduplication Stats":
    # Reference layout: every chunk is used by exactly two formats.
    #   pkg1 (NPK):    chunk1, chunk2
    #   pkg2 (NIP):    chunk2, chunk3
    #   pkg3 (NEXTER): chunk1, chunk3
    discard cas.addReference(hash1, NPK, "pkg1")
    discard cas.addReference(hash2, NPK, "pkg1")

    discard cas.addReference(hash2, NIP, "pkg2")
    discard cas.addReference(hash3, NIP, "pkg2")

    discard cas.addReference(hash1, NEXTER, "pkg3")
    discard cas.addReference(hash3, NEXTER, "pkg3")

    let outcome = cas.getDeduplicationStats()
    check outcome.isOk
    let metrics = outcome.get()

    check:
      # Physical: 3 chunks * 3 bytes.
      metrics.totalPhysicalSize == 9
      # Logical: 3 packages * (3 + 3) bytes.
      metrics.totalLogicalSize == 18
      # Ratio: 18 / 9.
      metrics.deduplicationRatio == 2.0
      # All three chunks have more than one referrer.
      metrics.sharedChunks == 3
      # Savings: 18 - 9.
      metrics.savings == 9

    # Format overlap, one chunk per pair of formats. The expected keys list
    # the format names in alphabetical order joined by '-':
    #   chunk1 -> NEXTER-NPK, chunk2 -> NIP-NPK, chunk3 -> NEXTER-NIP
    check:
      metrics.formatOverlap.hasKey("NEXTER-NPK")
      metrics.formatOverlap["NEXTER-NPK"] == 1

      metrics.formatOverlap.hasKey("NIP-NPK")
      metrics.formatOverlap["NIP-NPK"] == 1

      metrics.formatOverlap.hasKey("NEXTER-NIP")
      metrics.formatOverlap["NEXTER-NIP"] == 1

  test "No Deduplication":
    # Each format references its own chunk, so nothing is shared.
    discard cas.addReference(hash1, NPK, "pkg1")
    discard cas.addReference(hash2, NIP, "pkg2")
    discard cas.addReference(hash3, NEXTER, "pkg3")

    let outcome = cas.getDeduplicationStats()
    check outcome.isOk
    let metrics = outcome.get()

    check:
      metrics.totalPhysicalSize == 9
      metrics.totalLogicalSize == 9
      metrics.deduplicationRatio == 1.0
      metrics.sharedChunks == 0
      metrics.savings == 0
      metrics.formatOverlap.len == 0

  test "High Redundancy":
    # All three formats reference the same single chunk.
    discard cas.addReference(hash1, NPK, "pkg1")
    discard cas.addReference(hash1, NIP, "pkg2")
    discard cas.addReference(hash1, NEXTER, "pkg3")

    let outcome = cas.getDeduplicationStats()
    check outcome.isOk
    let metrics = outcome.get()

    # chunk2 and chunk3 are stored by setup but never referenced here. The
    # expectations below assume the stats only cover referenced chunks (the
    # original note says getDeduplicationStats iterates over
    # `cas.formatRefs`), so only chunk1's 3 bytes count as physical size.
    check:
      metrics.totalPhysicalSize == 3
      # 3 references * 3 bytes.
      metrics.totalLogicalSize == 9
      metrics.deduplicationRatio == 3.0
      metrics.sharedChunks == 1

    check:
      metrics.formatOverlap.hasKey("NEXTER-NIP-NPK")
      metrics.formatOverlap["NEXTER-NIP-NPK"] == 1
|