# make-manifest-and-hashes.py -- Python script (executable file; 130 lines, 3.9 KiB)
import hashlib, sys, os, math
|
|
from struct import *
|
|
|
|
# File-size thresholds, in bytes (10 GiB and 20 GiB respectively).
WARN_ON_FILESIZE = 10 * 1024 ** 3
ERROR_ON_FILESIZE = 20 * 1024 ** 3

# Bounds, in bytes, for the per-file chunk size used when hashing.
MIN_CHUNK_SIZE = 1024
MAX_CHUNK_SIZE = 2 * 1024 ** 2  # 2 MB

# Fraction of the file size to aim for per chunk (0.1 means 10% of each file).
CHUNK_SIZE_PERCENT = 0.1
|
|
|
|
# Ensure we get a directory as a parameter
|
|
# Everything after the script name; the first entry is the directory to scan.
_cli_args = sys.argv[1:]
if not _cli_args:
    sys.exit("Usage: make-manifest-and-hashes.py <directory>")

targetdir = _cli_args[0]
if not os.path.isdir(targetdir):
    sys.exit("'" + targetdir + "' is not a directory.")
|
|
|
|
|
|
# Binary write function, called once per chunk hashed (the extra whole-file record in calc_hash is currently commented out)
|
|
def write_hash(hashfile, offset, length, digest):
    """Append one binary hash record to ``hashfile``.

    A record is ``offset`` and ``length`` packed with the struct format
    ``"QQ"`` (two unsigned 64-bit integers in native byte order and
    alignment), immediately followed by the raw bytes of ``digest.digest()``.

    Args:
        hashfile: Writable binary file object that receives the record.
        offset: Byte offset of the hashed range.
        length: Length in bytes of the hashed range.
        digest: Hash object exposing ``digest()`` (e.g. ``hashlib.sha256``).
    """
    header = pack("QQ", offset, length)
    hashfile.write(header)
    hashfile.write(digest.digest())
|
|
|
|
# Progressive hash function, called once per file
|
|
def calc_hash(fullpath, size):
    """Hash a file chunk by chunk and write the chunk hashes next to it.

    The file at ``fullpath`` is read in chunks. For each chunk, one record
    (offset, length, SHA-256 of the chunk) is written to
    ``fullpath + ".hash"`` through ``write_hash``, and the chunk is also fed
    into a running SHA-256 over everything read.

    Args:
        fullpath: Path of the file to hash.
        size: Number of bytes to hash; the caller passes the file's size.

    Returns:
        Hex string of the SHA-256 digest of the ``size`` bytes read.

    Raises:
        IOError: If the file ends before ``size`` bytes have been read.
    """
    # Pick a chunk size: CHUNK_SIZE_PERCENT of the file, clamped to
    # [MIN_CHUNK_SIZE, MAX_CHUNK_SIZE], then rounded up to a multiple of 1024.
    target_chunk_size = min(max(size * CHUNK_SIZE_PERCENT, MIN_CHUNK_SIZE), MAX_CHUNK_SIZE)
    target_chunk_size = int(1024 * math.ceil(target_chunk_size / 1024.0))

    # For large files (over 100 MiB), print progress in tenths of the file.
    bigfile = size > 100 * 1024 * 1024
    progress_split = 10

    ongoing_digest = hashlib.sha256()
    offset = 0

    # Context managers close both files even when a read or write raises.
    with open(fullpath, "rb") as input_file:
        with open(fullpath + ".hash", "wb") as hash_file:
            while offset < size:
                # Read a chunk of data (the last chunk may be shorter).
                chunk_size = min(target_chunk_size, size - offset)
                data = input_file.read(chunk_size)

                # A short read means the file is smaller than `size`; recording
                # `chunk_size` for it would produce a wrong hash record.
                if len(data) != chunk_size:
                    raise IOError(
                        "Unexpected end of file in '" + fullpath
                        + "' at offset " + str(offset + len(data))
                    )

                # Per-chunk hash record, plus the running whole-file hash.
                write_hash(hash_file, offset, chunk_size, hashlib.sha256(data))
                ongoing_digest.update(data)

                # Floor division keeps the progress buckets integral on both
                # Python 2 and Python 3.
                previous_progress = progress_split * offset // size
                offset += chunk_size
                current_progress = progress_split * offset // size

                if bigfile and current_progress > 0 and current_progress != previous_progress:
                    print(str(100 * current_progress // progress_split) + "%...")

    # NOTE: no trailing whole-file record (offset 0, length `size`) is written;
    # that call was already commented out in the original code. The whole-file
    # digest is only returned to the caller.
    return ongoing_digest.hexdigest()
|
|
|
|
|
|
# Beginning of main code - open the manifest
|
|
# The manifest lists, for every hashed file, three lines: its path relative
# to targetdir (with forward slashes), its size in bytes, and its SHA-256 hex
# digest as returned by calc_hash.
manifest_path = os.path.join(targetdir, "manifest")
manifest = open(manifest_path, "w")

largeFileWarnings = 0

try:
    # Iterate over all files in target directory, recursing into subdirectories
    for root, dirs, files in os.walk(targetdir):
        for f in files:
            # Skip this script's own outputs (.hash files and the manifest).
            # NOTE(review): this also skips any file named "manifest" inside
            # subdirectories -- confirm that is intended.
            if f.endswith(".hash") or f == "manifest":
                continue

            fullpath = os.path.join(root, f)
            # Use relpath rather than slicing off len(targetdir) + 1 characters:
            # the slice dropped the first character of every path when
            # targetdir already ended in a separator (e.g. "data/").
            relativepath = os.path.relpath(fullpath, targetdir).replace("\\", "/")
            size = os.path.getsize(fullpath)

            print(relativepath)
            print(size)

            if size > ERROR_ON_FILESIZE:
                print("ERROR: File is greater than " + str(ERROR_ON_FILESIZE) + " bytes (see B*2185704)")
                print("Aborting.")
                # Close the handle before deleting the partial manifest.
                manifest.close()
                os.remove(manifest_path)
                os.system("pause")
                sys.exit("Erroneous filesize encountered.")
            elif size > WARN_ON_FILESIZE:
                print("WARNING: File is greater than " + str(WARN_ON_FILESIZE) + " bytes (see B*2185704)")
                largeFileWarnings += 1

            hexdigest = calc_hash(fullpath, size)
            print(hexdigest)

            manifest.write(relativepath + "\n")
            manifest.write(str(size) + "\n")
            manifest.write(hexdigest + "\n")
finally:
    # Tidy up: runs on success, on abort and on unexpected errors alike
    # (closing an already-closed file is a no-op).
    manifest.close()

print("Finished.")

# Repeat the large-file warning at the end so it is not lost in the output.
if largeFileWarnings > 0:
    print("")
    if largeFileWarnings == 1:
        print("WARNING: 1 file was greater than " + str(WARN_ON_FILESIZE) + " bytes (see B*2185704)")
    else:
        print("WARNING: " + str(largeFileWarnings) + " files were greater than " + str(WARN_ON_FILESIZE) + " bytes (see B*2185704)")
    print("")
    os.system("pause")