Skip to content
Snippets Groups Projects
Commit fd3ae572 authored by Gary Ruben's avatar Gary Ruben
Browse files

A script to verify a tree of tarballs against .md5 files

parent 0b4176ab
No related branches found
No related tags found
No related merge requests found
"""
A script to verify files in our destination tree structure that have been
created using the asci_to_vault.py and md5_to_vault.py scripts.
This runs on a local Linux machine, on which the tape archive system is mounted.
This script uses veritar https://github.com/gazzar/veritar which is installed
with the system Python on the Linux machine. Because veritar is quite old, this
currently requires Python 2.7. It could alternatively be installed into a
Python 2.7 virtual environment or conda environment.
Following generation of the archive tree with the asci_to_vault.py and
md5_to_vault.py scripts, each directory node in the tree should contain either
0 or 2 files. In the case of 2 files, one of which is a .md5, if the other file
is not a tar file, the node is currently skipped. If it is a tar file, veritar
is used to compare the tar file contents against the md5sum-generated .md5 file.
Todo:
This script needs to be rewritten so that non-tars are checked by
running md5sum and using diff to check against the .md5 file.
Authors:
gary.ruben@monash.edu
"""
from __future__ import print_function
import veritar.veritar as vv
import os
import time
# Root of the archive tree that the os.walk() loop below traverses.
BASE = '/home/grub0002/bapcxi/vault/IMBL_2018_Oct_McGillick'
# Directory at which processing starts when SKIP_FORWARD is True; every
# directory the walk visits before reaching it is skipped.
SKIP_TO_DIR = '/home/grub0002/bapcxi/vault/IMBL_2018_Oct_McGillick/output/Iodine_CT/R1820/K1/Scan2_FRC'
# When True, directories are skipped until SKIP_TO_DIR is reached. The walk
# loop resets this to False as soon as it stops skipping.
SKIP_FORWARD = False
# When True, no tar verification is run; only the file-layout checks are
# applied and their failures reported.
JUST_SHOW_NON_TARS = False
class optobj(object):
    """Options object handed to veritar's TarVerification.

    Only the two attributes set in __init__ are provided.
    NOTE(review): presumably this mirrors the options object veritar builds
    from its own command line -- confirm against the veritar source.
    """

    def __init__(self):
        # Request non-verbose, quiet operation.
        self.verbose = False
        self.quiet = True
def check_oneof_tar_and_md5(files):
    """Return True iff files holds exactly a .md5 entry followed by a .tar.

    Args:
        files: sequence of file names or paths, in the order to be checked.

    Returns:
        True when there are exactly two entries, the first has the extension
        '.md5' and the second has the extension '.tar'; False otherwise.
        The extension comparison is case-sensitive.
    """
    # Guard first so the two-way unpacking below cannot fail.
    if len(files) != 2:
        return False
    first_ext, second_ext = (os.path.splitext(f)[1] for f in files)
    return first_ext == '.md5' and second_ext == '.tar'
def check_two_files_one_md5(files):
    """Return True iff files holds two entries and exactly one is a .md5.

    Args:
        files: sequence of file names or paths.

    Returns:
        True when there are exactly two entries and exactly one of them has
        the extension '.md5' (in either position); False otherwise.
        The extension comparison is case-sensitive.
    """
    if len(files) != 2:
        return False
    exts = [os.path.splitext(f)[1] for f in files]
    # With two entries, "contains .md5 and the extensions differ" is the same
    # as "exactly one .md5".
    return exts.count('.md5') == 1
def lines_in(filename):
    """Return the number of newline bytes in the file at filename.

    Based on https://stackoverflow.com/a/15074925. The file is read in
    fixed-size binary chunks and only the bytes actually read in each chunk
    are searched, so leftover data from a previous chunk is never counted
    twice. A final line without a trailing newline is not counted; the result
    is only used as a progress estimate.

    Args:
        filename: path of the file to scan.

    Returns:
        The count of '\\n' bytes in the file.
    """
    lines = 0
    buffer = bytearray(2048)
    # Binary mode: readinto() needs a byte stream, and counting b'\n' behaves
    # the same on Python 2.7 and Python 3.
    with open(filename, 'rb') as f:
        while True:
            nread = f.readinto(buffer)
            if not nread:
                break
            # Restrict the search to the freshly read bytes; the tail of the
            # buffer still holds data from the previous read.
            lines += buffer.count(b'\n', 0, nread)
    return lines
# Options object handed to every veritar TarVerification run.
opts = optobj()

# Directories collected during the walk, reported at the end.
failures_of_check_oneof_tar_and_md5 = []
failures_of_check_two_files_one_md5 = []
tar_checksum_failures = []

for root, dirs, files in os.walk(BASE):
    print('Directory: %s' % root)

    # While skipping forward, ignore every directory until SKIP_TO_DIR is
    # reached, then process normally from there on.
    if SKIP_FORWARD and root != SKIP_TO_DIR:
        continue
    SKIP_FORWARD = False

    # Sorting puts '<name>.md5' ahead of '<name>.tar', which is the order
    # check_oneof_tar_and_md5() expects.
    sorted_files = [os.path.join(root, f) for f in sorted(files)]
    if not sorted_files:
        # Directories without files are not checked.
        continue

    oneof_tar_and_md5 = check_oneof_tar_and_md5(sorted_files)
    two_files_one_md5 = check_two_files_one_md5(sorted_files)

    if not JUST_SHOW_NON_TARS and (oneof_tar_and_md5 and two_files_one_md5):
        md5, tar = sorted_files
        print(time.strftime("%H:%M:%S:"))
        print(lines_in(md5), ' lines in ', md5)
        v = vv.TarVerification(tar, md5, opts)
        v.run()
        # Any processed entry that was not counted as good marks the node
        # as a checksum failure.
        if v.s.Processed != v.s.Good:
            tar_checksum_failures.append(root)
        continue

    # Reached when verification was not run for this node: record which
    # layout check(s) it failed.
    if not oneof_tar_and_md5:
        failures_of_check_oneof_tar_and_md5.append(root)
    if not two_files_one_md5:
        failures_of_check_two_files_one_md5.append(root)

print('Failures of check_oneof_tar_and_md5:')
print(failures_of_check_oneof_tar_and_md5)
print('Failures of check_two_files_one_md5:')
print(failures_of_check_two_files_one_md5)
print('Checksum failures:')
print(tar_checksum_failures)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment