A script to verify a tree of tarballs against .md5 files

A script to verify files in our destination tree structure that have been
created using the accompanying archive-creation scripts.
This runs on a local Linux machine, on which the tape archive system is mounted.
This script uses veritar, which is installed
with the system Python on the Linux machine. Because veritar is quite old, this
script currently requires Python 2.7. veritar could alternatively be installed
into a Python 2.7 virtual environment or conda environment.
Following generation of the archive tree with the archive-creation scripts, each directory node in the tree should contain either
0 or 2 files. In the case of 2 files, one of which is a .md5, if the other file
is not a tar file, the node is currently skipped. If it is a tar file, veritar
is used to compare the tar file contents against the md5sum-generated .md5 file.
This script needs to be rewritten so that non-tars are checked by
running md5sum and using diff to check against the .md5 file.
from __future__ import print_function
import veritar.veritar as vv
import os
import time
# Root of the directory tree that the main loop walks with os.walk().
BASE = '/home/grub0002/bapcxi/vault/IMBL_2018_Oct_McGillick'
# Directory compared against each walked directory when SKIP_FORWARD is set;
# presumably the point at which an interrupted run should resume (TODO confirm).
SKIP_TO_DIR = '/home/grub0002/bapcxi/vault/IMBL_2018_Oct_McGillick/output/Iodine_CT/R1820/K1/Scan2_FRC'
class optobj:
    """Bare options holder passed to veritar's TarVerification.

    Carries the two flags set below: output is quiet and non-verbose.
    """

    def __init__(self):
        # Suppress routine output and keep verbose reporting switched off.
        self.quiet = True
        self.verbose = False
def check_oneof_tar_and_md5(files):
    """Report whether `files` is exactly an .md5 entry followed by a .tar entry.

    Args:
        files: sequence of file names or paths; only the extensions are
            examined, and the order of the entries matters.

    Returns:
        True when there are exactly two entries whose extensions are
        '.md5' and '.tar' in that order, otherwise False.
    """
    suffixes = [os.path.splitext(name)[1] for name in files]
    return len(files) == 2 and suffixes == ['.md5', '.tar']
def check_two_files_one_md5(files):
    """Report whether `files` has two entries of which exactly one is a .md5.

    Args:
        files: sequence of file names or paths; only the extensions are
            examined, and the order of the entries does not matter.

    Returns:
        True when there are exactly two entries and precisely one of them
        has the '.md5' extension, otherwise False.
    """
    suffixes = [os.path.splitext(name)[1] for name in files]
    if len(files) != 2:
        return False
    # With two entries, "one is .md5 and the extensions differ" is the same
    # as "exactly one extension is .md5".
    return suffixes.count('.md5') == 1
def lines_in(filename):
    """Return the number of newline bytes in `filename`.

    The file is read in fixed-size chunks into a reusable buffer, so large
    files are counted without loading them into memory. A final line that
    lacks a trailing newline is not counted.

    Args:
        filename: path of the file to scan.

    Returns:
        The count of b'\\n' bytes in the file (0 for an empty file).
    """
    total = 0
    chunk = bytearray(2048)
    # Binary mode: readinto() fills a byte buffer, and counting raw bytes
    # behaves the same on Python 2 and Python 3.
    with open(filename, 'rb') as f:
        while True:
            filled = f.readinto(chunk)
            if not filled:
                break
            # Only the first `filled` bytes belong to this read; the rest of
            # the buffer still holds data left over from earlier reads and
            # must not be counted again.
            total += chunk.count(b'\n', 0, filled)
    return total
# Options object handed to veritar's TarVerification below.
opts = optobj()
# Result lists, one per kind of problem. The lines that append to them are not
# visible in this copy; presumably they are filled during the walk and listed
# under the matching headings printed at the end (TODO confirm).
failures_of_check_oneof_tar_and_md5 = []
failures_of_check_two_files_one_md5 = []
tar_checksum_failures = []
# Visit every directory beneath BASE.
for root, dirs, files in os.walk(BASE):
print('Directory: %s' % root)
# NOTE(review): SKIP_FORWARD is not defined in the visible source and this
# `if` has no body here; presumably it skips directories until SKIP_TO_DIR is
# reached - confirm against the full file.
if SKIP_FORWARD and root != SKIP_TO_DIR:
# Full paths of this directory's files, sorted by name;
# check_oneof_tar_and_md5 expects the .md5 entry to come first.
sorted_files = [os.path.join(root, f) for f in sorted(files)]
# Directories without files are not checked.
if len(sorted_files) != 0:
oneof_tar_and_md5 = check_oneof_tar_and_md5(sorted_files)
two_files_one_md5 = check_two_files_one_md5(sorted_files)
# NOTE(review): JUST_SHOW_NON_TARS is not defined in the visible source.
if not JUST_SHOW_NON_TARS and (oneof_tar_and_md5 and two_files_one_md5):
md5, tar = sorted_files
# Progress hint: number of lines in the .md5 file.
print(lines_in(md5), ' lines in ', md5)
# Set up veritar's verification of the tar against the .md5 listing.
v = vv.TarVerification(tar, md5, opts)
# NOTE(review): the bodies of the next three `if` statements are missing in
# this copy; presumably each records the current entry in the corresponding
# failure list - confirm against the full file.
if v.s.Processed != v.s.Good:
if not oneof_tar_and_md5:
if not two_files_one_md5:
# Summary headings; the code that lists the collected entries under each
# heading is not visible here.
print('Failures of check_oneof_tar_and_md5:')
print('Failures of check_two_files_one_md5:')
print('Checksum failures:')
