Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
R
remote_tree_to_local_tars
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Gary Ruben
remote_tree_to_local_tars
Commits
fd3ae572
Commit
fd3ae572
authored
6 years ago
by
Gary Ruben
Browse files
Options
Downloads
Patches
Plain Diff
A script to verify a tree of tarballs against .md5 files
parent
0b4176ab
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
veritar_walk.py
+112
-0
112 additions, 0 deletions
veritar_walk.py
with
112 additions
and
0 deletions
veritar_walk.py
0 → 100644
+
112
−
0
View file @
fd3ae572
"""
A script to verify files in our destination tree structure that have been
created using the asci_to_vault.py and md5_to_vault.py scripts.
This runs on a local Linux machine, on which the tape archive system is mounted.
This script uses veritar https://github.com/gazzar/veritar which is installed
with the system Python on the Linux machine. Because veritar is quite old, this
currently requires Python 2.7. It could alternatively be installed into a
Python 2.7 virtual environment or conda environment.
Following generation of the archive tree with the asci_to_vault.py and
md5_to_vault.py scripts, each directory node in the tree should contain either
0 or 2 files. In the case of 2 files, one of which is a .md5, if the other file
is not a tar file, the node is currently skipped. If it is a tar file, veritar
is used to compare the tar file contents against the md5sum-generated .md5 file.
Todo:
This script needs to be rewritten so that non-tars are checked by
running md5sum and using diff to check against the .md5 file.
Authors:
gary.ruben@monash.edu
"""
from
__future__
import
print_function
import
veritar.veritar
as
vv
import
os
import
time
# Root of the archive tree that the os.walk() loop below traverses.
BASE = '/home/grub0002/bapcxi/vault/IMBL_2018_Oct_McGillick'
# Directory at which processing resumes when SKIP_FORWARD is True; every
# directory visited before this one is skipped.
SKIP_TO_DIR = '/home/grub0002/bapcxi/vault/IMBL_2018_Oct_McGillick/output/Iodine_CT/R1820/K1/Scan2_FRC'
# Set to True to fast-forward the walk to SKIP_TO_DIR; the walk loop resets
# it to False once it stops skipping.
SKIP_FORWARD = False
# When True, no tar verification is run; the walk only collects directories
# that fail the file-layout checks.
JUST_SHOW_NON_TARS = False
class optobj:
    """
    Minimal options holder handed to veritar's TarVerification.

    Instances carry exactly two attributes: verbose (False) and quiet (True).
    """

    def __init__(self):
        # NOTE(review): presumably these mirror veritar's command-line
        # options of the same names -- confirm against veritar.
        self.verbose, self.quiet = False, True
def check_oneof_tar_and_md5(files):
    """
    Report whether files is exactly a (.md5, .tar) pair, in that order.

    files is a sequence of file names or paths. Only the extension of each
    entry (as given by os.path.splitext) is inspected. Returns True when
    there are exactly two entries, the first ending in .md5 and the second
    in .tar; otherwise False.
    """
    suffixes = [os.path.splitext(name)[1] for name in files]
    return len(files) == 2 and suffixes == ['.md5', '.tar']
def check_two_files_one_md5(files):
    """
    Report whether files has two entries of which exactly one is a .md5.

    files is a sequence of file names or paths. Only the extension of each
    entry (as given by os.path.splitext) is inspected, and the order of the
    two entries does not matter. Returns True or False.
    """
    suffixes = [os.path.splitext(name)[1] for name in files]
    # With exactly two entries, "one is .md5 and the two extensions differ"
    # is the same as "precisely one extension is .md5".
    return len(files) == 2 and suffixes.count('.md5') == 1
def lines_in(filename):
    """
    Count the newline characters in a file by reading it in fixed-size chunks.

    Based on https://stackoverflow.com/a/15074925, with two corrections:
    the file is opened in binary mode so that readinto() is available and
    bytes are compared with bytes, and only the part of the buffer filled by
    the latest read is counted. The buffer is reused between reads, so
    counting all of it would recount stale bytes after a short final read.

    filename is the path of the file to scan. Returns the number of newline
    bytes in the file; a final line with no trailing newline is not counted.
    Errors from open() propagate to the caller.
    """
    lines = 0
    chunk = bytearray(2048)
    with open(filename, 'rb') as f:
        while True:
            nread = f.readinto(chunk)
            if not nread:
                break
            # Restrict the count to the bytes this read actually produced.
            lines += chunk.count(b'\n', 0, nread)
    return lines
# Options object handed to every veritar verification run.
opts = optobj()

# Directories collected while walking the tree, one list per failure category.
failures_of_check_oneof_tar_and_md5 = []
failures_of_check_two_files_one_md5 = []
tar_checksum_failures = []

for root, dirs, files in os.walk(BASE):
    print('Directory: %s' % root)

    # Optionally fast-forward: skip directories until SKIP_TO_DIR is reached,
    # then process everything from there on.
    if SKIP_FORWARD and root != SKIP_TO_DIR:
        continue
    SKIP_FORWARD = False

    sorted_files = [os.path.join(root, f) for f in sorted(files)]
    if not sorted_files:
        # Directories holding no files are neither verified nor reported.
        continue

    oneof_tar_and_md5 = check_oneof_tar_and_md5(sorted_files)
    two_files_one_md5 = check_two_files_one_md5(sorted_files)

    if oneof_tar_and_md5 and two_files_one_md5 and not JUST_SHOW_NON_TARS:
        # Sorting puts the .md5 ahead of the .tar, as the check above requires.
        md5, tar = sorted_files
        print(time.strftime("%H:%M:%S:"))
        print(lines_in(md5), 'lines in', md5)
        v = vv.TarVerification(tar, md5, opts)
        v.run()
        # Record the directory unless every processed entry was counted good.
        if v.s.Good != v.s.Processed:
            tar_checksum_failures.append(root)
        continue

    if not oneof_tar_and_md5:
        failures_of_check_oneof_tar_and_md5.append(root)
    if not two_files_one_md5:
        failures_of_check_two_files_one_md5.append(root)

# Final report: one heading followed by the list of affected directories.
for heading, failed_dirs in (
        ('Failures of check_oneof_tar_and_md5:',
         failures_of_check_oneof_tar_and_md5),
        ('Failures of check_two_files_one_md5:',
         failures_of_check_two_files_one_md5),
        ('Checksum failures:', tar_checksum_failures),
):
    print(heading)
    print(failed_dirs)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment