Commit 693b3525 authored by Gary Ruben's avatar Gary Ruben
Browse files

Improvements to printing useful things to the log file

parent 2adc8740
......@@ -26,11 +26,19 @@ Note: The current version of fabric generates warnings. This issue is discussed
Notes
-----
This is a possible option for checksumming.
This is a possible option for checksumming:
https://stackoverflow.com/q/45819356/
KERNEL_CHECKSUM=$(cpio --to-stdout -i kernel.fat16 < archive.cpio | sha256sum | awk '{print $1}')
We used the following command to check whether a transfer was successful
immediately prior to a failure of the ASCI filesystem.
The command counts the number of files in a tarball:
$ tar -tf Lamb_Lung_Microfil_CT_18011B_right_CT.tar | wc -l
75920
Keyword arguments for fabric's Connection.run() are documented here:
http://docs.pyinvoke.org/en/1.2/api/runners.html#invoke.runners.Runner.run
"""
import os
import sys
......@@ -45,17 +53,15 @@ from fabric import Connection
READ_PICKLE_FILE = False
EXPERIMENT_NAME = "13660a"
EXPERIMENT_NAME = "13660b"
PICKLE_FILENAME = os.path.join(os.path.dirname(__file__), "tree_state.pickle")
timestamp = time.strftime("%Y-%m-%d-%H%M%S")
LOG_FILENAME = os.path.join(
os.path.dirname(__file__),
f"{EXPERIMENT_NAME}-{timestamp}.log"
)
REMOTE_LOGIN = "gary.ruben@monash.edu@sftp.synchrotron.org.au"
SRC_PATH = "/data/13660a/asci/input"
# SRC_PATH = "/data/13660a/asci/output/tar_test"
# DEST_PATH = "/home/grub0002/bapcxi/vault/rubbish"
REMOTE_LOGIN = "gary.ruben@monash.edu@sftp1.synchrotron.org.au"
SRC_PATH = "/data/13660b/asci/input"
DEST_PATH = "/home/grub0002/bapcxi/vault/IMBL_2018_Oct_McGillick"
......@@ -98,15 +104,23 @@ def tar_and_send_directory(node):
"""
# Check if there are any files in the node
# add check to make sure connection is working
with Connection(REMOTE_LOGIN) as c:
files = c.run(f'cd {node.src}; find -type f -maxdepth 1')
files = c.run(
f'cd {node.src}; find -type f -maxdepth 1',
echo=True
)
files = files.stdout.strip()
# if not c.is_connected():
# print(f'Connection failed on {node.src}')
# node.error = "Connection failure on initial file find"
# return
node.count = files.count('/')
print(f'Node:{node.src}, file count:{node.count}')
if node.count == 0:
# No files at this node, just return
pass
print('No files to transfer')
elif node.count == 1:
# Only one file. No need to tar. Just copy unchanged.
output = subprocess.run(
......@@ -119,7 +133,7 @@ def tar_and_send_directory(node):
print('stdout:', output.stdout)
print('stderr:', output.stderr)
os.chmod(f'{node.dest}/{files}', 0o550)
print(f'transfer {node.src} -> {node.dest}')
print(f'Transferred single file {node.src} -> {node.dest}')
else:
# More than one file. Transfer all files to a tarball.
if node.src == SRC_PATH:
......@@ -137,7 +151,7 @@ def tar_and_send_directory(node):
print('stdout:', output.stdout)
print('stderr:', output.stderr)
os.chmod(f'{node.dest}/{filename}.tar', 0o550)
print(f'transfer {node.src} -> {node.dest}')
print(f'Transferred {node.count} files {node.src} -> {node.dest}')
node.processed = True
......@@ -148,7 +162,20 @@ if __name__ == "__main__":
# A hacky way to restart an interrupted transfer is to set
# READ_PICKLE_FILE = True above so that the transfer state is retrieved. By
# default the tree is built from scratch from the remote file system.
if not READ_PICKLE_FILE:
if READ_PICKLE_FILE:
# Read the saved transfer state from the locally pickled tree object.
with open(PICKLE_FILENAME, 'rb') as f:
tree = pickle.load(f)
print('tree:')
pprint.pprint(tree)
# Reset nodes with count==0 to unprocessed. We observed a failure that
# mistakenly reported source tree nodes to have 0 files, so force a
# recheck of those. The side-effect is to recheck genuinely empty nodes.
for node in tree:
if node.count == 0:
node.processed = False
else:
# Get the directory tree from remote server as a list
with Connection(REMOTE_LOGIN) as c:
result = c.run(f'find {SRC_PATH} -type d')
......@@ -160,15 +187,9 @@ if __name__ == "__main__":
for src in remote_dirs:
dest = src.replace(SRC_PATH, DEST_PATH)
tree.append(Node(src, dest))
else:
# Read the saved transfer state from the locally pickled tree object.
with open(PICKLE_FILENAME, 'rb') as f:
tree = pickle.load(f)
print('tree:')
pprint.pprint(tree)
# Transfer all directory tree nodes
for node in tree:
for i, node in enumerate(tree):
if not node.processed:
pathlib.Path(node.dest).mkdir(parents=True, exist_ok=True)
os.chmod(node.dest, 0o770)
......@@ -177,3 +198,5 @@ if __name__ == "__main__":
# pickle the tree to keep a record of the processed state
with open(PICKLE_FILENAME, 'wb') as f:
pickle.dump(tree, f)
print(f"Processed {i + 1} of {len(tree)} directory tree nodes")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment