Commit 693b3525 authored by Gary Ruben

Improvements to printing useful things to the log file

parent 2adc8740
@@ -26,11 +26,19 @@ Note: The current version of fabric generates warnings. This issue is discussed
 Notes
 -----
-This is a possible option for checksumming.
+This is a possible option for checksumming:
 https://stackoverflow.com/q/45819356/
 KERNEL_CHECKSUM=$(cpio --to-stdout -i kernel.fat16 < archive.cpio | sha256sum | awk '{print $1}')
+
+We used the following command to check whether a transfer was successful
+immediately prior to a failure of the ASCI filesystem.
+The command to count the number of files in a tarball:
+$ tar -tf Lamb_Lung_Microfil_CT_18011B_right_CT.tar | wc -l
+75920
+Keyword arguments for fabric's Connection.run() are documented here:
+http://docs.pyinvoke.org/en/1.2/api/runners.html#invoke.runners.Runner.run
 """
 import os
 import sys
@@ -45,17 +53,15 @@ from fabric import Connection
 READ_PICKLE_FILE = False
-EXPERIMENT_NAME = "13660a"
+EXPERIMENT_NAME = "13660b"
 PICKLE_FILENAME = os.path.join(os.path.dirname(__file__), "tree_state.pickle")
 timestamp = time.strftime("%Y-%m-%d-%H%M%S")
 LOG_FILENAME = os.path.join(
     os.path.dirname(__file__),
     f"{EXPERIMENT_NAME}-{timestamp}.log"
 )
-REMOTE_LOGIN = "gary.ruben@monash.edu@sftp.synchrotron.org.au"
+REMOTE_LOGIN = "gary.ruben@monash.edu@sftp1.synchrotron.org.au"
-SRC_PATH = "/data/13660a/asci/input"
+SRC_PATH = "/data/13660b/asci/input"
-# SRC_PATH = "/data/13660a/asci/output/tar_test"
-# DEST_PATH = "/home/grub0002/bapcxi/vault/rubbish"
 DEST_PATH = "/home/grub0002/bapcxi/vault/IMBL_2018_Oct_McGillick"
@@ -98,15 +104,23 @@ def tar_and_send_directory(node):
     """
     # Check if there are any files in the node
+    # add check to make sure connection is working
     with Connection(REMOTE_LOGIN) as c:
-        files = c.run(f'cd {node.src}; find -type f -maxdepth 1')
+        files = c.run(
+            f'cd {node.src}; find -type f -maxdepth 1',
+            echo=True
+        )
         files = files.stdout.strip()
+        # if not c.is_connected():
+        #     print(f'Connection failed on {node.src}')
+        #     node.error = "Connection failure on initial file find"
+        #     return

     node.count = files.count('/')
     print(f'Node:{node.src}, file count:{node.count}')
     if node.count == 0:
         # No files at this node, just return
-        pass
+        print('No files to transfer')
     elif node.count == 1:
         # Only one file. No need to tar. Just copy unchanged.
         output = subprocess.run(
@@ -119,7 +133,7 @@ def tar_and_send_directory(node):
         print('stdout:', output.stdout)
         print('stderr:', output.stderr)
         os.chmod(f'{node.dest}/{files}', 0o550)
-        print(f'transfer {node.src} -> {node.dest}')
+        print(f'Transferred single file {node.src} -> {node.dest}')
     else:
         # More than one file. Transfer all files to a tarball.
         if node.src == SRC_PATH:
@@ -137,7 +151,7 @@ def tar_and_send_directory(node):
         print('stdout:', output.stdout)
         print('stderr:', output.stderr)
         os.chmod(f'{node.dest}/{filename}.tar', 0o550)
-        print(f'transfer {node.src} -> {node.dest}')
+        print(f'Transferred {node.count} files {node.src} -> {node.dest}')
     node.processed = True
@@ -148,7 +162,20 @@ if __name__ == "__main__":
     # A hacky way to restart an interrupted transfer is to set
     # READ_PICKLE_FILE = True above so that the transfer state is retrieved. By
     # default the tree is built from scratch from the remote file system.
-    if not READ_PICKLE_FILE:
+    if READ_PICKLE_FILE:
+        # Read the saved transfer state from the locally pickled tree object.
+        with open(PICKLE_FILENAME, 'rb') as f:
+            tree = pickle.load(f)
+        print('tree:')
+        pprint.pprint(tree)
+
+        # Reset nodes with count==0 to unprocessed. We observed a failure that
+        # mistakenly reported source tree nodes to have 0 files, so force a
+        # recheck of those. The side-effect is to recheck genuinely empty nodes.
+        for node in tree:
+            if node.count == 0:
+                node.processed = False
+    else:
         # Get the directory tree from remote server as a list
         with Connection(REMOTE_LOGIN) as c:
             result = c.run(f'find {SRC_PATH} -type d')
@@ -160,15 +187,9 @@ if __name__ == "__main__":
         for src in remote_dirs:
             dest = src.replace(SRC_PATH, DEST_PATH)
             tree.append(Node(src, dest))
-    else:
-        # Read the saved transfer state from the locally pickled tree object.
-        with open(PICKLE_FILENAME, 'rb') as f:
-            tree = pickle.load(f)
-        print('tree:')
-        pprint.pprint(tree)

     # Transfer all directory tree nodes
-    for node in tree:
+    for i, node in enumerate(tree):
         if not node.processed:
             pathlib.Path(node.dest).mkdir(parents=True, exist_ok=True)
             os.chmod(node.dest, 0o770)
@@ -177,3 +198,5 @@ if __name__ == "__main__":
         # pickle the tree to keep a record of the processed state
         with open(PICKLE_FILENAME, 'wb') as f:
             pickle.dump(tree, f)
+
+        print(f"Processed {i + 1} of {len(tree)} directory tree nodes")