From 81d6345907f4de0545359c4b96b0f959f5067055 Mon Sep 17 00:00:00 2001 From: gary <gary.ruben@monash.edu> Date: Wed, 6 Mar 2019 16:43:30 +1100 Subject: [PATCH] Added file count to node object and got tree pickling recovery working --- asci_to_vault.py | 65 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 22 deletions(-) diff --git a/asci_to_vault.py b/asci_to_vault.py index 27d4203..9888df8 100644 --- a/asci_to_vault.py +++ b/asci_to_vault.py @@ -11,6 +11,9 @@ import warnings from dataclasses import dataclass import pathlib import subprocess +import pickle +import pprint + # This isn't suppressing the warnings that fabric is generating; we need to # investigate further @@ -26,6 +29,9 @@ https://stackoverflow.com/q/45819356/ KERNEL_CHECKSUM=$(cpio --to-stdout -i kernel.fat16 < archive.cpio | sha256sum | awk '{print $1}') """ +READ_PICKLE_FILE = False +READ_PICKLE_FILE = True +PICKLE_FILENAME = os.path.join(os.path.dirname(__file__), 'tree_state.pickle') REMOTE_LOGIN = "gary.ruben@monash.edu@sftp.synchrotron.org.au" # SRC_PATH = "/data/13660a/asci/input" SRC_PATH = "/data/13660a/asci/output/tar_test" @@ -34,9 +40,11 @@ DEST_PATH = "/home/grub0002/bapcxi/vault/rubbish" @dataclass class Node: - src: str - dest: str - processed: bool = False + """A directory tree node""" + src: str # source tree node path + dest: str # destination tree node path + count: int = None # number of files at the node + processed: bool = False # True iff a node transfer completes def tar_and_send_directory(node): @@ -58,13 +66,13 @@ def tar_and_send_directory(node): with Connection(REMOTE_LOGIN) as c: files = c.run(f'cd {node.src}; find -type f -maxdepth 1') files = files.stdout.strip() - count = files.count('/') + node.count = files.count('/') - print(f'Node:{node.src}, file count:{count}') - if count == 0: + print(f'Node:{node.src}, file count:{node.count}') + if node.count == 0: # No files at this node, just return pass - elif count == 1: + elif node.count == 1: # Only one file. No need to tar. Just copy unchanged. output = subprocess.run( f"ssh {REMOTE_LOGIN} 'cd {node.src};" @@ -99,22 +107,35 @@ def tar_and_send_directory(node): node.processed = True if __name__ == "__main__": - # Get the directory tree from remote server as a list - with Connection(REMOTE_LOGIN) as c: - result = c.run(f'find {SRC_PATH} -type d') - remote_dirs = result.stdout.strip().split('\n') - - # Create a tree data structure that represents both source and destination - # tree paths. - tree = [] - for src in remote_dirs: - dest = src.replace(SRC_PATH, DEST_PATH) - tree.append(Node(src, dest)) + if not READ_PICKLE_FILE: + # Get the directory tree from remote server as a list + with Connection(REMOTE_LOGIN) as c: + result = c.run(f'find {SRC_PATH} -type d') + remote_dirs = result.stdout.strip().split('\n') + + # Create a tree data structure that represents both source and destination + # tree paths. + tree = [] + for src in remote_dirs: + dest = src.replace(SRC_PATH, DEST_PATH) + tree.append(Node(src, dest)) + else: + with open(PICKLE_FILENAME, 'rb') as f: + tree = pickle.load(f) + print('tree:') + pprint.pprint(tree) # Transfer all directory tree nodes for node in tree: - pathlib.Path(node.dest).mkdir(parents=True, exist_ok=True) - os.chmod(node.dest, 0o770) - tar_and_send_directory(node) + if not node.processed: + pathlib.Path(node.dest).mkdir(parents=True, exist_ok=True) + os.chmod(node.dest, 0o770) + tar_and_send_directory(node) + + # pickle the tree to keep a record of the processed state + with open(PICKLE_FILENAME, 'wb') as f: + pickle.dump(tree, f) + + # possibly delete the 'tree_state.pickle' file here - # from IPython import embed; embed() \ No newline at end of file + # from IPython import embed; embed() -- GitLab