Commit 81d63459 authored by Gary Ruben
Browse files

Added file count to node object and got tree pickling recovery working

parent 153e18de
......@@ -11,6 +11,9 @@ import warnings
from dataclasses import dataclass
import pathlib
import pickle
import pprint
import subprocess
from typing import Optional
# This isn't suppressing the warnings that fabric is generating; we need to
# investigate further
......@@ -26,6 +29,9 @@ https://stackoverflow.com/q/45819356/
KERNEL_CHECKSUM=$(cpio --to-stdout -i kernel.fat16 < archive.cpio | sha256sum | awk '{print $1}')
"""
# When True, the main script loads the node list from PICKLE_FILENAME instead
# of scanning the remote server. The second assignment overrides the first,
# so the effective value is the last one listed.
READ_PICKLE_FILE = False
READ_PICKLE_FILE = True
# Pickled node-list state, stored in the same directory as this script
PICKLE_FILENAME = os.path.join(os.path.dirname(__file__), 'tree_state.pickle')
# Login string passed to fabric's Connection and to ssh
REMOTE_LOGIN = "gary.ruben@monash.edu@sftp.synchrotron.org.au"
# Root of the remote directory tree to transfer
# SRC_PATH = "/data/13660a/asci/input"
SRC_PATH = "/data/13660a/asci/output/tar_test"
......@@ -34,9 +40,11 @@ DEST_PATH = "/home/grub0002/bapcxi/vault/rubbish"
@dataclass
class Node:
    """A directory tree node.

    Pairs one source directory with the destination directory it is
    transferred to, plus the bookkeeping needed to resume an interrupted run.
    """
    src: str                        # source tree node path
    dest: str                       # destination tree node path
    # number of files at the node; None until tar_and_send_directory has
    # counted them
    count: Optional[int] = None
    processed: bool = False         # True iff a node transfer completes
def tar_and_send_directory(node):
......@@ -58,13 +66,13 @@ def tar_and_send_directory(node):
with Connection(REMOTE_LOGIN) as c:
files = c.run(f'cd {node.src}; find -type f -maxdepth 1')
files = files.stdout.strip()
count = files.count('/')
node.count = files.count('/')
print(f'Node:{node.src}, file count:{count}')
if count == 0:
print(f'Node:{node.src}, file count:{node.count}')
if node.count == 0:
# No files at this node, just return
pass
elif count == 1:
elif node.count == 1:
# Only one file. No need to tar. Just copy unchanged.
output = subprocess.run(
f"ssh {REMOTE_LOGIN} 'cd {node.src};"
......@@ -99,22 +107,35 @@ def tar_and_send_directory(node):
node.processed = True
if __name__ == "__main__":
    # Build the list of nodes to transfer: either resume from the state saved
    # by an earlier run, or scan the remote server afresh.
    if READ_PICKLE_FILE:
        # NOTE(review): pickle.load can execute arbitrary code, so only load
        # a state file that was written by this script.
        with open(PICKLE_FILENAME, 'rb') as f:
            tree = pickle.load(f)
    else:
        # Get the directory tree from remote server as a list
        with Connection(REMOTE_LOGIN) as c:
            result = c.run(f'find {SRC_PATH} -type d')
        remote_dirs = result.stdout.strip().split('\n')

        # Create a tree data structure that represents both source and
        # destination tree paths. Only the first occurrence of SRC_PATH is
        # swapped so a repeat of it deeper in a path is left untouched, and
        # blank lines (empty find output) do not become nodes.
        tree = [
            Node(src, src.replace(SRC_PATH, DEST_PATH, 1))
            for src in remote_dirs
            if src
        ]
    print('tree:')
    pprint.pprint(tree)

    # Transfer all directory tree nodes, skipping any already completed
    for node in tree:
        if node.processed:
            continue
        try:
            pathlib.Path(node.dest).mkdir(parents=True, exist_ok=True)
            os.chmod(node.dest, 0o770)
            tar_and_send_directory(node)
        finally:
            # pickle the tree to keep a record of the processed state. This
            # runs after every node, including when the transfer raises, so
            # a later run can resume. Writing to a temporary file and then
            # renaming it avoids leaving a truncated state file behind.
            tmp_filename = PICKLE_FILENAME + '.tmp'
            with open(tmp_filename, 'wb') as f:
                pickle.dump(tree, f)
            os.replace(tmp_filename, PICKLE_FILENAME)

    # possibly delete the 'tree_state.pickle' file here
    # from IPython import embed; embed()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment