Commit 2adc8740 authored by Gary Ruben's avatar Gary Ruben
Browse files

Tidying and documentation improvement

parent 56d7f629
"""
A script to transfer a tree of data files from a remote/source server to a
local/destination computer. This runs on the local Linux machine, on which the
tape archive system is mounted; in our case, this is a machine at Monash. Prior
to running this, an ssh key pair must be shared between the systems. See
https://confluence.apps.monash.edu/display/XI/Australian+Synchrotron
for details on how to do this between a Monash Linux machine and ASCI
(Australian Synchrotron Compute Infrastructure). Requires Python 3.7 or higher
and uses the fabric module.
Authors:
gary.ruben@monash.edu
michelle.croughan@monash.edu
Note that the current version creates two files in the same directory as this
script:
1. A .log file, named from the experiment name and the start-time timestamp,
that captures all stdout activity.
2. A Python pickle file named tree_state.pickle that contains the transfer
state, from which failed transfers can be restarted by setting the
READ_PICKLE_FILE flag to True.
Known issues
------------
Note: The current version of fabric generates warnings. This issue is discussed
here: https://github.com/paramiko/paramiko/issues/1369
Notes
-----
This is a possible option for checksumming.
https://stackoverflow.com/q/45819356/
KERNEL_CHECKSUM=$(cpio --to-stdout -i kernel.fat16 < archive.cpio | sha256sum | awk '{print $1}')
"""
import os
......@@ -15,31 +41,22 @@ import subprocess
import pickle
import pprint
import time
from fabric import Connection
# This isn't suppressing the warnings that fabric is generating; we need to
# investigate further
with warnings.catch_warnings():
warnings.simplefilter("ignore")
import fabric
from fabric import Connection
"""
This is a possible option for checksumming.
https://stackoverflow.com/q/45819356/
KERNEL_CHECKSUM=$(cpio --to-stdout -i kernel.fat16 < archive.cpio | sha256sum | awk '{print $1}')
"""
# --- Transfer configuration -------------------------------------------------
# Set READ_PICKLE_FILE to True to resume an interrupted transfer from the
# state saved in PICKLE_FILENAME instead of rebuilding the tree from the
# remote file system.
READ_PICKLE_FILE = False
EXPERIMENT_NAME = "13660a"

# Both output files (pickled state and log) are written next to this script.
_SCRIPT_DIR = os.path.dirname(__file__)
PICKLE_FILENAME = os.path.join(_SCRIPT_DIR, "tree_state.pickle")

# Start time of this run; it is embedded in the log file name.
timestamp = time.strftime("%Y-%m-%d-%H%M%S")
LOG_FILENAME = os.path.join(_SCRIPT_DIR, f"{EXPERIMENT_NAME}-{timestamp}.log")

REMOTE_LOGIN = "gary.ruben@monash.edu@sftp.synchrotron.org.au"

# Root of the remote tree to copy, and the local root it is copied to.
# For a trial run, the previously used test values were
# SRC_PATH = "/data/13660a/asci/output/tar_test" and
# DEST_PATH = "/home/grub0002/bapcxi/vault/rubbish".
SRC_PATH = "/data/13660a/asci/input"
DEST_PATH = "/home/grub0002/bapcxi/vault/IMBL_2018_Oct_McGillick"
@dataclass
......@@ -124,22 +141,27 @@ def tar_and_send_directory(node):
node.processed = True
if __name__ == "__main__":
    sys.stdout = Logger()  # Log all stdout to a log file

    # A hacky way to restart an interrupted transfer is to set
    # READ_PICKLE_FILE = True above so that the transfer state is retrieved.
    # By default the tree is built from scratch from the remote file system.
    if not READ_PICKLE_FILE:
        # Get the directory tree from the remote server as a list of paths.
        with Connection(REMOTE_LOGIN) as c:
            result = c.run(f'find {SRC_PATH} -type d')
            # Drop empty entries so that blank output does not turn into a
            # Node with an empty source path.
            remote_dirs = [
                d for d in result.stdout.strip().split('\n') if d
            ]

        # Create a tree data structure that represents both source and
        # destination tree paths.
        tree = []
        for src in remote_dirs:
            # Swap only the leading SRC_PATH for DEST_PATH; str.replace()
            # without a count would also rewrite any later occurrence of
            # SRC_PATH inside the path.
            dest = src.replace(SRC_PATH, DEST_PATH, 1)
            tree.append(Node(src, dest))
    else:
        # Read the saved transfer state from the locally pickled tree object.
        # NOTE: pickle.load() can execute arbitrary code, so only load a
        # state file that was written by this script.
        with open(PICKLE_FILENAME, 'rb') as f:
            tree = pickle.load(f)
print('tree:')
......@@ -155,7 +177,3 @@ if __name__ == "__main__":
# pickle the tree to keep a record of the processed state
with open(PICKLE_FILENAME, 'wb') as f:
pickle.dump(tree, f)
# possibly delete the 'tree_state.pickle' file here
# from IPython import embed; embed()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment