Skip to content
Snippets Groups Projects
Commit 711a751c authored by Gary Ruben's avatar Gary Ruben
Browse files

Initial commit

parents
No related branches found
No related tags found
No related merge requests found
Pipeline #6907 canceled
"""
A script to transfer a tree of data files from a remote server to a local
computer. It runs only on a un*x destination system and requires an ssh key
pair to be shared between the two systems. See
https://confluence.apps.monash.edu/display/XI/Australian+Synchrotron
for details on how to set this up between a Monash Linux machine and ASCI.
"""
import os
import warnings
from dataclasses import dataclass
import pathlib
# This isn't suppressing the warnings that fabric is generating; we need to
# investigate further
with warnings.catch_warnings():
warnings.simplefilter("ignore")
import fabric
from fabric import Connection
"""
This is a possible option for checksumming.
https://stackoverflow.com/q/45819356/
KERNEL_CHECKSUM=$(cpio --to-stdout -i kernel.fat16 < archive.cpio | sha256sum | awk '{print $1}')
"""
# ssh login (user@host) used for every connection to the remote server.
REMOTE_LOGIN = "gary.ruben@monash.edu@sftp.synchrotron.org.au"
# Root of the remote directory tree that is searched for directories to send.
SRC_PATH = "/data/13660a/asci/output/tar_test"
# Local root that replaces SRC_PATH when building each destination directory.
DEST_PATH = "/home/grub0002/bapcxi/vault/rubbish"
@dataclass
class Node:
    """One directory of the transfer tree, as a source/destination pair."""

    # Full path of the directory on the remote server.
    src: str
    # Full path of the matching directory on the local machine.
    dest: str
def tar_and_send_directory(node):
    """Archive the files of one remote directory into a local tarball.

    Runs ``find | cpio`` on the remote host over ssh to pack every regular
    file located directly inside ``node.src`` (subdirectories are not
    descended into) into a single ustar archive, and streams that archive
    into a file inside ``node.dest``. The destination tarball is named after
    the directories trailing the SRC_PATH, joined with underscores; for
    SRC_PATH itself the base name of the directory is used. Permissions of
    the tarball are set to r_x for group and owner.

    Args:
        node: Node object
            Contains source and destination directories as follows:
            src: full path to a remote node
                e.g. /data/13660a/asci/input
            dest: full path to a destination node
                e.g. /home/grub0002/bapcxi/vault/imbl2018

    Raises:
        subprocess.CalledProcessError: if ssh or the remote command exits
            with a non-zero status. The incomplete tarball is left in place
            with its permissions unchanged.
        OSError: if the tarball cannot be opened for writing, e.g. because
            an earlier run already left a read-only tarball of that name.
    """
    # Imported here so the function carries its own dependencies.
    import shlex
    import subprocess

    if node.src == SRC_PATH:
        filename = os.path.basename(node.src)
    else:
        # count=1: strip only the leading SRC_PATH prefix, not a later
        # repetition of the same text deeper in the path.
        filename = node.src.replace(SRC_PATH + '/', '', 1).replace('/', '_')
    tarball = os.path.join(node.dest, f'{filename}.tar')

    # '&&' keeps find/cpio from archiving the login directory when the cd
    # fails; quoting protects paths containing spaces or shell characters.
    # -maxdepth precedes -type because GNU find warns when a global option
    # follows a test.
    remote_command = (
        f"cd {shlex.quote(node.src)} && "
        "find . -maxdepth 1 -type f -print0 | cpio -o -H ustar -0"
    )

    # Write the archive from Python rather than through a local shell
    # pipeline, so that a failed transfer is detected instead of ignored.
    with open(tarball, 'wb') as archive:
        subprocess.run(
            ['ssh', REMOTE_LOGIN, remote_command],
            stdout=archive,
            check=True,
        )

    os.chmod(tarball, 0o550)
    print(f'transfer {node.src} -> {node.dest}')
if __name__ == "__main__":
    # Get the directory tree from the remote server as a list of paths.
    with Connection(REMOTE_LOGIN) as c:
        result = c.run(f'find {SRC_PATH} -type d')
    # Drop empty entries so that blank output cannot produce a Node whose
    # paths are empty strings.
    remote_dirs = [d for d in result.stdout.strip().split('\n') if d]

    # Create a tree data structure that represents both source and destination
    # tree paths. Only the leading SRC_PATH is swapped for DEST_PATH (count=1),
    # so a repetition of that text deeper in a path is left alone.
    tree = [
        Node(src, src.replace(SRC_PATH, DEST_PATH, 1))
        for src in remote_dirs
    ]

    # Transfer all directory tree nodes
    for node in tree:
        # Make sure the destination directory exists and is writable by
        # owner and group before the tarball is written into it.
        pathlib.Path(node.dest).mkdir(parents=True, exist_ok=True)
        os.chmod(node.dest, 0o770)
        tar_and_send_directory(node)

    # from IPython import embed; embed()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment