Commit fe41e4aa authored by Tom Close

Moved admin scripts into a separate repo

parent a1d12687
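# Mapping from MBI project codes to the DaRIS project number used in dataset
# CIDs (imported below as scripts._resources.mbi_to_daris_number)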
mbi_to_daris = {'MRH001': 17,
'MRH003': 23,
'MRH006': 25,
'MRH007': 27,
'MRH009': 32,
'MRH011': 12,
'MRH015': 22,
'MRH018': 17,
'MRH019': 21,
'MRH023': 12,
'MRH026': 34,
'MRH025': 38,
'MRH028': 28,
'MRH031': 45,
'MRH040': 63,
'MRH041': 65,
'MRH042': 66,
'MRH037': 56,
'MRH047': 73,
'MRH038': 60,
'MRH039': 62,
'MRH048': 74,
'MRH049': 77,
'MRH045': 70,
'MRH044': 69,
'MRH059': 98,
'MRH012': 14,
'MRH022': 36,
'MRH058': 97,
'MRH062': 105,
'MRH043': 68,
'MRH061': 101,
'MRH071': 119,
'MRH077': 127,
'MRH055': 100,
'MRH069': 117,
'MRH070': 118,
'MRH057': 96,
'MRH065': 110,
'MRH051': 81,
'MRH075': 125,
'MRH027': 44,
'MRH030': 52,
'MRH036': 53,
'MRH032': 54,
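                # NOTE: duplicate key -- the 'MRH032': 58 entry below
                # overrides this one in the dict literal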
'MRH032': 58,
'MRH063': 106,
'MRH056': 95,
'MRH081': 137,
'MRH079': 129,
'MRH067': 115,
'MRH072': 120,
'MRH074': 124,
'MRH033': 49,
'MRH000': 3,
'MRH017': 71,
'MRH021': 33,
'MRH024': 30,
'MRH034': 48,
'MRH035': 57,
'MRH046': 72,
'MRH054': 92,
'MRH060': 99,
'MRH064': 107,
'MRH066': 112,
'MRH068': 116,
'MRH073': 123,
'MRH076': 126,
'MRH078': 128,
'MRH082': 139,
'MRH083': 143,
'MRH084': 145,
'MRH086': 148,
'MRH087': 149,
'MRH088': 151,
'MRH089': 153,
'MRH090': 158,
'MRH092': 159,
'MMH001': 133,
'MMH002': 138,
'MMH003': 147,
'QC001': 16,
'MMA003': 146,
'MRA004': 20,
'MRA027': 130,
'MMO003': 142,
'MMO005': 152}
#!/usr/bin/env pypy
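# Checks that datasets imported into XNAT match the originals in the DaRIS
# archive: each DaRIS dataset zip is unpacked and its DICOM headers are
# compared element-by-element against the corresponding XNAT scan, matched
# on series time + series description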
from argparse import ArgumentParser
import subprocess as sp
import os.path
from collections import defaultdict
from itertools import groupby, product
import dicom
import shutil
import tempfile
import getpass
import logging
from nianalysis.archive.daris.login import DarisLogin
from scripts._resources.mbi_to_daris_number import mbi_to_daris # @IgnorePep8 @UnresolvedImport
URL_PREFIX = 'file:/srv/mediaflux/mflux/volatile/stores/pssd/'
DARIS_STORE_PREFIX = '/mnt/rdsi/mf-data/stores/pssd'
XNAT_STORE_PREFIX = '/mnt/vicnode/archive/'
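# DICOM tags, as (group, element) pairs, used to key and compare datasets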
DATASET_TIME_TAG = ('0008', '0031')
ECHO_TIME_TAG = ('0018', '0081')
STUDY_NUM_TAG = ('0020', '0013')
SERIES_DESCR_TAG = ('0008', '103e')
IMAGE_TYPE_TAG = ('0008', '0008')
parser = ArgumentParser()
parser.add_argument('project', type=str,
help='ID of the project to import')
parser.add_argument('--log_file', type=str, default=None,
help='Path of the logfile to record discrepencies')
parser.add_argument('--subjects', type=int, nargs='+', default=None,
help="subjects to check")
parser.add_argument('--session', type=str, nargs=2, default=None,
metavar=('SUBJECT', 'SESSION'),
help=("The subject and session to check. If not provided "
"all sessions are checked"))
parser.add_argument('--dataset', type=int, default=None,
help=("The dataset to compare"))
parser.add_argument('--xnat_id', type=int, default=None,
help=("The id of the XNAT dataset to compare to if using "
"--dataset option"))
args = parser.parse_args()
log_path = args.log_file if args.log_file else os.path.join(
os.environ['HOME'], 'checksums',
'{}_checksum.log'.format(args.project))
logger = logging.getLogger('check_imported')
file_handler = logging.FileHandler(log_path)
file_handler.setLevel(logging.ERROR)
file_handler.setFormatter(logging.Formatter("%(message)s"))
logger.addHandler(file_handler)
stdout_handler = logging.StreamHandler()
stdout_handler.setLevel(logging.INFO)
stdout_handler.setFormatter(logging.Formatter(
"%(levelname)s - %(message)s"))
logger.addHandler(stdout_handler)
if args.project.startswith('MR') or args.project.startswith('MMO'):
modality = 'MR'
elif args.project.startswith('MM'):
modality = 'MRPT'
else:
assert False, "Unrecognised modality {}".format(args.project)
if args.xnat_id is not None and args.dataset is None:
raise Exception(
"'--xnat_id' option should only be used with '--dataset' option")
def extract_dicom_tag(fname, tag):
cmd = ("dcmdump {} | grep '({},{})' | head -n 1 | awk '{{print $3}}' | "
"sed 's/[][]//g'".format(fname, *tag))
return sp.check_output(cmd, shell=True)
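# A dataset is identified by its (series time, series description) pair,
# which is how DaRIS datasets are matched to XNAT scan directories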
def get_dataset_key(fname):
return (extract_dicom_tag(fname, DATASET_TIME_TAG),
extract_dicom_tag(fname, SERIES_DESCR_TAG))
def dataset_sort_key(daris_id):
return tuple(int(p) for p in daris_id.split('.'))
def session_group_key(daris_id):
return tuple(int(p) for p in daris_id.split('.')[:6])
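# Walk every session of the project in DaRIS, locate the matching XNAT
# session on disk, then compare each DaRIS dataset zip with its XNAT copy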
def run_check(args, modality):
tmp_dir = tempfile.mkdtemp()
try:
with open('{}/.daris_password'.format(os.environ['HOME'])) as f:
            password = f.read().strip()  # drop any trailing newline
    except (IOError, OSError):  # IOError on Python 2 when the file is absent
password = getpass.getpass("DaRIS manager password: ")
with DarisLogin(domain='system', user='manager',
password=password) as daris:
project_daris_id = mbi_to_daris[args.project]
if args.session is not None:
session_id_part = '.{}.1.{}'.format(*args.session)
else:
session_id_part = ''
datasets = daris.query(
"cid starts with '1008.2.{}{}' and model='om.pssd.dataset'"
.format(project_daris_id, session_id_part), cid_index=True)
cids = sorted(datasets.iterkeys(),
key=dataset_sort_key)
for session_id, dataset_cids in groupby(cids, key=session_group_key):
dataset_cids = list(dataset_cids) # Convert iterator to list
subject_id, method_id, study_id = (
int(p) for p in session_id[3:])
if args.subjects is not None:
if subject_id not in args.subjects:
continue
if method_id != 1:
print("Skipping session_id as its method != 1 ({})"
.format(method_id))
continue
# Create dictionary mapping study-id to archive paths
xnat_session = '{}_{:03}_{}{:02}'.format(
args.project, subject_id, modality, study_id)
            xnat_session_path = os.path.join(
                XNAT_STORE_PREFIX, args.project, 'arc001', xnat_session,
                'SCANS')
if not os.path.exists(xnat_session_path):
logger.error('1008.2.{}.{}.1.{}: missing session {} ({})'
.format(mbi_to_daris[args.project], subject_id,
study_id, xnat_session,
xnat_session_path))
continue
dataset_key2xnat = {}
for dataset_id in os.listdir(xnat_session_path):
xnat_dataset_path = os.path.join(xnat_session_path,
str(dataset_id))
if 'DICOM' not in os.listdir(xnat_dataset_path):
print("Skipping non-DICOM dataset {}"
.format(xnat_dataset_path))
continue
xnat_dicom_path = os.path.join(xnat_dataset_path, 'DICOM')
try:
dataset_key = get_dataset_key(
os.path.join(xnat_dicom_path,
os.listdir(xnat_dicom_path)[0]))
except (IndexError, OSError):
logger.error('{} directory empty'
.format(xnat_dicom_path))
continue
if dataset_key in dataset_key2xnat:
assert False, (
"multiple acq times in {} ({} and {})".format(
xnat_session_path, xnat_dicom_path,
dataset_key2xnat[dataset_key]))
dataset_key2xnat[dataset_key] = xnat_dicom_path
# Unzip DaRIS datasets and compare with XNAT
match = True
for cid in dataset_cids:
if args.dataset is not None:
if int(cid.split('.')[-1]) != args.dataset:
continue
src_zip_path = os.path.join(
DARIS_STORE_PREFIX,
datasets[cid].url[len(URL_PREFIX):])
unzip_path = os.path.join(tmp_dir, cid)
os.mkdir(unzip_path)
                print(src_zip_path)
sp.check_call('unzip -q {} -d {}'.format(src_zip_path,
unzip_path),
shell=True)
dataset_key = get_dataset_key(
os.path.join(unzip_path, '0001.dcm'))
if args.xnat_id is not None:
xnat_path = os.path.join(
xnat_session_path, str(args.xnat_id),
'DICOM')
else:
try:
xnat_path = dataset_key2xnat[dataset_key]
except KeyError:
logger.error('{}: missing dataset {}.{}'.format(
cid, xnat_session, cid.split('.')[-1]))
match = False
continue
                # the XNAT scan id is the directory containing 'DICOM'
                if not compare_datasets(
                        xnat_path, unzip_path, cid, xnat_session,
                        os.path.basename(os.path.dirname(xnat_path))):
                    match = False
shutil.rmtree(unzip_path, ignore_errors=True)
            if match:
                print('{}: matches ({})'.format(
                    '.'.join(str(p) for p in session_id), xnat_session))
shutil.rmtree(tmp_dir)
    logger.error('Finished check!')  # ERROR level so it reaches the log file
class WrongEchoTimeOrImageTypeException(Exception):
pass
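# Recursively compare two pydicom elements/datasets. Echo-time and image-type
# mismatches raise WrongEchoTimeOrImageTypeException so that the caller can
# retry with a different pairing of multi-echo files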
def compare_dicom_elements(xnat_elem, daris_elem, prefix, ns=None):
if ns is None:
ns = []
name = '.'.join(ns)
if isinstance(daris_elem, dicom.dataset.Dataset):
# Check to see if echo times match and throw WrongEchoTimeOrImageTypeException
# if they don't
if IMAGE_TYPE_TAG in daris_elem:
try:
if (daris_elem[IMAGE_TYPE_TAG].value !=
xnat_elem[IMAGE_TYPE_TAG].value):
raise WrongEchoTimeOrImageTypeException
except KeyError:
                logger.error(
                    "{}xnat scan does not have image type while daris does"
                    .format(prefix))
                return False
if ECHO_TIME_TAG in daris_elem:
try:
if (daris_elem[ECHO_TIME_TAG].value !=
xnat_elem[ECHO_TIME_TAG].value):
raise WrongEchoTimeOrImageTypeException
except KeyError:
                logger.error(
                    "{}xnat scan does not have echo time while daris does"
                    .format(prefix))
                return False
match = True
for d in daris_elem:
try:
x = xnat_elem[d.tag]
except KeyError:
logger.error("{}missing {}".format(prefix, d.name))
                match = False
                continue  # no element on the XNAT side to recurse into
if not compare_dicom_elements(x, d, prefix, ns=ns + [d.name]):
match = False
return match
elif isinstance(daris_elem.value, dicom.sequence.Sequence):
if len(xnat_elem.value) != len(daris_elem.value):
logger.error(
"{}mismatching length of '{}' sequence (xnat:{} vs "
"daris:{})".format(prefix, name, len(xnat_elem.value),
len(daris_elem.value)))
return False
match = True
for x, d in zip(xnat_elem.value, daris_elem.value):
if not compare_dicom_elements(x, d, prefix, ns=ns):
match = False
return match
else:
if xnat_elem.name == 'Patient Comments':
# Skip patient comments containing xnat id string
return True
xnat_value = xnat_elem.value
daris_value = daris_elem.value
try:
xnat_value = xnat_value.strip()
daris_value = daris_value.strip()
except AttributeError:
pass
if xnat_value != daris_value:
include_diff = True
try:
if max(len(xnat_value), len(daris_value)) > 100:
include_diff = False
except TypeError:
pass
if include_diff:
diff = ('(xnat:{} vs daris:{})'
.format(xnat_value, daris_value))
else:
diff = ''
logger.error("{}mismatching value for '{}'{}".format(
prefix, name, diff))
return False
return True
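# Compare two directories of DICOM files: file counts, then instance
# numbering, then per-file headers, trying every echo pairing per instance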
def compare_datasets(xnat_path, daris_path, cid, xnat_session, dataset_id):
daris_files = [f for f in os.listdir(daris_path)
if f.endswith('.dcm')]
xnat_files = [f for f in os.listdir(xnat_path)
if f.endswith('.dcm')]
if len(daris_files) != len(xnat_files):
logger.error("{}: mismatching number of dicoms in dataset "
"{}.{} (xnat {} vs daris {})"
.format(cid, xnat_session, dataset_id,
len(xnat_files), len(daris_files)))
xnat_elem = dicom.read_file(os.path.join(xnat_path,
sorted(xnat_files)[0]))
daris_elem = dicom.read_file(os.path.join(daris_path,
sorted(daris_files)[0]))
        try:
            compare_dicom_elements(
                xnat_elem, daris_elem,
                "{}: 1st dicoms in datasets don't match {}.{} -".format(
                    cid, xnat_session, dataset_id))
        except WrongEchoTimeOrImageTypeException:
            pass  # only logging header differences here; pairing is moot
        return False
xnat_fname_map = defaultdict(list)
for fname in xnat_files:
try:
dcm_num = int(fname.split('-')[-2])
except ValueError:
dcm_num = None # For some 3D application data
xnat_fname_map[dcm_num].append(fname)
max_mult = max(len(v) for v in xnat_fname_map.itervalues())
    min_mult = min(len(v) for v in xnat_fname_map.itervalues())
if max_mult != min_mult:
logger.error("{}: Inconsistent numbers of echos in {}.{}"
.format(cid, xnat_session, dataset_id))
return False
daris_fname_map = defaultdict(list)
for fname in daris_files:
try:
dcm_num = int(extract_dicom_tag(os.path.join(daris_path, fname),
STUDY_NUM_TAG))
except ValueError:
dcm_num = None # For some 3D application data
daris_fname_map[dcm_num].append(fname)
if sorted(xnat_fname_map.keys()) != sorted(daris_fname_map.keys()):
logger.error("{}: DICOM instance IDs don't match "
"{}.{}:\nxnat: {}\ndaris: {}\n".format(
cid, xnat_session, dataset_id,
xnat_fname_map.keys(),
daris_fname_map.keys()))
return False
    print(xnat_path)
    print(daris_path)
for dcm_num in daris_fname_map:
num_echoes = len(daris_fname_map[dcm_num])
assert len(xnat_fname_map[dcm_num]) == num_echoes
# Try all combinations of echo times
for i, j in product(range(num_echoes), range(num_echoes)):
try:
daris_fpath = os.path.join(daris_path,
daris_fname_map[dcm_num][i])
try:
xnat_fpath = os.path.join(
xnat_path, xnat_fname_map[dcm_num][j])
                except (KeyError, IndexError):
logger.error('{}: missing file ({}.{}.{})'.format(
cid, xnat_session, dataset_id, dcm_num))
return False
xnat_elem = dicom.read_file(xnat_fpath)
daris_elem = dicom.read_file(daris_fpath)
if not compare_dicom_elements(
xnat_elem, daris_elem,
'{}: dicom mismatch in {}.{}-{}, '.format(
cid, xnat_session, dataset_id, dcm_num)):
return False
except WrongEchoTimeOrImageTypeException:
# Try a different combination until echo times match
pass
return True
run_check(args, modality)
#!/usr/bin/env python
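# Copies a named dataset from a DaRIS project to an XNAT project: downloads
# the dataset zip from DaRIS, repackages it, and uploads it as a new scan
# resource on the matching XNAT session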
import os.path
import re
import tempfile
import shutil
import subprocess as sp
from argparse import ArgumentParser
from nianalysis.archive.daris.login import DarisLogin
from xnatutils import connect, data_format_exts
parser = ArgumentParser()
parser.add_argument('download', type=str,
help="The name of the downloaded dataset")
parser.add_argument('upload', type=str,
help="The name for the uploaded dataset")
parser.add_argument('--daris_project', default=88, type=int,
help="Daris project to download the datasets from")
parser.add_argument('--processed', default=False, action='store_true',
help="Whether the dataset is processed or acquired")
parser.add_argument('--subjects', nargs='+', type=int, default=None,
help="Subjects to copy the datasets from")
parser.add_argument('--sessions', nargs='+', type=int, default=None,
help="The sessions to copy the datasets from")
parser.add_argument('--xnat_project', default='MRH017', type=str,
help="The XNAT project to upload them to")
parser.add_argument('--work_dir', type=str,
help="Work directory to download files from")
parser.add_argument('--modality', default='MR',
help="Modality of dataset session for XNAT upload")
parser.add_argument('--overwrite', '-o', action='store_true', default=False,
help="Allow overwrite of existing dataset")
parser.add_argument('--data_format', default='nifti_gz',
help="The assumed data-format of the dataset")
args = parser.parse_args()
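# Hypothetical invocation (script name and values made up); note that only
# '--data_format zip' is implemented for the repackaging step below:
#   ./copy_daris_to_xnat.py 'ep2d_diff.*' diffusion --daris_project 88 \
#       --subjects 1 2 --data_format zip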
ex_method_id = args.processed + 1  # DaRIS ex-method: 1 = acquired, 2 = processed
ext = data_format_exts[args.data_format.upper()]
work_dir = tempfile.mkdtemp() if args.work_dir is None else args.work_dir
copied = []
with DarisLogin(domain='system') as mbi_daris, connect() as mbi_xnat:
if args.subjects is None:
subject_ids = list(mbi_daris.get_subjects(args.daris_project))
else:
subject_ids = args.subjects
for subject_id in subject_ids:
session_ids = set(mbi_daris.get_sessions(
args.daris_project, subject_id))
if args.sessions:
session_ids &= set(args.sessions)
for session_id in session_ids:
datasets = mbi_daris.get_datasets(args.daris_project, subject_id,
session_id=session_id,
ex_method_id=ex_method_id)
matching = [d for d in datasets.itervalues()
if re.match(args.download, d.name)]
cid = "1008.2.{}.{}.{}".format(
args.daris_project, subject_id, ex_method_id, session_id)
if len(matching) > 1:
print ("Could not distinguish between '{}' in session {}"
.format("', '".join(m.name for m in matching), cid))
elif matching:
match = matching[0]
xnat_session_id = '{}_{:03d}_{}{:02d}{}'.format(
args.xnat_project, subject_id, args.modality,
session_id, ('_PROC' if args.processed else ''))
src_dir = os.path.abspath(
os.path.join(work_dir, str(match.cid)))
session_dir = os.path.abspath(
os.path.join(work_dir, xnat_session_id))
target_dir = os.path.join(session_dir, args.upload)
os.makedirs(src_dir)
os.makedirs(target_dir)
path = os.path.join(src_dir, 'download.zip')
mbi_daris.download(path, args.daris_project, subject_id,
session_id=session_id, dataset_id=match.id)
orig_dir = os.getcwd()
os.chdir(src_dir)
sp.check_call('unzip -q download.zip', shell=True)
os.remove('download.zip')
for dir_path, _, fnames in os.walk(src_dir):
for fname in fnames:
fpath = os.path.join(dir_path, fname)
if os.path.isfile(fpath):
shutil.move(fpath, target_dir)
if args.data_format == 'zip':
os.chdir(session_dir)
sp.check_call('zip -rq {p}.zip {p}'.format(p=args.upload),
shell=True)
else:
                    raise NotImplementedError(
                        "only the 'zip' data format is supported for upload")
os.chdir(orig_dir)
shutil.rmtree(src_dir)
xnat_session = mbi_xnat.experiments[xnat_session_id]
dataset = mbi_xnat.classes.MrScanData(
type=args.upload, parent=xnat_session)
resource = dataset.create_resource(args.data_format.upper())
resource.upload(
os.path.join(session_dir, args.upload + '.zip'),
args.upload + ext)
shutil.rmtree(session_dir)
copied.append(cid)
print "Uploaded {} to {}/{}".format(cid, xnat_session_id,
args.upload + ext)
else:
print ("Did not find matching dataset '{}' in {}".format(
args.download, cid))
print "Successfully copied {} datasets".format(len(copied))
#!/usr/bin/env python
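# Imports the datasets of a DaRIS project, optionally restricted to subjects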
from argparse import ArgumentParser
import subprocess as sp
import os.path
import shutil
import tempfile
import getpass
from nianalysis.archive.daris.login import DarisLogin
parser = ArgumentParser()
parser.add_argument('project', type=str,
help='ID of the project to import')
parser.add_argument('--subjects', nargs='+', default=None, type=int,
help="IDs of the subjects to import")
args = parser.parse_args()
from scripts._resources.mbi_to_daris_number import mbi_to_daris # @IgnorePep8 @UnresolvedImport