""" This module handles SSH Connections """ import subprocess TIMEOUT=60 class SshAgentException(Exception): def __init__(self,message=None): super(SshAgentException,self).__init__(message) """ Raised if an SSH Agent isn't running (Normally the SPA client should invoke the agent before trying to execute anythign or create any tunnels) """ class SshValidationException(Exception): """ Raised if any parameters passed to SSH don't look valid, eg hostnames are not hostnames, ports are not numbers, usernames are not the correct format """ pass class SshCtrlException(Exception): """ Raised if the ssh control socket is not valid. """ def __init__(self,message=None): super(SshCtrlException,self).__init__(message) class SshExecException(Exception): """ Raised if the ssh can't run a remote program. Possibly a bug on the other end """ def __init__(self,message=None): super(SshExecException,self).__init__(message) class SftpException(Exception): pass class SftpPermissionException(Exception): pass class Ssh: """ Ssh class can execute or create tunnelstat """ @staticmethod def validate_port(port): "validate that the port we're being asked to connect to is an integer" try: port = int(port) except: raise SshValidationException("port number {} was not an integer".format(port)) @staticmethod def validate_username(user): """ ensure that the username conforms to posix standards Some systems do not conform to this standard, and linux still works but for the moment, lets assume they do conform. """ import re username_re = re.compile(r'^[a-z_]([a-z0-9_-]{0,31}|[a-z0-9_\.-]{0,30}\$)$') if not username_re.match(user): raise SshValidationException("username {} is not valid according to " \ "posix standards".format(user)) @staticmethod def validate_hostname(host): "ensure that the hostname at least conforms to naming standards" import re hostname_re = re.compile(r'^[a-z0-9_\.-]+$') if not hostname_re.match(host): raise SshValidationException("hostname {} is not valid".format(host)) @staticmethod def validate_command(cmd): """So in theory, due the the format of the Popen command we can take any input as a commnand to execute on the login host, including ; and other forms of injection, and the worst that can happen is the user screws up their own account """ pass @staticmethod def get_ctrl_master_socket(sess, host, user, sshport): """ Returns the socket for the control master process Opens it if needed. """ import os import stat import logging logger = logging.getLogger() ctrlsocket = "/tmp/cm-{}-{}".format(user,host) try: mode = os.stat(ctrlsocket).st_mode except FileNotFoundError: mode = None if mode is None: sshcmd = ["ssh", '-o', 'StrictHostKeyChecking=no', '-o', 'UserKnownHostsFile=/dev/null', '-o', 'IdentityFile=/dev/null', '-o', 'PreferredAuthentications=publickey', '-o', 'UpdateHostKeys=no', '-o', 'LogLevel=error', "-S", ctrlsocket, "-M", '-o', 'ControlPersist=10s', '-p', sshport, '-N','-l', user, host] env = os.environ.copy() if sess.socket is None: raise SshAgentException("No ssh-agent yet") env['SSH_AUTH_SOCK'] = sess.socket ctrl_p = subprocess.Popen(sshcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=None, env=env) #logger.debug("master spawned, attempt communicate") #stdout, stderr = ctrl_p.communicate() #logger.debug('communicate on the control port complete') sess.pids.append(ctrl_p.pid) sess.ctrl_processes[ctrlsocket] = ctrl_p try: mode = os.stat(ctrlsocket).st_mode except FileNotFoundError: import time time.sleep(5) try: mode = os.stat(ctrlsocket).st_mode except FileNotFoundError: mode = None error = ctrl_p.stderr.read() logger.error(error) raise SshCtrlException(error.decode()) if not stat.S_ISSOCK(mode): error = ctrl_p.stderr.read() logger.error(error) raise SshCtrlException(error.decode()) return ctrlsocket @staticmethod def parse_sftp_output(output): import logging logger = logging.getLogger() rv = [] pwd = None import dateutil.parser for l in output.splitlines(): fields = l.split(None,8) fd = {} if len(fields) == 9: fd['name'] = fields[8] fd['mode'] = fields[0] fd['hardlinks'] = fields[1] fd['user'] = fields[2] fd['group'] = fields[3] fd['size'] = fields[4] fd['mtime'] = dateutil.parser.parse("{} {} {}".format(fields[5],fields[6],fields[7])).isoformat() fd['name'] = fields[8] rv.append(fd) remotestr = "Remote working directory: " if remotestr in l: pwd = l[len(remotestr):].rstrip() return ({'pwd':pwd,'files':rv}) @staticmethod def sftpmkdir(sess, host, user, path, name, sshport): """ Use sftp to run mkdir. """ import os import logging logger = logging.getLogger() env = os.environ.copy() if path is None: path = "." if sess.socket is None: raise SshAgentException("No ssh-agent yet") env['SSH_AUTH_SOCK'] = sess.socket Ssh.validate_username(user) Ssh.validate_hostname(host) ctrlsocket = Ssh.get_ctrl_master_socket(sess, host, user, sshport) exec_p = subprocess.Popen(['sftp', '-b', '-','-o', 'Stricthostkeychecking=no', '-o', 'UserKnownHostsFile=/dev/null', '-o', 'UpdateHostKeys=no', '-o', 'LogLevel=error', '-P', sshport, '-o', 'ControlPath={}'.format(ctrlsocket), '{}@{}'.format(user, host)], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, env=env) sftpcmd="cd {}\n mkdir \"{}\"\n".format(path,name) (stdout, stderr) = exec_p.communicate(sftpcmd.encode()) if stderr is not None: if not (stderr == b''): logger.error('sftp failure') logger.error(stdout.decode()) logger.error(stderr.decode()) if ('Couldn\'t create directory: Failure' in stderr.decode()): raise SftpPermissionException() return if ('Couldn\'t canonicalize: No such file or directory' in stderr.decode()): logger.error('can\'t change to that directory') raise SftpException() if ('Permission denied' in stderr.decode()): logger.error('can\'t change to that directory') raise SftpPermissionException() logger.error(stdout.decode()) logger.error(stderr.decode()) raise SftpException() return @staticmethod def sftpls(sess, host, user, sshport, path=".",changepath="."): """ Use sftp to run an ls on the given path. Return the directory listing (as a list) and the cwd """ import os import logging logger = logging.getLogger() env = os.environ.copy() if sess.socket is None: raise SshAgentException("No ssh-agent yet") env['SSH_AUTH_SOCK'] = sess.socket Ssh.validate_username(user) Ssh.validate_hostname(host) ctrlsocket = Ssh.get_ctrl_master_socket(sess, host, user, sshport) if (path is None or path == ""): path="." if (changepath is None or changepath == ""): changepath="." exec_p = subprocess.Popen(['sftp', '-b', '-','-o', 'Stricthostkeychecking=no', '-o', 'UserKnownHostsFile=/dev/null', '-o', 'UpdateHostKeys=no', '-o', 'LogLevel=error', '-P', sshport, '-o', 'ControlPath={}'.format(ctrlsocket), '{}@{}'.format(user, host)], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, env=env) sftpcmd="cd {}\n cd {}\n pwd\nls -al\n".format(path,changepath) (stdout, stderr) = exec_p.communicate(sftpcmd.encode()) if stderr is not None: if not (stderr == b''): logger.error('sftp failure') if ('Couldn\'t canonicalize: No such file or directory' in stderr.decode()): logger.error('can\'t change to that directory') return Ssh.sftpls(sess,host,user,sshport, path,changepath='.') if ('Permission denied' in stderr.decode()): logger.error('can\'t change to that directory') return Ssh.sftpls(sess,host,user,sshport, path,changepath='.') logger.error(stderr.decode()) logger.error(('Permission denied' in stderr.decode())) logger.error('Couldn\'t canonicalize: No such file or directory' in stderr.decode()) raise SshCtrlException(stderr.decode()) dirlist = Ssh.parse_sftp_output(stdout.decode()) return dirlist @staticmethod def execute(sess, host, user, cmd, bastion=None, stdin=None, sshport="22", bastionsshport="22"): """ execute the command cmd on the host via ssh # assume the environment is already setup with an # SSH_AUTH_SOCK that allows login """ import os import logging logger = logging.getLogger() if cmd is None and stdin is None: return {'stdout': b'', 'stderr': b'No command given to execute'} env = os.environ.copy() if sess.socket is None: raise SshAgentException("No ssh-agent yet") env['SSH_AUTH_SOCK'] = sess.socket if ":" in host: host,sshport = host.split(':') if bastion is not None and ":" in bastion: bastion,bastionsshport = bastion.split(':') Ssh.validate_username(user) Ssh.validate_hostname(host) Ssh.validate_command(cmd) if bastion == None: ctrlsocket = Ssh.get_ctrl_master_socket(sess, host, user, sshport) else: ctrlsocket = Ssh.get_ctrl_master_socket(sess, bastion, user, bastionsshport) if bastion == None: # we are executing this command on the login node, so no more magic is needed sshcmd = ['ssh', '-o', 'Stricthostkeychecking=no', '-o', 'UserKnownHostsFile=/dev/null', '-o', 'IdentityFile=/dev/null', '-o', 'PreferredAuthentications=publickey', '-o', 'UpdateHostKeys=no', '-o', 'LogLevel=error', '-o', 'ConnectTimeout=10', '-S', ctrlsocket, '-p', sshport, '-l', user, host, cmd] else: # we are executing on a node (e.g. a compute/batch node) using a bastion (e.g. login node) # at the moment I'll assume the ssh port for the batch host is the same as the ssh port for the bastion/login host proxycmd = "ssh -o ConnectTimeout=10 -o UserKnownHostsFile=/dev/null -o UpdateHostKeys=no -o LogLevel=error -o Stricthostkeychecking=no {user}@{bastion} -W {host}:{sshport} -S {ctrlsocket}".format( user=user, host=host, ctrlsocket=ctrlsocket, sshport=sshport, bastion=bastion) sshcmd = ['ssh', '-o', 'Stricthostkeychecking=no', '-o', "ProxyCommand={}".format(proxycmd), '-o', 'UserKnownHostsFile=/dev/null', '-o', 'IdentityFile=/dev/null', '-o', 'PreferredAuthentications=publickey', '-o', 'UpdateHostKeys=no', '-o', 'LogLevel=error', '-o', 'ConnectTimeout=10', '-p', bastionsshport, '-l', user, bastion, cmd] exec_p = subprocess.Popen(sshcmd,stdout=subprocess.PIPE,stderr=subprocess.PIPE,stdin=subprocess.PIPE,env=env) try: if stdin is not None: (stdout, stderr) = exec_p.communicate(stdin.encode(),timeout=TIMEOUT) else: (stdout, stderr) = exec_p.communicate(timeout=TIMEOUT) except subprocess.TimeoutExpired as e: raise SshExecException(message="A program timed out on the backend server") if exec_p.returncode == 255: sess.kill() raise SshCtrlException(message="Ssh was unable to login. It's likely you're credentials need to be renewed by loggin in again") if exec_p.returncode != 0 or b'Control socket connect' in stderr: try: ctrl_p = sess.ctrl_processes[ctrlsocket] except KeyError as e: # If the process is restarted and the agents all get restarted, but the control socket /tmp/cm-* is not removed # This can happen import os os.unlink(ctrlsocket) raise SshCtrlException(message="ssh execute failed, there was a problem with the control socket. Did the docker backend get restartd?") logger.error('ran command {}'.format(" ".join(sshcmd))) logger.error('got return code {} stderr {} stdout {}'.format(exec_p.returncode,stderr,stdout)) raise SshExecException(message="A program failed to execute on the backend server.\n It gave an error message\n{}.\nHopefully this is helpful".format(stderr)) #logger.debug('ssh execut failed {} {}'.format(exec_p.returncode, stderr)) #sess.kill() #raise SshCtrlException(message="ssh execute failed, killing off the agent. Log in again") # If the return code is non-zero, its likely something went wrong logging in. We should perhaps consider killing the ssh session, or at least the ctrl socket processes # And allowing the client to retry them # I haven't decided yet whether we should kill the whole agent or just the ctrl process # Because people often put stuff in their bashrc which casuses stderr to be non-empty, we will swallow errors in the bashrc without comment if b'bashrc' in stderr: return {'stdout':stdout,'stderr':b''} else: return {'stdout':stdout, 'stderr':stderr} @staticmethod def tunnel(sess, port, batchhost, user, host, internalfirewall = True, localbind = True, authtok = None, sshport="22"): """ the ProxyCommand is used if the server we run on the batch host is only addressable on localhost e.g. jupyter in its default config runs on localhost:8888 so a web browser on the login node can not connect to it If the server port is directly accessible from the login host (eg the ssh server port is directly accessible) or the batchhost IS the login host (i.e. batchhost = localhost) we can use -O forward -S <control socket> """ import os import logging logger = logging.getLogger() env = os.environ.copy() env['SSH_AUTH_SOCK'] = sess.socket if ":" in host: host,sshport = host.split(':') Ssh.validate_port(port) Ssh.validate_hostname(batchhost) Ssh.validate_username(user) Ssh.validate_hostname(host) ctrlsocket = Ssh.get_ctrl_master_socket(sess, host, user, sshport) localport = Ssh.get_free_port() if port == 22 or (not internalfirewall and not localbind): # Can we use the existing control master connection and just add a # port forward to the batch node sshcmd = ['ssh', '-o', 'Stricthostkeychecking=no', '-N', '-o', 'UserKnownHostsFile=/dev/null', '-o', 'IdentityFile=/dev/null', '-o', 'PreferredAuthentications=publickey', '-o', 'UpdateHostKeys=no', '-o', 'LogLevel=error', '-L', '{localport}:{batchhost}:{port}'. format(port=port, localport=localport, batchhost=batchhost), '-O', 'forward', '-S', ctrlsocket, '-p', sshport, '-l', user, host] else: # Create an ssh tunnel to the batch node using a proxycommand. # The proxy command should utilise # the existing control master connection proxycmd = "ssh -o UserKnownHostsFile=/dev/null -o UpdateHostKeys=no -o LogLevel=error -o Stricthostkeychecking=no {user}@{host} -W {batchhost}:22 -S {ctrlsocket}".format( user=user, host=host, ctrlsocket=ctrlsocket, batchhost=batchhost) sshcmd = ['ssh', '-o', 'Stricthostkeychecking=no', '-N', '-L', '{localport}:localhost:{port}'. format(port=port, localport=localport), '-o', 'UserKnownHostsFile=/dev/null', '-o', 'IdentityFile=/dev/null', '-o', 'PreferredAuthentications=publickey', '-o', 'UpdateHostKeys=no', '-o', 'LogLevel=error', '-o', "ProxyCommand={}".format(proxycmd), '-p', sshport, '-l', user, batchhost] tunnel_p = subprocess.Popen(sshcmd, env=env) try: if not Ssh.wait_for_tunnel(localport): logger.error('tunnel failed to open') (stdout,stderr) = tunnel_p.communicate() logger.error('Ssh.tunnel: stdout {}'.format(stdout.decode())) logger.error('Ssh.tunnel: stderr {}'.format(stderr.decode())) raise SshCtrlException(message="failed to open a tunnel") except SshCtrlException as e: raise(e) except Exception as e: import traceback logger.error('exception waiting for tunnel') logger.error(e) logger.error(traceback.format_exc()) return None, [] if sess.port is None: sess.port = {} sess.port.update({authtok: localport}) sess.pids.append(tunnel_p.pid) return localport, [tunnel_p.pid] # @staticmethod # def addkey(sess, key, cert): # "" # pass @staticmethod def wait_for_tunnel(localport): notopen = True import socket import time import logging logger = logging.getLogger() while notopen: ssock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) ssock.setblocking(True) try: ssock.connect(('127.0.0.1', localport)) notopen = False ssock.close() except socket.error: ssock.close() return True """ In order to avoid a race condition, we wait for the tunnel to be established If the tunnel hasn't been establised before TIMEOUT, perhaps something has gone wrong """ """ import socket import time import logging logger = logging.getLogger() ssock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) ssock.settimeout(TIMEOUT) for retry in range(0,10): try: ssock.connect(('127.0.0.1', localport)) ssock.close() return True except Exception as e: logger.debug('socket exception {}'.format(e)) ssock.close() time.sleep(1) return False """ @staticmethod def get_free_port(): """ # Finds a port which the local server can listen on. """ import socket serversocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) for testport in range(1025, 65500): try: serversocket.bind(('127.0.0.1', testport)) port = testport serversocket.close() return port except OSError: pass