Skip to content
Snippets Groups Projects

Test

Merged Chris Hines requested to merge test into prod
1 file
+ 9
4
Compare changes
  • Side-by-side
  • Inline
+ 166
88
@@ -3,7 +3,6 @@ This module handles SSH Connections
"""
import subprocess
TIMEOUT=60
class SshAgentException(Exception):
def __init__(self,message=None):
super(SshAgentException,self).__init__(message)
@@ -43,6 +42,20 @@ class Ssh:
"""
Ssh class can execute or create tunnelstat
"""
TIMEOUT=60
SSHOPTS = [ '-o', 'StrictHostKeyChecking=no',
'-o', 'UserKnownHostsFile=/dev/null',
'-o', 'IdentityFile=/dev/null',
'-o', 'PreferredAuthentications=publickey',
'-o', 'UpdateHostKeys=no',
'-o', 'LogLevel=error',
# '-o', 'ServerAliveInterval=30',
# '-o', 'ServerAliveCountMax=4'
]
SSHCMDWITHOPTS = ['ssh']
SSHCMDWITHOPTS.extend(SSHOPTS)
SFTPCMDWITHOPTS = ['sftp']
SFTPCMDWITHOPTS.extend(SSHOPTS)
@staticmethod
def validate_port(port):
@@ -92,26 +105,56 @@ class Ssh:
import stat
import logging
logger = logging.getLogger()
sess.lock.acquire()
ctrlsocket = "/tmp/cm-{}-{}".format(user,host)
try:
mode = os.stat(ctrlsocket).st_mode
except FileNotFoundError:
# If the control process died (docker restarted, or otherwise exited) but the socket was not removed:
if mode is not None and ctrlsocket not in sess.ctrl_processes:
os.unlink(ctrlsocket)
mode = None
if mode is not None and sess.ctrl_processes[ctrlsocket].returncode is not None:
ctrl_p = sess.ctrl_processes[ctrlsocket]
logger.debug('The control socket exists, but the process has returned {}'.format(ctrl_p.returncode))
(stderr,stdout) = ctrl_p.communicate()
logger.debug('ctrl_p stderr {}'.format(stderr))
logger.debug('ctrl_p stdout {}'.format(stdout))
os.unlink(ctrlsocket)
mode = None
# This is expected: It indicates there is not yet a control master for this user on this login node
except FileNotFoundError as e:
mode = None
except Exception as e:
logger.error('attempting to access control master socket raised exception')
logger.error(e)
import traceback
logger.error(traceback.format_exc())
if mode is None:
sshcmd = ["ssh", '-o', 'StrictHostKeyChecking=no',
'-o', 'UserKnownHostsFile=/dev/null',
'-o', 'IdentityFile=/dev/null',
'-o', 'PreferredAuthentications=publickey',
'-o', 'UpdateHostKeys=no',
'-o', 'LogLevel=error',
# I an existing control process is recoreded, but we are starting a new one, try to delete the old one
if ctrlsocket in sess.ctrl_processes:
try:
(stderr,stdout) = sess.ctrl_processes[ctrlsocket].communicate()
logger.debug("control master terminated with stderr {}".format(stderr))
logger.debug("control master terminated with stdout {}".format(stdout))
os.killpg(int(sess.ctrl_processes[ctrlsocket].pid),signal.SIGKILL)
sess.ctrl_processes[ctrlsocket].communicate()
del sess.ctrl_processes[ctrlsocket]
except:
pass
sshcmd = Ssh.SSHCMDWITHOPTS.copy()
sshcmd.extend([
"-S", ctrlsocket,
"-M", '-o', 'ControlPersist=10s',
'-p', sshport, '-N','-l', user, host]
"-M", '-o', 'ControlPersist=60s',
'-p', sshport, '-N','-l', user, host])
env = os.environ.copy()
if sess.socket is None:
sess.lock.release()
raise SshAgentException("No ssh-agent yet")
env['SSH_AUTH_SOCK'] = sess.socket
logger.debug("creating master socket")
logger.debug(" ".join(sshcmd))
ctrl_p = subprocess.Popen(sshcmd,
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
stdin=None,
@@ -119,24 +162,48 @@ class Ssh:
#logger.debug("master spawned, attempt communicate")
#stdout, stderr = ctrl_p.communicate()
#logger.debug('communicate on the control port complete')
sess.pids.append(ctrl_p.pid)
logger.debug("spanwed ssh mux with pid {}".format(ctrl_p.pid))
#sess.pids.append(ctrl_p.pid)
if ctrlsocket in sess.ctrl_processes:
logger.debug('existing control process!!!')
old_ctrl_p = sess.ctrl_processes[ctrlsocket]
old_ctrl_p.poll()
if old_ctrl_p.returncode is not None:
logger.debug('old ctrl_p is still running?')
old_ctrl_p.kill()
(stderr,stdout) = old_ctrl_p.communicate()
logger.debug('{} {}'.format(stderr,stdout))
sess.ctrl_processes[ctrlsocket] = ctrl_p
try:
mode = os.stat(ctrlsocket).st_mode
except FileNotFoundError:
import time
time.sleep(5)
try:
mode = os.stat(ctrlsocket).st_mode
except FileNotFoundError:
mode = None
error = ctrl_p.stderr.read()
logger.error(error)
raise SshCtrlException(error.decode())
if not stat.S_ISSOCK(mode):
error = ctrl_p.stderr.read()
logger.error(error)
raise SshCtrlException(error.decode())
notstarted = True
notdead = True
wait=0
while notstarted and notdead:
import time
ctrl_p.poll()
if ctrl_p.returncode != None:
notdead = False
(stdout,stderr) = ctrl_p.communicate()
logger.error('ctrl_p died {} {} {}'.format(ctrl_p.returncode,stdout,stderr))
sess.lock.release()
raise SshCtrlException(stderr.decode())
try:
mode = os.stat(ctrlsocket).st_mode
logger.debug('control socket appeared, it must have started')
notstarted=False
except FileNotFoundError:
logger.debug('ctrl_p not started yet')
time.sleep(0.5)
wait=wait+1
if wait>60:
ctrl_p.kill()
(stdout,stderr) = ctrl_p.communicate()
logger.error('ctrl_p was killed due to timeout {} {} {}'.format(ctrl_p.returncode,stdout,stderr))
sess.lock.release()
raise SshCtrlException(stderr.decode())
logger.debug('exiting if section')
sess.lock.release()
logger.debug('leaving get_ctrl_master_socket {}'.format(sess.ctrl_processes[ctrlsocket].returncode))
return ctrlsocket
@staticmethod
@@ -183,13 +250,11 @@ class Ssh:
Ssh.validate_username(user)
Ssh.validate_hostname(host)
ctrlsocket = Ssh.get_ctrl_master_socket(sess, host, user, sshport)
exec_p = subprocess.Popen(['sftp', '-b', '-','-o', 'Stricthostkeychecking=no',
'-o', 'UserKnownHostsFile=/dev/null',
'-o', 'UpdateHostKeys=no',
'-o', 'LogLevel=error',
'-P', sshport, '-o', 'ControlPath={}'.format(ctrlsocket),
'{}@{}'.format(user, host)],
sftpcmd = Ssh.SFTPCMDWITHOPTS.copy()
sftpcmd.extend(['-P', sshport,
'-o', 'ControlPath={}'.format(ctrlsocket),
'{}@{}'.format(user, host)])
exec_p = subprocess.Popen(sftpcmd,
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
stdin=subprocess.PIPE, env=env)
@@ -235,12 +300,11 @@ class Ssh:
path="."
if (changepath is None or changepath == ""):
changepath="."
exec_p = subprocess.Popen(['sftp', '-b', '-','-o', 'Stricthostkeychecking=no',
'-o', 'UserKnownHostsFile=/dev/null',
'-o', 'UpdateHostKeys=no',
'-o', 'LogLevel=error',
'-P', sshport, '-o', 'ControlPath={}'.format(ctrlsocket),
'{}@{}'.format(user, host)],
sftpcmd = Ssh.SFTPCMDWITHOPTS.copy()
sftpcmd.extend([ '-P', sshport,
'-o', 'ControlPath={}'.format(ctrlsocket),
'{}@{}'.format(user, host)])
exec_p = subprocess.Popen(sftpcmd,
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
stdin=subprocess.PIPE, env=env)
@@ -293,60 +357,61 @@ class Ssh:
ctrlsocket = Ssh.get_ctrl_master_socket(sess, host, user, sshport)
else:
ctrlsocket = Ssh.get_ctrl_master_socket(sess, bastion, user, bastionsshport)
logger.debug('in execute, got my ctrlsocket {}'.format(sess.ctrl_processes[ctrlsocket].pid))
if bastion == None:
# we are executing this command on the login node, so no more magic is needed
sshcmd = ['ssh', '-o', 'Stricthostkeychecking=no',
'-o', 'UserKnownHostsFile=/dev/null',
'-o', 'IdentityFile=/dev/null',
'-o', 'PreferredAuthentications=publickey',
'-o', 'UpdateHostKeys=no',
'-o', 'LogLevel=error',
'-o', 'ConnectTimeout=10',
sshcmd = Ssh.SSHCMDWITHOPTS.copy()
sshcmd.extend([
'-S', ctrlsocket, '-p', sshport,
'-l', user, host, cmd]
'-l', user, host, cmd])
else:
# we are executing on a node (e.g. a compute/batch node) using a bastion (e.g. login node)
# at the moment I'll assume the ssh port for the batch host is the same as the ssh port for the bastion/login host
proxycmd = "ssh -o ConnectTimeout=10 -o UserKnownHostsFile=/dev/null -o UpdateHostKeys=no -o LogLevel=error -o Stricthostkeychecking=no {user}@{bastion} -W {host}:{sshport} -S {ctrlsocket}".format(
proxycmd = " ".join(Ssh.SSHCMDWITHOPTS)
proxycmd = proxycmd + " {user}@{bastion} -W {host}:{sshport} -S {ctrlsocket}".format(
user=user, host=host,
ctrlsocket=ctrlsocket,
sshport=sshport,
bastion=bastion)
sshcmd = ['ssh', '-o', 'Stricthostkeychecking=no',
sshcmd = Ssh.SSHCMDWITHOPTS.copy()
sshcmd.extend([
'-o', "ProxyCommand={}".format(proxycmd),
'-o', 'UserKnownHostsFile=/dev/null',
'-o', 'IdentityFile=/dev/null',
'-o', 'PreferredAuthentications=publickey',
'-o', 'UpdateHostKeys=no',
'-o', 'LogLevel=error',
'-o', 'ConnectTimeout=10',
'-p', bastionsshport, '-l', user, bastion, cmd]
'-p', str(bastionsshport), '-l', user, bastion, cmd])
logger.debug('in execute, opening process {}'.format(sshcmd))
exec_p = subprocess.Popen(sshcmd,stdout=subprocess.PIPE,stderr=subprocess.PIPE,stdin=subprocess.PIPE,env=env)
logger.debug('in execute pid {}'.format(exec_p.pid))
try:
if stdin is not None:
(stdout, stderr) = exec_p.communicate(stdin.encode(),timeout=TIMEOUT)
logger.debug('begining comms with timeout')
(stdout, stderr) = exec_p.communicate(stdin.encode(),timeout=Ssh.TIMEOUT)
else:
(stdout, stderr) = exec_p.communicate(timeout=TIMEOUT)
logger.debug('begining comms with timeout')
(stdout, stderr) = exec_p.communicate(timeout=Ssh.TIMEOUT)
except subprocess.TimeoutExpired as e:
logger.debug('cmd timed out')
raise SshExecException(message="A program timed out on the backend server")
if exec_p.returncode == 255:
logger.error(stderr)
sess.kill()
raise SshCtrlException(message="Ssh was unable to login. It's likely you're credentials need to be renewed by loggin in again")
if exec_p.returncode != 0 or b'Control socket connect' in stderr:
try:
ctrl_p = sess.ctrl_processes[ctrlsocket]
except KeyError as e:
# If the process is restarted and the agents all get restarted, but the control socket /tmp/cm-* is not removed
# This can happen
import os
os.unlink(ctrlsocket)
raise SshCtrlException(message="ssh execute failed, there was a problem with the control socket. Did the docker backend get restartd?")
# this error condition should now be handled in get_control_socket
# if exec_p.returncode != 0 or b'Control socket connect' in stderr:
# try:
# ctrl_p = sess.ctrl_processes[ctrlsocket]
# except KeyError as e:
# # If the process is restarted and the agents all get restarted, but the control socket /tmp/cm-* is not removed
# # This can happen
# import os
# try:
# os.unlink(ctrlsocket)
# except FileNotFoundError as e:
# pass
# raise SshCtrlException(message="ssh execute failed, there was a problem with the control socket. Did the docker backend get restartd?")
logger.error('ran command {}'.format(" ".join(sshcmd)))
logger.error('got return code {} stderr {} stdout {}'.format(exec_p.returncode,stderr,stdout))
raise SshExecException(message="A program failed to execute on the backend server.\n It gave an error message\n{}.\nHopefully this is helpful".format(stderr))
#logger.debug('ssh execut failed {} {}'.format(exec_p.returncode, stderr))
#sess.kill()
#raise SshCtrlException(message="ssh execute failed, killing off the agent. Log in again")
# If the return code is non-zero, its likely something went wrong logging in. We should perhaps consider killing the ssh session, or at least the ctrl socket processes
@@ -382,41 +447,38 @@ class Ssh:
Ssh.validate_username(user)
Ssh.validate_hostname(host)
ctrlsocket = Ssh.get_ctrl_master_socket(sess, host, user, sshport)
logger.debug('in tunnel, got my ctrlsocket {}'.format(sess.ctrl_processes[ctrlsocket].pid))
localport = Ssh.get_free_port()
if port == 22 or (not internalfirewall and not localbind):
# Can we use the existing control master connection and just add a
# port forward to the batch node
sshcmd = ['ssh', '-o', 'Stricthostkeychecking=no', '-N',
'-o', 'UserKnownHostsFile=/dev/null',
'-o', 'IdentityFile=/dev/null',
'-o', 'PreferredAuthentications=publickey',
'-o', 'UpdateHostKeys=no',
'-o', 'LogLevel=error',
'-L', '{localport}:{batchhost}:{port}'.
sshcmd = Ssh.SSHCMDWITHOPTS.copy()
sshcmd.extend([
'-L', '127.0.0.1:{localport}:{batchhost}:{port}'.
format(port=port, localport=localport, batchhost=batchhost),
'-N',
'-O', 'forward', '-S', ctrlsocket,
'-p', sshport, '-l', user, host]
'-p', sshport, '-l', user, host])
else:
# Create an ssh tunnel to the batch node using a proxycommand.
# The proxy command should utilise
# the existing control master connection
proxycmd = "ssh -o UserKnownHostsFile=/dev/null -o UpdateHostKeys=no -o LogLevel=error -o Stricthostkeychecking=no {user}@{host} -W {batchhost}:22 -S {ctrlsocket}".format(
proxycmd = " ".join(Ssh.SSHCMDWITHOPTS) + " {user}@{host} -W {batchhost}:22 -S {ctrlsocket}".format(
user=user, host=host,
ctrlsocket=ctrlsocket,
batchhost=batchhost)
sshcmd = ['ssh', '-o', 'Stricthostkeychecking=no', '-N',
'-L', '{localport}:localhost:{port}'.
sshcmd = Ssh.SSHCMDWITHOPTS.copy()
sshcmd.extend([
'-o', "ProxyCommand={}".format(proxycmd),
'-L', '127.0.0.1:{localport}:localhost:{port}'.
format(port=port, localport=localport),
'-o', 'UserKnownHostsFile=/dev/null',
'-o', 'IdentityFile=/dev/null',
'-o', 'PreferredAuthentications=publickey',
'-o', 'UpdateHostKeys=no',
'-o', 'LogLevel=error',
'-o', "ProxyCommand={}".format(proxycmd),
'-p', sshport, '-l', user, batchhost]
'-N',
'-p', sshport, '-l', user, batchhost])
tunnel_p = subprocess.Popen(sshcmd, env=env)
logger.debug('starting tunnel with {}'.format(" ".join(sshcmd)))
tunnel_p = subprocess.Popen(sshcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
logger.debug('tunnel created with pid {}'.format(tunnel_p.pid))
try:
if not Ssh.wait_for_tunnel(localport):
@@ -437,6 +499,18 @@ class Ssh:
sess.port = {}
sess.port.update({authtok: localport})
sess.pids.append(tunnel_p.pid)
sess.tunnels.append(tunnel_p)
# try:
# #tunnel_p.wait(timeout=3)
# import time
# time.sleep(3)
# tunnel_p.poll()
# if tunnel_p.returncode != None:
# (stdout,stderr) = tunnel_p.communicate()
# logger.error('tunnel process exited {} {} {}'.format(tunnel_p.returncode, stdout, stderr))
# sess.tunnels.remove(tunnel_p)
# except subprocess.TimeoutExpired:
# pass
return localport, [tunnel_p.pid]
# @staticmethod
@@ -454,11 +528,15 @@ class Ssh:
while notopen:
ssock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
ssock.setblocking(True)
logger.debug('testing port {}'.format(localport))
try:
ssock.connect(('127.0.0.1', localport))
logger.debug('port is open')
notopen = False
ssock.close()
except socket.error:
logger.debug('port is not open yet')
time.sleep(0.1)
ssock.close()
return True
Loading