diff --git a/tes/sshwrapper/__init__.py b/tes/sshwrapper/__init__.py index 68d6e3a68108fb831381e9be17de9e11df153c7f..b73be842314613afd397ea5dafa4b06a2d88d315 100644 --- a/tes/sshwrapper/__init__.py +++ b/tes/sshwrapper/__init__.py @@ -105,8 +105,8 @@ class Ssh: import stat import logging logger = logging.getLogger() - ctrlsocket = "/tmp/cm-{}-{}".format(user,host) sess.lock.acquire() + ctrlsocket = "/tmp/cm-{}-{}".format(user,host) try: mode = os.stat(ctrlsocket).st_mode # If the control process died (docker restarted, or otherwise exited) but the socket was not removed: @@ -148,6 +148,7 @@ class Ssh: '-p', sshport, '-N','-l', user, host]) env = os.environ.copy() if sess.socket is None: + sess.lock.release() raise SshAgentException("No ssh-agent yet") env['SSH_AUTH_SOCK'] = sess.socket logger.debug("creating master socket") @@ -161,6 +162,15 @@ class Ssh: #logger.debug('communicate on the control port complete') logger.debug("spanwed ssh mux with pid {}".format(ctrl_p.pid)) #sess.pids.append(ctrl_p.pid) + if ctrlsocket in sess.ctrl_processes: + logger.error('existing control process!!!') + old_ctrl_p = sess.ctrl_processes[ctrlsocket] + old_ctrl_p.poll() + if old_ctrl_p.returncode is not None: + logger.error('old ctrl_p is still running?') + old_ctrl_p.kill() + (stderr,stdout) = old_ctrl_p.communicate() + logger.error('{} {}'.format(stderr,stdout)) sess.ctrl_processes[ctrlsocket] = ctrl_p notstarted = True notdead = True @@ -172,6 +182,7 @@ class Ssh: notdead = False (stdout,stderr) = ctrl_p.communicate() logger.error('ctrl_p died {} {} {}'.format(ctrl_p.returncode,stdout,stderr)) + sess.lock.release() raise SshCtrlException(stderr.decode()) try: mode = os.stat(ctrlsocket).st_mode @@ -182,6 +193,10 @@ class Ssh: wait=wait+1 if wait>60: ctrl_p.kill() + (stdout,stderr) = ctrl_p.communicate() + logger.error('ctrl_p was killed due to timeout {} {} {}'.format(ctrl_p.returncode,stdout,stderr)) + sess.lock.release() + raise SshCtrlException(stderr.decode()) sess.lock.release() return ctrlsocket