From 3b125ca5ad5c83fcd3b23ca13b7f67023634f472 Mon Sep 17 00:00:00 2001 From: Chris Hines <chris.hines@monash.edu> Date: Fri, 18 Sep 2020 10:30:54 +1000 Subject: [PATCH] still trying to get the ctrl_socket right --- tes/sshwrapper/__init__.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tes/sshwrapper/__init__.py b/tes/sshwrapper/__init__.py index 68d6e3a..b73be84 100644 --- a/tes/sshwrapper/__init__.py +++ b/tes/sshwrapper/__init__.py @@ -105,8 +105,8 @@ class Ssh: import stat import logging logger = logging.getLogger() - ctrlsocket = "/tmp/cm-{}-{}".format(user,host) sess.lock.acquire() + ctrlsocket = "/tmp/cm-{}-{}".format(user,host) try: mode = os.stat(ctrlsocket).st_mode # If the control process died (docker restarted, or otherwise exited) but the socket was not removed: @@ -148,6 +148,7 @@ class Ssh: '-p', sshport, '-N','-l', user, host]) env = os.environ.copy() if sess.socket is None: + sess.lock.release() raise SshAgentException("No ssh-agent yet") env['SSH_AUTH_SOCK'] = sess.socket logger.debug("creating master socket") @@ -161,6 +162,15 @@ class Ssh: #logger.debug('communicate on the control port complete') logger.debug("spanwed ssh mux with pid {}".format(ctrl_p.pid)) #sess.pids.append(ctrl_p.pid) + if ctrlsocket in sess.ctrl_processes: + logger.error('existing control process!!!') + old_ctrl_p = sess.ctrl_processes[ctrlsocket] + old_ctrl_p.poll() + if old_ctrl_p.returncode is not None: + logger.error('old ctrl_p is still running?') + old_ctrl_p.kill() + (stderr,stdout) = old_ctrl_p.communicate() + logger.error('{} {}'.format(stderr,stdout)) sess.ctrl_processes[ctrlsocket] = ctrl_p notstarted = True notdead = True @@ -172,6 +182,7 @@ class Ssh: notdead = False (stdout,stderr) = ctrl_p.communicate() logger.error('ctrl_p died {} {} {}'.format(ctrl_p.returncode,stdout,stderr)) + sess.lock.release() raise SshCtrlException(stderr.decode()) try: mode = os.stat(ctrlsocket).st_mode @@ -182,6 +193,10 @@ class Ssh: wait=wait+1 if wait>60: ctrl_p.kill() + (stdout,stderr) = ctrl_p.communicate() + logger.error('ctrl_p was killed due to timeout {} {} {}'.format(ctrl_p.returncode,stdout,stderr)) + sess.lock.release() + raise SshCtrlException(stderr.decode()) sess.lock.release() return ctrlsocket -- GitLab