From 21da30e753dc0f57842e94f315f86c00ca6c3c72 Mon Sep 17 00:00:00 2001
From: Andy Kipp <anki-code@users.noreply.github.com>
Date: Thu, 2 May 2024 22:10:53 +0200
Subject: [PATCH] Fixed populating the return code for interrupted process.
 (#5380)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### Motivation

There is annoying behavior when you run command in the loop and can't
interrupt e.g. [this
report](https://github.com/xonsh/xonsh/discussions/5371) and a bit
#5342. After diving into this I see the issue around return code.

### The essence

Basically ``p = subprocess.Popen()`` populates ``p.returncode`` after
``p.wait()``, ``p.poll()`` or ``p.communicate()``
([doc](https://docs.python.org/3/library/os.html#os.waitpid)). But if
you're using `os.waitpid()` BEFORE these functions you're capturing
return code from a signal subsystem and ``p.returncode`` will be ``0``
like success but it's not success. So after ``os.waitid`` call you need
to set return code manually ``p.returncode = -os.WTERMSIG(status)`` like
in Popen. Example:
```xsh
python  # python interactive

import os, signal, subprocess as sp

p = sp.Popen(['sleep', '100'])
p.pid
# 123
p.wait()
# Ctrl+C or `kill -SIGINT 123` from another terminal
p.returncode
# -2

# BUT:

p = sp.Popen(['sleep', '100'])
p.pid
# 123

pid, status = os.waitpid(p.pid, os.WUNTRACED)
# status=2

# From another terminal:
kill -SIGINT 123

p.wait()
# 0
p.returncode
# 0
```

```xsh
from xonsh.tools import describe_waitpid_status
describe_waitpid_status(2)
# WIFEXITED - False - Return True if the process returning status exited via the exit() system call.
# WEXITSTATUS - 0 - Return the process return code from status.
# WIFSIGNALED - True - Return True if the process returning status was terminated by a signal.
# WTERMSIG - 2 - Return the signal that terminated the process that provided the status value.
# WIFSTOPPED - False - Return True if the process returning status was stopped.
# WSTOPSIG - 0 - Return the signal that stopped the process that provided the status value.
```

See also: [Helpful things for
knight](https://github.com/xonsh/xonsh/pull/5361#issuecomment-2078826181).

### Before

```xsh
$RAISE_SUBPROC_ERROR = True

sleep 100
# Ctrl+C
_.rtn
# 0  # It's wrong and RAISE_SUBPROC_ERROR ignored.

for i in range(5):
    print(i)
    sleep 5
# 0
# Ctrl+C  # Can't interrupt
# 1
# 2
```

### After
```xsh
sleep 100
# Ctrl+C
_.rtn
# -2  # Like in default Popen

$RAISE_SUBPROC_ERROR = True
for i in range(5):
    print(i)
    sleep 5
# 0
# Ctrl+C
# subprocess.CalledProcessError
```

### Notes

* We need to refactor `xonsh.jobs`. It's pretty uncomfortable work with
module.
* The logic is blurry between Specs, Pipelines and Jobs. We need to
bring more clear functions.
* The `captured` variable looks like "just the way of capturing (stdout,
object)" but in fact it affects all logic very much. We need to create
table where we can see the logic difference for every type of capturing.
E.g. in `captured=stdout` mode we use `xonsh.jobs` to monitor the
command but in `captured=object` we avoid this and some logic from
`xonsh.jobs` applied in `stdout` mode but missed in `object` mode. We
need clear map.

## For community
⬇️ **Please click the 👍 reaction instead of leaving a `+1` or 👍
comment**

---------

Co-authored-by: a <1@1.1>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 news/waitpid_returncode.rst | 23 +++++++++++++++++++++++
 tests/procs/test_specs.py   | 17 ++++++++++++++++-
 xonsh/jobs.py               | 18 ++++++++++++++++--
 xonsh/procs/specs.py        | 35 +++++++++++++++++++++--------------
 xonsh/tools.py              | 14 ++++++++++++++
 5 files changed, 90 insertions(+), 17 deletions(-)
 create mode 100644 news/waitpid_returncode.rst
diff --git a/news/waitpid_returncode.rst b/news/waitpid_returncode.rst
new file mode 100644
index 000000000..98566e839
--- /dev/null
+++ b/news/waitpid_returncode.rst
@@ -0,0 +1,23 @@
+**Added:**
+
+* <news item>
+
+**Changed:**
+
+* <news item>
+
+**Deprecated:**
+
+* <news item>
+
+**Removed:**
+
+* <news item>
+
+**Fixed:**
+
+* Fixed populating the return code for interrupted process.
+
+**Security:**
+
+* <news item>
diff --git a/tests/procs/test_specs.py b/tests/procs/test_specs.py
index f2591e74b..402707e8b 100644
--- a/tests/procs/test_specs.py
+++ b/tests/procs/test_specs.py
@@ -1,6 +1,7 @@
 """Tests the xonsh.procs.specs"""
 
 import itertools
+import signal
 import sys
 from subprocess import Popen
 
@@ -8,7 +9,12 @@ import pytest
 
 from xonsh.procs.posix import PopenThread
 from xonsh.procs.proxies import STDOUT_DISPATCHER, ProcProxy, ProcProxyThread
-from xonsh.procs.specs import SubprocSpec, cmds_to_specs, run_subproc
+from xonsh.procs.specs import (
+    SubprocSpec,
+    _run_command_pipeline,
+    cmds_to_specs,
+    run_subproc,
+)
 from xonsh.pytest.tools import skip_if_on_windows
 from xonsh.tools import XonshError
 
@@ -134,6 +140,15 @@ def test_capture_always(
     assert exp in capfd.readouterr().out
 
 
+@skip_if_on_windows
+def test_interrupted_process_returncode(xonsh_session):
+    xonsh_session.env["RAISE_SUBPROC_ERROR"] = False
+    cmd = [["python", "-c", "import os, signal; os.kill(os.getpid(), signal.SIGINT)"]]
+    specs = cmds_to_specs(cmd, captured="stdout")
+    (p := _run_command_pipeline(specs, cmd)).end()
+    assert p.proc.returncode == -signal.SIGINT
+
+
 @skip_if_on_windows
 @pytest.mark.parametrize(
     "cmds, exp_stream_lines, exp_list_lines",
diff --git a/xonsh/jobs.py b/xonsh/jobs.py
index 0e841e479..3a239d595 100644
--- a/xonsh/jobs.py
+++ b/xonsh/jobs.py
@@ -39,6 +39,20 @@ _jobs_thread_local = threading.local()
 _tasks_main: collections.deque[int] = collections.deque()
 
 
+def waitpid(pid, opt):
+    """
+    Transparent wrapper on `os.waitpid` to make notes about undocumented subprocess behavior.
+    Basically ``p = subprocess.Popen()`` populates ``p.returncode`` after ``p.wait()``, ``p.poll()``
+    or ``p.communicate()`` (https://docs.python.org/3/library/os.html#os.waitpid).
+    But if you're using `os.waitpid()` BEFORE these functions you're capturing return code
+    from a signal subsystem and ``p.returncode`` will be ``0``.
+    After ``os.waitid`` call you need to set return code manually
+    ``p.returncode = -os.WTERMSIG(status)`` like in Popen.
+    See also ``xonsh.tools.describe_waitpid_status()``.
+    """
+    return os.waitpid(pid, opt)
+
+
 @contextlib.contextmanager
 def use_main_jobs():
     """Context manager that replaces a thread's task queue and job dictionary
@@ -270,7 +284,7 @@ else:
             if thread.pid is None:
                 # When the process stopped before os.waitpid it has no pid.
                 raise ChildProcessError("The process PID not found.")
-            _, wcode = os.waitpid(thread.pid, os.WUNTRACED)
+            _, wcode = waitpid(thread.pid, os.WUNTRACED)
         except ChildProcessError as e:  # No child processes
             if return_error:
                 return e
@@ -284,7 +298,7 @@ else:
         elif os.WIFSIGNALED(wcode):
             print()  # get a newline because ^C will have been printed
             thread.signal = (os.WTERMSIG(wcode), os.WCOREDUMP(wcode))
-            thread.returncode = None
+            thread.returncode = -os.WTERMSIG(wcode)  # Default Popen
         else:
             thread.returncode = os.WEXITSTATUS(wcode)
             thread.signal = None
diff --git a/xonsh/procs/specs.py b/xonsh/procs/specs.py
index 72fae475a..4751c14e9 100644
--- a/xonsh/procs/specs.py
+++ b/xonsh/procs/specs.py
@@ -934,25 +934,32 @@ def run_subproc(cmds, captured=False, envs=None):
         return _run_specs(specs, cmds)
 
 
-def _run_specs(specs, cmds):
+def _run_command_pipeline(specs, cmds):
     captured = specs[-1].captured
     if captured == "hiddenobject":
-        command = HiddenCommandPipeline(specs)
+        cp = HiddenCommandPipeline(specs)
     else:
-        command = CommandPipeline(specs)
-    proc = command.proc
-    background = command.spec.background
+        cp = CommandPipeline(specs)
+    proc = cp.proc
+    background = cp.spec.background
     if not all(x.is_proxy for x in specs):
         xj.add_job(
             {
                 "cmds": cmds,
-                "pids": [i.pid for i in command.procs],
+                "pids": [i.pid for i in cp.procs],
                 "obj": proc,
                 "bg": background,
-                "pipeline": command,
-                "pgrp": command.term_pgid,
+                "pipeline": cp,
+                "pgrp": cp.term_pgid,
             }
         )
+    return cp
+
+
+def _run_specs(specs, cmds):
+    cp = _run_command_pipeline(specs, cmds)
+    proc, captured, background = cp.proc, specs[-1].captured, cp.spec.background
+
     # For some reason, some programs are in a stopped state when the flow
     # reaches this point, hence a SIGCONT should be sent to `proc` to make
     # sure that the shell doesn't hang.
@@ -961,16 +968,16 @@ def _run_specs(specs, cmds):
 
     # now figure out what we should return
     if captured == "object":
-        return command  # object can be returned even if backgrounding
+        return cp  # object can be returned even if backgrounding
     elif captured == "hiddenobject":
         if not background:
-            command.end()
-        return command
+            cp.end()
+        return cp
     elif background:
         return
     elif captured == "stdout":
-        command.end()
-        return command.output
+        cp.end()
+        return cp.output
     else:
-        command.end()
+        cp.end()
         return
diff --git a/xonsh/tools.py b/xonsh/tools.py
index 550f5dffe..7053a9e02 100644
--- a/xonsh/tools.py
+++ b/xonsh/tools.py
@@ -2841,3 +2841,17 @@ def to_repr_pretty_(inst, p, cycle):
         elif len(inst):
             p.break_()
             p.pretty(dict(inst))
+
+
+def describe_waitpid_status(status):
+    """Describes ``pid, status = os.waitpid(pid, opt)`` status."""
+    funcs = [
+        os.WIFEXITED,
+        os.WEXITSTATUS,
+        os.WIFSIGNALED,
+        os.WTERMSIG,
+        os.WIFSTOPPED,
+        os.WSTOPSIG,
+    ]
+    for f in funcs:
+        print(f.__name__, "-", f(status), "-", f.__doc__)