From 4030ed8050629cfc9d7b97a217f71b4c3db424fe Mon Sep 17 00:00:00 2001
From: Anthony Scopatz
Date: Sun, 19 Feb 2017 20:48:26 -0500
Subject: [PATCH 1/2] adds split() method to lexer

---
 tests/test_lexer.py | 56 +++++++++++++++++++++++++++++++++++++++++++++
 xonsh/lexer.py      | 27 ++++++++++++++++++++++
 2 files changed, 83 insertions(+)

diff --git a/tests/test_lexer.py b/tests/test_lexer.py
index e44d0829d..8b9c9fb23 100644
--- a/tests/test_lexer.py
+++ b/tests/test_lexer.py
@@ -32,12 +32,14 @@ def ensure_tuple(x):
         raise TypeError('{0} is not a sequence'.format(x))
     return x
 
+
 def tokens_equal(x, y):
     """Tests whether two token are equal."""
     xtup = ensure_tuple(x)
     ytup = ensure_tuple(y)
     return xtup == ytup
 
+
 def assert_token_equal(x, y):
     """Asserts that two tokens are equal."""
     if not tokens_equal(x, y):
@@ -45,6 +47,7 @@ def assert_token_equal(x, y):
         pytest.fail(msg)
     return True
 
+
 def assert_tokens_equal(x, y):
     """Asserts that two token sequences are equal."""
     if len(x) != len(y):
@@ -60,6 +63,7 @@ def assert_tokens_equal(x, y):
         pytest.fail(msg)
     return True
 
+
 def check_token(inp, exp):
     l = Lexer()
     l.input(inp)
@@ -70,41 +74,50 @@ def check_token(inp, exp):
         pytest.fail(msg.format(len(obs), pformat(obs)))
     return assert_token_equal(exp, obs[0])
 
+
 def check_tokens(inp, exp):
     l = Lexer()
     l.input(inp)
     obs = list(l)
     return assert_tokens_equal(exp, obs)
 
+
 def check_tokens_subproc(inp, exp):
     l = Lexer()
     l.input('$[{}]'.format(inp))
     obs = list(l)[1:-1]
     return assert_tokens_equal(exp, obs)
 
+
 def test_int_literal():
     assert check_token('42', ['NUMBER', '42', 0])
 
+
 def test_hex_literal():
     assert check_token('0x42', ['NUMBER', '0x42', 0])
 
+
 def test_oct_o_literal():
     assert check_token('0o42', ['NUMBER', '0o42', 0])
 
+
 def test_bin_literal():
     assert check_token('0b101010', ['NUMBER', '0b101010', 0])
 
+
 def test_indent():
     exp = [('INDENT', ' \t ', 0), ('NUMBER', '42', 5), ('DEDENT', '', 0)]
     assert check_tokens(' \t 42', exp)
 
+
 def test_post_whitespace():
     inp = '42 \t '
     exp = [('NUMBER', '42', 0)]
     assert check_tokens(inp, exp)
 
+
 def test_internal_whitespace():
     inp = '42 +\t65'
     exp = [('NUMBER', '42', 0),
@@ -112,6 +125,7 @@ def test_internal_whitespace():
            ('NUMBER', '65', 6),]
     assert check_tokens(inp, exp)
 
+
 def test_indent_internal_whitespace():
     inp = ' 42 +\t65'
     exp = [('INDENT', ' ', 0),
@@ -121,6 +135,7 @@ def test_indent_internal_whitespace():
            ('DEDENT', '', 0)]
     assert check_tokens(inp, exp)
 
+
 def test_assignment():
     inp = 'x = 42'
     exp = [('NAME', 'x', 0),
@@ -128,6 +143,7 @@ def test_assignment():
            ('NUMBER', '42', 4),]
     assert check_tokens(inp, exp)
 
+
 def test_multiline():
     inp = 'x\ny'
     exp = [('NAME', 'x', 0),
@@ -144,51 +160,67 @@ def test_atdollar_expression():
            ('RPAREN', ')', 15)]
     assert check_tokens(inp, exp)
 
+
 def test_and():
     assert check_token('and', ['AND', 'and', 0])
 
+
 def test_ampersand():
     assert check_token('&', ['AMPERSAND', '&', 0])
 
+
 def test_atdollar():
     assert check_token('@$', ['ATDOLLAR', '@$', 0])
 
+
 def test_doubleamp():
     assert check_token('&&', ['AND', 'and', 0])
 
+
 def test_pipe():
     assert check_token('|', ['PIPE', '|', 0])
 
+
 def test_doublepipe():
     assert check_token('||', ['OR', 'or', 0])
 
+
 def test_single_quote_literal():
     assert check_token("'yo'", ['STRING', "'yo'", 0])
 
+
 def test_double_quote_literal():
     assert check_token('"yo"', ['STRING', '"yo"', 0])
 
+
 def test_triple_single_quote_literal():
     assert check_token("'''yo'''", ['STRING', "'''yo'''", 0])
 
+
 def test_triple_double_quote_literal():
     assert check_token('"""yo"""', ['STRING', '"""yo"""', 0])
 
+
 def test_single_raw_string_literal():
     assert check_token("r'yo'", ['STRING', "r'yo'", 0])
 
+
 def test_double_raw_string_literal():
     assert check_token('r"yo"', ['STRING', 'r"yo"', 0])
 
+
 def test_single_unicode_literal():
     assert check_token("u'yo'", ['STRING', "u'yo'", 0])
 
+
 def test_double_unicode_literal():
     assert check_token('u"yo"', ['STRING', 'u"yo"', 0])
 
+
 def test_single_bytes_literal():
     assert check_token("b'yo'", ['STRING', "b'yo'", 0])
 
+
 def test_path_string_literal():
     assert check_token("p'/foo'", ['STRING', "p'/foo'", 0])
     assert check_token('p"/foo"', ['STRING', 'p"/foo"', 0])
@@ -204,12 +236,36 @@ def test_regex_globs():
             c = '{}`{}`'.format(p,i)
             assert check_token(c, ['SEARCHPATH', c, 0])
 
+
 @pytest.mark.parametrize('case', [
     '0.0', '.0', '0.', '1e10', '1.e42', '0.1e42', '0.5e-42', '5E10', '5e+42'])
 def test_float_literals(case):
     assert check_token(case, ['NUMBER', case, 0])
 
+
 def test_ioredir():
     cases = ['2>1', 'err>out', 'o>', 'all>', 'e>o', 'e>', 'out>', '2>&1']
     for s in cases:
         assert check_tokens_subproc(s, [('IOREDIRECT', s, 2)])
+
+
+@pytest.mark.parametrize('s, exp', [
+    ('', []),
+    (' \t \n \t ', []),
+    ('echo hello', ['echo', 'hello']),
+    ('echo "hello"', ['echo', '"hello"']),
+    ('![echo "hello"]', ['![echo', '"hello"]']),
+    ('/usr/bin/echo hello', ['/usr/bin/echo', 'hello']),
+    ('$(/usr/bin/echo hello)', ['$(/usr/bin/echo', 'hello)']),
+    ('C:\\Python\\python.exe -m xonsh', ['C:\\Python\\python.exe', '-m', 'xonsh']),
+    ('print("""I am a triple string""")', ['print("""I am a triple string""")']),
+    ('print("""I am a \ntriple string""")', ['print("""I am a \ntriple string""")']),
+    ('echo $HOME', ['echo', '$HOME']),
+    ('echo -n $HOME', ['echo', '-n', '$HOME']),
+    ('echo --go=away', ['echo', '--go=away']),
+    ('echo --go=$HOME', ['echo', '--go=$HOME']),
+])
+def test_lexer_split(s, exp):
+    lexer = Lexer()
+    obs = lexer.split(s)
+    assert exp == obs
diff --git a/xonsh/lexer.py b/xonsh/lexer.py
index 9f37758fa..3b30dca11 100644
--- a/xonsh/lexer.py
+++ b/xonsh/lexer.py
@@ -334,6 +334,33 @@ class Lexer(object):
             yield t
             t = self.token()
 
+    def split(self, s):
+        """Splits a string into a list of strings which are whitespace-separated
+        tokens.
+        """
+        vals = []
+        self.input(s)
+        l = c = -1
+        ws = 'WS'
+        nl = '\n'
+        for t in self:
+            if t.type == ws:
+                continue
+            elif l < t.lineno:
+                vals.append(t.value)
+            elif len(vals) > 0 and c == t.lexpos:
+                vals[-1] = vals[-1] + t.value
+            else:
+                vals.append(t.value)
+            nnl = t.value.count(nl)
+            if nnl == 0:
+                l = t.lineno
+                c = t.lexpos + len(t.value)
+            else:
+                l = t.lineno + nnl
+                c = len(t.value.rpartition(nl)[-1])
+        return vals
+
 #
 # All the tokens recognized by the lexer
 #

From a8224d29a2ccfa268915c04d5964737b2721c648 Mon Sep 17 00:00:00 2001
From: Anthony Scopatz
Date: Sun, 19 Feb 2017 21:02:04 -0500
Subject: [PATCH 2/2] integrated lexer split into @$(cmd)

---
 news/lex.rst       | 17 +++++++++++++++++
 xonsh/built_ins.py |  9 +++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)
 create mode 100644 news/lex.rst

diff --git a/news/lex.rst b/news/lex.rst
new file mode 100644
index 000000000..610b6a89c
--- /dev/null
+++ b/news/lex.rst
@@ -0,0 +1,17 @@
+**Added:**
+
+* The lexer has a new ``split()`` method which splits strings
+  according to xonsh's rules for whitespace and quotes.
+
+**Changed:** None
+
+**Deprecated:** None
+
+**Removed:** None
+
+**Fixed:**
+
+* The ``@$(cmd)`` operator now splits the captured output according to
+  xonsh semantics, rather than just on whitespace using ``str.split()``.
+
+**Security:** None
diff --git a/xonsh/built_ins.py b/xonsh/built_ins.py
index c30df6fb9..14c1b724c 100644
--- a/xonsh/built_ins.py
+++ b/xonsh/built_ins.py
@@ -839,8 +839,13 @@ def subproc_captured_stdout(*cmds):
 
 def subproc_captured_inject(*cmds):
     """Runs a subprocess, capturing the output. Returns a list of
-    whitespace-separated strings in the stdout that was produced."""
-    return [i.strip() for i in run_subproc(cmds, captured='stdout').split()]
+    whitespace-separated strings of the stdout that was produced.
+    The string is split using xonsh's lexer, rather than Python's str.split()
+    or shlex.split().
+    """
+    s = run_subproc(cmds, captured='stdout')
+    toks = builtins.__xonsh_execer__.parser.lexer.split(s)
+    return toks
 
 
 def subproc_captured_object(*cmds):
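
As a usage sketch (not part of the diffs above), here is how the new
Lexer.split() is expected to behave on a checkout with patch 1/2 applied.
The inputs and outputs are taken directly from the test_lexer_split cases,
contrasted with what plain str.split() returns for the same input:

    from xonsh.lexer import Lexer

    lexer = Lexer()

    lexer.split('echo "hello"')
    # ['echo', '"hello"']   <- the quoted argument survives as a single token

    lexer.split('print("""I am a \ntriple string""")')
    # ['print("""I am a \ntriple string""")']
    # triple-quoted strings spanning a newline are kept whole

    'print("""I am a \ntriple string""")'.split()
    # ['print("""I', 'am', 'a', 'triple', 'string""")']
    # plain whitespace splitting breaks inside the quotes, which is what
    # @$(cmd) effectively did before patch 2/2 switched it to lexer.split()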