From 4030ed8050629cfc9d7b97a217f71b4c3db424fe Mon Sep 17 00:00:00 2001
From: Anthony Scopatz
Date: Sun, 19 Feb 2017 20:48:26 -0500
Subject: [PATCH 1/2] adds split() method to lexer

---
 tests/test_lexer.py | 56 +++++++++++++++++++++++++++++++++++++++++++++
 xonsh/lexer.py      | 27 ++++++++++++++++++++++
 2 files changed, 83 insertions(+)

diff --git a/tests/test_lexer.py b/tests/test_lexer.py
index e44d0829d..8b9c9fb23 100644
--- a/tests/test_lexer.py
+++ b/tests/test_lexer.py
@@ -32,12 +32,14 @@ def ensure_tuple(x):
         raise TypeError('{0} is not a sequence'.format(x))
     return x
 
+
 def tokens_equal(x, y):
     """Tests whether two token are equal."""
     xtup = ensure_tuple(x)
     ytup = ensure_tuple(y)
     return xtup == ytup
 
+
 def assert_token_equal(x, y):
     """Asserts that two tokens are equal."""
     if not tokens_equal(x, y):
@@ -45,6 +47,7 @@ def assert_token_equal(x, y):
         pytest.fail(msg)
     return True
 
+
 def assert_tokens_equal(x, y):
     """Asserts that two token sequences are equal."""
     if len(x) != len(y):
@@ -60,6 +63,7 @@ def assert_tokens_equal(x, y):
         pytest.fail(msg)
     return True
 
+
 def check_token(inp, exp):
     l = Lexer()
     l.input(inp)
@@ -70,41 +74,50 @@ def check_token(inp, exp):
         pytest.fail(msg.format(len(obs), pformat(obs)))
     return assert_token_equal(exp, obs[0])
 
+
 def check_tokens(inp, exp):
     l = Lexer()
     l.input(inp)
     obs = list(l)
     return assert_tokens_equal(exp, obs)
 
+
 def check_tokens_subproc(inp, exp):
     l = Lexer()
     l.input('$[{}]'.format(inp))
     obs = list(l)[1:-1]
     return assert_tokens_equal(exp, obs)
 
+
 def test_int_literal():
     assert check_token('42', ['NUMBER', '42', 0])
 
+
 def test_hex_literal():
     assert check_token('0x42', ['NUMBER', '0x42', 0])
 
+
 def test_oct_o_literal():
     assert check_token('0o42', ['NUMBER', '0o42', 0])
 
+
 def test_bin_literal():
     assert check_token('0b101010', ['NUMBER', '0b101010', 0])
 
+
 def test_indent():
     exp = [('INDENT', ' \t ', 0), ('NUMBER', '42', 5), ('DEDENT', '', 0)]
     assert check_tokens(' \t 42', exp)
 
+
 def test_post_whitespace():
     inp = '42 \t '
     exp = [('NUMBER', '42', 0)]
     assert check_tokens(inp, exp)
 
+
 def test_internal_whitespace():
     inp = '42 +\t65'
     exp = [('NUMBER', '42', 0),
@@ -112,6 +125,7 @@ def test_internal_whitespace():
            ('NUMBER', '65', 6),]
     assert check_tokens(inp, exp)
 
+
 def test_indent_internal_whitespace():
     inp = ' 42 +\t65'
     exp = [('INDENT', ' ', 0),
@@ -121,6 +135,7 @@ def test_indent_internal_whitespace():
            ('DEDENT', '', 0)]
     assert check_tokens(inp, exp)
 
+
 def test_assignment():
     inp = 'x = 42'
     exp = [('NAME', 'x', 0),
@@ -128,6 +143,7 @@ def test_assignment():
            ('NUMBER', '42', 4),]
     assert check_tokens(inp, exp)
 
+
 def test_multiline():
     inp = 'x\ny'
     exp = [('NAME', 'x', 0),
@@ -144,51 +160,67 @@ def test_atdollar_expression():
            ('RPAREN', ')', 15)]
     assert check_tokens(inp, exp)
 
+
 def test_and():
     assert check_token('and', ['AND', 'and', 0])
 
+
 def test_ampersand():
     assert check_token('&', ['AMPERSAND', '&', 0])
 
+
 def test_atdollar():
     assert check_token('@$', ['ATDOLLAR', '@$', 0])
 
+
 def test_doubleamp():
     assert check_token('&&', ['AND', 'and', 0])
 
+
 def test_pipe():
     assert check_token('|', ['PIPE', '|', 0])
 
+
 def test_doublepipe():
     assert check_token('||', ['OR', 'or', 0])
 
+
 def test_single_quote_literal():
     assert check_token("'yo'", ['STRING', "'yo'", 0])
 
+
 def test_double_quote_literal():
     assert check_token('"yo"', ['STRING', '"yo"', 0])
 
+
 def test_triple_single_quote_literal():
     assert check_token("'''yo'''", ['STRING', "'''yo'''", 0])
 
+
 def test_triple_double_quote_literal():
     assert check_token('"""yo"""', ['STRING', '"""yo"""', 0])
 
+
 def test_single_raw_string_literal():
     assert check_token("r'yo'", ['STRING', "r'yo'", 0])
 
+
 def test_double_raw_string_literal():
     assert check_token('r"yo"', ['STRING', 'r"yo"', 0])
 
+
 def test_single_unicode_literal():
     assert check_token("u'yo'", ['STRING', "u'yo'", 0])
 
+
 def test_double_unicode_literal():
     assert check_token('u"yo"', ['STRING', 'u"yo"', 0])
 
+
 def test_single_bytes_literal():
     assert check_token("b'yo'", ['STRING', "b'yo'", 0])
 
+
 def test_path_string_literal():
     assert check_token("p'/foo'", ['STRING', "p'/foo'", 0])
     assert check_token('p"/foo"', ['STRING', 'p"/foo"', 0])
@@ -204,12 +236,36 @@ def test_regex_globs():
             c = '{}`{}`'.format(p,i)
             assert check_token(c, ['SEARCHPATH', c, 0])
 
+
 @pytest.mark.parametrize('case', [
     '0.0', '.0', '0.', '1e10', '1.e42', '0.1e42', '0.5e-42', '5E10', '5e+42'])
 def test_float_literals(case):
     assert check_token(case, ['NUMBER', case, 0])
 
+
 def test_ioredir():
     cases = ['2>1', 'err>out', 'o>', 'all>', 'e>o', 'e>', 'out>', '2>&1']
     for s in cases:
         assert check_tokens_subproc(s, [('IOREDIRECT', s, 2)])
+
+
+@pytest.mark.parametrize('s, exp', [
+    ('', []),
+    (' \t \n \t ', []),
+    ('echo hello', ['echo', 'hello']),
+    ('echo "hello"', ['echo', '"hello"']),
+    ('![echo "hello"]', ['![echo', '"hello"]']),
+    ('/usr/bin/echo hello', ['/usr/bin/echo', 'hello']),
+    ('$(/usr/bin/echo hello)', ['$(/usr/bin/echo', 'hello)']),
+    ('C:\\Python\\python.exe -m xonsh', ['C:\\Python\\python.exe', '-m', 'xonsh']),
+    ('print("""I am a triple string""")', ['print("""I am a triple string""")']),
+    ('print("""I am a \ntriple string""")', ['print("""I am a \ntriple string""")']),
+    ('echo $HOME', ['echo', '$HOME']),
+    ('echo -n $HOME', ['echo', '-n', '$HOME']),
+    ('echo --go=away', ['echo', '--go=away']),
+    ('echo --go=$HOME', ['echo', '--go=$HOME']),
+])
+def test_lexer_split(s, exp):
+    lexer = Lexer()
+    obs = lexer.split(s)
+    assert exp == obs
diff --git a/xonsh/lexer.py b/xonsh/lexer.py
index 9f37758fa..3b30dca11 100644
--- a/xonsh/lexer.py
+++ b/xonsh/lexer.py
@@ -334,6 +334,33 @@ class Lexer(object):
             yield t
             t = self.token()
 
+    def split(self, s):
+        """Splits a string into a list of strings which are whitespace-separated
+        tokens.
+        """
+        vals = []
+        self.input(s)
+        l = c = -1
+        ws = 'WS'
+        nl = '\n'
+        for t in self:
+            if t.type == ws:
+                continue
+            elif l < t.lineno:
+                vals.append(t.value)
+            elif len(vals) > 0 and c == t.lexpos:
+                vals[-1] = vals[-1] + t.value
+            else:
+                vals.append(t.value)
+            nnl = t.value.count(nl)
+            if nnl == 0:
+                l = t.lineno
+                c = t.lexpos + len(t.value)
+            else:
+                l = t.lineno + nnl
+                c = len(t.value.rpartition(nl)[-1])
+        return vals
+
 #
 # All the tokens recognized by the lexer
 #

From a8224d29a2ccfa268915c04d5964737b2721c648 Mon Sep 17 00:00:00 2001
From: Anthony Scopatz
Date: Sun, 19 Feb 2017 21:02:04 -0500
Subject: [PATCH 2/2] integrated lexer split into @$(cmd)

---
 news/lex.rst       | 17 +++++++++++++++++
 xonsh/built_ins.py |  9 +++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)
 create mode 100644 news/lex.rst

diff --git a/news/lex.rst b/news/lex.rst
new file mode 100644
index 000000000..610b6a89c
--- /dev/null
+++ b/news/lex.rst
@@ -0,0 +1,17 @@
+**Added:**
+
+* The lexer has a new ``split()`` method which splits strings
+  according to xonsh's rules for whitespace and quotes.
+
+**Changed:** None
+
+**Deprecated:** None
+
+**Removed:** None
+
+**Fixed:**
+
+* The ``@$(cmd)`` operator now splits the captured output according to
+  xonsh semantics, rather than just on whitespace using ``str.split()``.
+
+**Security:** None
diff --git a/xonsh/built_ins.py b/xonsh/built_ins.py
index c30df6fb9..14c1b724c 100644
--- a/xonsh/built_ins.py
+++ b/xonsh/built_ins.py
@@ -839,8 +839,13 @@ def subproc_captured_stdout(*cmds):
 
 def subproc_captured_inject(*cmds):
     """Runs a subprocess, capturing the output. Returns a list of
-    whitespace-separated strings in the stdout that was produced."""
-    return [i.strip() for i in run_subproc(cmds, captured='stdout').split()]
+    whitespace-separated strings of the stdout that was produced.
+    The string is split using xonsh's lexer, rather than Python's str.split()
+    or shlex.split().
+    """
+    s = run_subproc(cmds, captured='stdout')
+    toks = builtins.__xonsh_execer__.parser.lexer.split(s)
+    return toks
 
 
 def subproc_captured_object(*cmds):
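
As a usage sketch (not part of the diffs above), here is how the new
Lexer.split() is expected to behave on a checkout with patch 1/2 applied.
The inputs and outputs are taken directly from the test_lexer_split cases,
contrasted with what plain str.split() returns for the same input:

    from xonsh.lexer import Lexer

    lexer = Lexer()

    lexer.split('echo "hello"')
    # ['echo', '"hello"']   <- the quoted argument survives as a single token

    lexer.split('print("""I am a \ntriple string""")')
    # ['print("""I am a \ntriple string""")']
    # triple-quoted strings spanning a newline are kept whole

    'print("""I am a \ntriple string""")'.split()
    # ['print("""I', 'am', 'a', 'triple', 'string""")']
    # plain whitespace splitting breaks inside the quotes, which is what
    # @$(cmd) effectively did before patch 2/2 switched it to lexer.split()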