# -*- coding: utf-8 -*-
"""Tests the xonsh lexer."""
from __future__ import unicode_literals, print_function
import os
import sys
from collections.abc import Sequence

sys.path.insert(0, os.path.abspath('..'))  # FIXME
from pprint import pformat

import pytest

try:
    from ply.lex import LexToken
except ImportError:
    from xonsh.ply.ply.lex import LexToken

from xonsh.lexer import Lexer

LEXER_ARGS = {'lextab': 'lexer_test_table', 'debug': 0}


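# The helpers below normalize tokens for comparison: a LexToken is reduced to
# a (type, value, lexpos) tuple, so expected tokens in the tests can be
# written as plain lists, e.g. ['NUMBER', '42', 0].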
def ensure_tuple(x):
    if isinstance(x, LexToken):
        # line numbers can no longer be solely determined from the lexer
        # x = (x.type, x.value, x.lineno, x.lexpos)
        x = (x.type, x.value, x.lexpos)
    elif isinstance(x, tuple):
        pass
    elif isinstance(x, Sequence):
        x = tuple(x)
    else:
        raise TypeError('{0} is not a sequence'.format(x))
    return x


def tokens_equal(x, y):
    """Tests whether two tokens are equal."""
    xtup = ensure_tuple(x)
    ytup = ensure_tuple(y)
    return xtup == ytup


def assert_token_equal(x, y):
    """Asserts that two tokens are equal."""
    if not tokens_equal(x, y):
        msg = 'The tokens differ: {0!r} != {1!r}'.format(x, y)
        pytest.fail(msg)
    return True


def assert_tokens_equal(x, y):
    """Asserts that two token sequences are equal."""
    if len(x) != len(y):
        msg = 'The token sequences have different lengths: {0!r} != {1!r}\n'
        msg += '# x\n{2}\n\n# y\n{3}'
        pytest.fail(msg.format(len(x), len(y), pformat(x), pformat(y)))
    diffs = [(a, b) for a, b in zip(x, y) if not tokens_equal(a, b)]
    if len(diffs) > 0:
        msg = ['The token sequences differ: ']
        for a, b in diffs:
            msg += ['', '- ' + repr(a), '+ ' + repr(b)]
        msg = '\n'.join(msg)
        pytest.fail(msg)
    return True


def check_token(inp, exp):
    l = Lexer()
    l.input(inp)
    obs = list(l)
    if len(obs) != 1:
        msg = 'The observed sequence does not have length 1: {0!r} != 1\n'
        msg += '# obs\n{1}'
        pytest.fail(msg.format(len(obs), pformat(obs)))
    return assert_token_equal(exp, obs[0])


def check_tokens(inp, exp):
    l = Lexer()
    l.input(inp)
    obs = list(l)
    return assert_tokens_equal(exp, obs)


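# check_tokens_subproc below wraps its input in '$[...]' to force the lexer
# into subprocess mode; the [1:stop] slice then drops the wrapping tokens so
# only the tokens under test are compared.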
def check_tokens_subproc(inp, exp, stop=-1):
    l = Lexer()
    l.input('$[{}]'.format(inp))
    obs = list(l)[1:stop]
    return assert_tokens_equal(exp, obs)


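# Expected tokens throughout the tests are (type, value, lexpos) triples;
# lineno is omitted (see ensure_tuple above).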
def test_int_literal():
    assert check_token('42', ['NUMBER', '42', 0])
    assert check_token('4_2', ['NUMBER', '4_2', 0])


def test_hex_literal():
    assert check_token('0x42', ['NUMBER', '0x42', 0])
    assert check_token('0x4_2', ['NUMBER', '0x4_2', 0])


def test_oct_o_literal():
    assert check_token('0o42', ['NUMBER', '0o42', 0])
    assert check_token('0o4_2', ['NUMBER', '0o4_2', 0])


def test_bin_literal():
    assert check_token('0b101010', ['NUMBER', '0b101010', 0])
    assert check_token('0b10_10_10', ['NUMBER', '0b10_10_10', 0])


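# Leading whitespace lexes as an INDENT token carrying the literal indent
# string, with a matching empty DEDENT emitted at the end.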
def test_indent():
    exp = [('INDENT', '  \t  ', 0),
           ('NUMBER', '42', 5),
           ('DEDENT', '', 0)]
    assert check_tokens('  \t  42', exp)


def test_post_whitespace():
    inp = '42  \t  '
    exp = [('NUMBER', '42', 0)]
    assert check_tokens(inp, exp)


def test_internal_whitespace():
    inp = '42  +\t65'
    exp = [('NUMBER', '42', 0),
           ('PLUS', '+', 4),
           ('NUMBER', '65', 6),]
    assert check_tokens(inp, exp)


def test_indent_internal_whitespace():
    inp = ' 42  +\t65'
    exp = [('INDENT', ' ', 0),
           ('NUMBER', '42', 1),
           ('PLUS', '+', 5),
           ('NUMBER', '65', 7),
           ('DEDENT', '', 0)]
    assert check_tokens(inp, exp)


def test_assignment():
    inp = 'x = 42'
    exp = [('NAME', 'x', 0),
           ('EQUALS', '=', 2),
           ('NUMBER', '42', 4),]
    assert check_tokens(inp, exp)


def test_multiline():
    inp = 'x\ny'
    exp = [('NAME', 'x', 0),
           ('NEWLINE', '\n', 1),
           ('NAME', 'y', 0),]
    assert check_tokens(inp, exp)


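# Note that '@$(' lexes as a single ATDOLLAR_LPAREN token, not as ATDOLLAR
# followed by LPAREN.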
def test_atdollar_expression():
    inp = '@$(which python)'
    exp = [('ATDOLLAR_LPAREN', '@$(', 0),
           ('NAME', 'which', 3),
           ('WS', ' ', 8),
           ('NAME', 'python', 9),
           ('RPAREN', ')', 15)]
    assert check_tokens(inp, exp)


def test_and():
    assert check_token('and', ['AND', 'and', 0])


def test_ampersand():
    assert check_token('&', ['AMPERSAND', '&', 0])


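# In subprocess mode ('![...]'), 'and' and 'or' embedded in hyphenated words
# must lex as plain NAME tokens rather than boolean operators; 'pre'/'post'
# in the test names indicate where the surrounding word sits.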
def test_not_really_and_pre():
    inp = "![foo-and]"
    exp = [
        ('BANG_LBRACKET', '![', 0),
        ('NAME', 'foo', 2),
        ('MINUS', '-', 5),
        ('NAME', 'and', 6),
        ('RBRACKET', ']', 9),
    ]
    assert check_tokens(inp, exp)


def test_not_really_and_post():
    inp = "![and-bar]"
    exp = [
        ('BANG_LBRACKET', '![', 0),
        ('NAME', 'and', 2),
        ('MINUS', '-', 5),
        ('NAME', 'bar', 6),
        ('RBRACKET', ']', 9),
    ]
    assert check_tokens(inp, exp)


def test_not_really_and_pre_post():
    inp = "![foo-and-bar]"
    exp = [
        ('BANG_LBRACKET', '![', 0),
        ('NAME', 'foo', 2),
        ('MINUS', '-', 5),
        ('NAME', 'and', 6),
        ('MINUS', '-', 9),
        ('NAME', 'bar', 10),
        ('RBRACKET', ']', 13),
    ]
    assert check_tokens(inp, exp)


def test_not_really_or_pre():
    inp = "![foo-or]"
    exp = [
        ('BANG_LBRACKET', '![', 0),
        ('NAME', 'foo', 2),
        ('MINUS', '-', 5),
        ('NAME', 'or', 6),
        ('RBRACKET', ']', 8),
    ]
    assert check_tokens(inp, exp)


def test_not_really_or_post():
    inp = "![or-bar]"
    exp = [
        ('BANG_LBRACKET', '![', 0),
        ('NAME', 'or', 2),
        ('MINUS', '-', 4),
        ('NAME', 'bar', 5),
        ('RBRACKET', ']', 8),
    ]
    assert check_tokens(inp, exp)


def test_not_really_or_pre_post():
    inp = "![foo-or-bar]"
    exp = [
        ('BANG_LBRACKET', '![', 0),
        ('NAME', 'foo', 2),
        ('MINUS', '-', 5),
        ('NAME', 'or', 6),
        ('MINUS', '-', 8),
        ('NAME', 'bar', 9),
        ('RBRACKET', ']', 12),
    ]
    assert check_tokens(inp, exp)


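# In the continuation tests below, lexpos restarts at the start of each
# physical line, which is why the expected positions drop back down after
# every backslash-newline.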
def test_subproc_line_cont_space():
    inp = ("![echo --option1 value1 \\\n"
           "     --option2 value2 \\\n"
           "     --optionZ valueZ]")
    exp = [
        ('BANG_LBRACKET', '![', 0),
        ('NAME', 'echo', 2),
        ('WS', ' ', 6),
        ('MINUS', '-', 7),
        ('MINUS', '-', 8),
        ('NAME', 'option1', 9),
        ('WS', ' ', 16),
        ('NAME', 'value1', 17),
        ('WS', ' ', 23),
        ('MINUS', '-', 5),
        ('MINUS', '-', 6),
        ('NAME', 'option2', 7),
        ('WS', ' ', 14),
        ('NAME', 'value2', 15),
        ('WS', ' ', 21),
        ('MINUS', '-', 5),
        ('MINUS', '-', 6),
        ('NAME', 'optionZ', 7),
        ('WS', ' ', 14),
        ('NAME', 'valueZ', 15),
        ('RBRACKET', ']', 21),
    ]
    assert check_tokens(inp, exp)


def test_subproc_line_cont_nospace():
    inp = ("![echo --option1 value1\\\n"
           "     --option2 value2\\\n"
           "     --optionZ valueZ]")
    exp = [
        ('BANG_LBRACKET', '![', 0),
        ('NAME', 'echo', 2),
        ('WS', ' ', 6),
        ('MINUS', '-', 7),
        ('MINUS', '-', 8),
        ('NAME', 'option1', 9),
        ('WS', ' ', 16),
        ('NAME', 'value1', 17),
        ('WS', '\\', 23),
        ('MINUS', '-', 5),
        ('MINUS', '-', 6),
        ('NAME', 'option2', 7),
        ('WS', ' ', 14),
        ('NAME', 'value2', 15),
        ('WS', '\\', 21),
        ('MINUS', '-', 5),
        ('MINUS', '-', 6),
        ('NAME', 'optionZ', 7),
        ('WS', ' ', 14),
        ('NAME', 'valueZ', 15),
        ('RBRACKET', ']', 21),
    ]
    assert check_tokens(inp, exp)


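# The next tests pin down operator tokens; note that the lexer normalizes the
# shell-style '&&' and '||' to the Python keywords 'and' and 'or'.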
def test_atdollar():
    assert check_token('@$', ['ATDOLLAR', '@$', 0])


def test_doubleamp():
    assert check_token('&&', ['AND', 'and', 0])


def test_pipe():
    assert check_token('|', ['PIPE', '|', 0])


def test_doublepipe():
    assert check_token('||', ['OR', 'or', 0])


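# String literals of every flavor pass through as single STRING tokens, with
# quotes and prefixes preserved verbatim in the token value.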
def test_single_quote_literal():
    assert check_token("'yo'", ['STRING', "'yo'", 0])


def test_double_quote_literal():
    assert check_token('"yo"', ['STRING', '"yo"', 0])


def test_triple_single_quote_literal():
    assert check_token("'''yo'''", ['STRING', "'''yo'''", 0])


def test_triple_double_quote_literal():
    assert check_token('"""yo"""', ['STRING', '"""yo"""', 0])


def test_single_raw_string_literal():
    assert check_token("r'yo'", ['STRING', "r'yo'", 0])


def test_double_raw_string_literal():
    assert check_token('r"yo"', ['STRING', 'r"yo"', 0])


def test_single_f_string_literal():
    assert check_token("f'{yo}'", ['STRING', "f'{yo}'", 0])


def test_double_f_string_literal():
    assert check_token('f"{yo}"', ['STRING', 'f"{yo}"', 0])


def test_single_unicode_literal():
    assert check_token("u'yo'", ['STRING', "u'yo'", 0])


def test_double_unicode_literal():
    assert check_token('u"yo"', ['STRING', 'u"yo"', 0])


def test_single_bytes_literal():
    assert check_token("b'yo'", ['STRING', "b'yo'", 0])


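# 'p'-prefixed strings are xonsh path string literals; the prefix may combine
# with 'r' in either order and stays in the token value.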
def test_path_string_literal():
    assert check_token("p'/foo'", ['STRING', "p'/foo'", 0])
    assert check_token('p"/foo"', ['STRING', 'p"/foo"', 0])
    assert check_token("pr'/foo'", ['STRING', "pr'/foo'", 0])
    assert check_token('pr"/foo"', ['STRING', 'pr"/foo"', 0])
    assert check_token("rp'/foo'", ['STRING', "rp'/foo'", 0])
    assert check_token('rp"/foo"', ['STRING', 'rp"/foo"', 0])


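# Backtick-delimited glob/regex patterns lex as a single SEARCHPATH token,
# with any prefix (r, g, p, pg, or a custom '@name') kept in the value.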
def test_regex_globs():
    for i in ('.*', r'\d*', '.*#{1,2}'):
        for p in ('', 'r', 'g', '@somethingelse', 'p', 'pg'):
            c = '{}`{}`'.format(p, i)
            assert check_token(c, ['SEARCHPATH', c, 0])


@pytest.mark.parametrize('case', [
    '0.0', '.0', '0.', '1e10', '1.e42', '0.1e42', '0.5e-42', '5E10', '5e+42',
    '1_0e1_0'])
def test_float_literals(case):
    assert check_token(case, ['NUMBER', case, 0])


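# IO redirections lex as a single IOREDIRECT token; the expected lexpos of 2
# reflects the '$[' wrapper added by check_tokens_subproc, and stop=-2 trims
# the trailing wrapper tokens before comparison.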
@pytest.mark.parametrize('case', [
    '2>1', 'err>out', 'o>', 'all>', 'e>o', 'e>', 'out>', '2>&1'
    ])
def test_ioredir(case):
    assert check_tokens_subproc(case, [('IOREDIRECT', case, 2)], stop=-2)


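# A bare redirection operator must not fuse with what follows it: whatever
# the spelling, the third token the lexer produces (obs[2]) is expected to be
# the WS separating the operator from the target path.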
@pytest.mark.parametrize('case', [
    '>', '>>', '<', 'e>',
    '> ', '>> ', '< ', 'e> ',
    ])
def test_redir_whitespace(case):
    inp = '![{}/path/to/file]'.format(case)
    l = Lexer()
    l.input(inp)
    obs = list(l)
    assert obs[2].type == 'WS'


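# Lexer.split breaks a command line into subprocess-mode words: quoted
# strings stay intact (even triple-quoted ones spanning newlines), while
# unquoted whitespace separates arguments.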
@pytest.mark.parametrize('s, exp', [
    ('', []),
    (' \t \n \t ', []),
    ('echo hello', ['echo', 'hello']),
    ('echo "hello"', ['echo', '"hello"']),
    ('![echo "hello"]', ['![echo', '"hello"]']),
    ('/usr/bin/echo hello', ['/usr/bin/echo', 'hello']),
    ('$(/usr/bin/echo hello)', ['$(/usr/bin/echo', 'hello)']),
    ('C:\\Python\\python.exe -m xonsh', ['C:\\Python\\python.exe', '-m', 'xonsh']),
    ('print("""I am a triple string""")', ['print("""I am a triple string""")']),
    ('print("""I am a \ntriple string""")', ['print("""I am a \ntriple string""")']),
    ('echo $HOME', ['echo', '$HOME']),
    ('echo -n $HOME', ['echo', '-n', '$HOME']),
    ('echo --go=away', ['echo', '--go=away']),
    ('echo --go=$HOME', ['echo', '--go=$HOME']),
    ])
def test_lexer_split(s, exp):
    lexer = Lexer()
    obs = lexer.split(s)
    assert exp == obs