# -*- coding: utf-8 -*- """Tests the xonsh lexer.""" from __future__ import unicode_literals, print_function import os import sys from collections import Sequence sys.path.insert(0, os.path.abspath('..')) # FIXME from pprint import pformat import pytest try: from ply.lex import LexToken except ImportError: from xonsh.ply.lex import LexToken from xonsh.lexer import Lexer LEXER_ARGS = {'lextab': 'lexer_test_table', 'debug': 0} def ensure_tuple(x): if isinstance(x, LexToken): # line numbers can no longer be solely determined from the lexer #x = (x.type, x.value, x.lineno, x.lexpos) x = (x.type, x.value, x.lexpos) elif isinstance(x, tuple): pass elif isinstance(x, Sequence): x = tuple(x) else: raise TypeError('{0} is not a sequence'.format(x)) return x def tokens_equal(x, y): """Tests whether two token are equal.""" xtup = ensure_tuple(x) ytup = ensure_tuple(y) return xtup == ytup def assert_token_equal(x, y): """Asserts that two tokens are equal.""" if not tokens_equal(x, y): msg = 'The tokens differ: {0!r} != {1!r}'.format(x, y) pytest.fail(msg) return True def assert_tokens_equal(x, y): """Asserts that two token sequences are equal.""" if len(x) != len(y): msg = 'The tokens sequences have different lengths: {0!r} != {1!r}\n' msg += '# x\n{2}\n\n# y\n{3}' pytest.fail(msg.format(len(x), len(y), pformat(x), pformat(y))) diffs = [(a, b) for a, b in zip(x, y) if not tokens_equal(a, b)] if len(diffs) > 0: msg = ['The token sequences differ: '] for a, b in diffs: msg += ['', '- ' + repr(a), '+ ' + repr(b)] msg = '\n'.join(msg) pytest.fail(msg) return True def check_token(inp, exp): l = Lexer() l.input(inp) obs = list(l) if len(obs) != 1: msg = 'The observed sequence does not have length-1: {0!r} != 1\n' msg += '# obs\n{1}' pytest.fail(msg.format(len(obs), pformat(obs))) return assert_token_equal(exp, obs[0]) def check_tokens(inp, exp): l = Lexer() l.input(inp) obs = list(l) return assert_tokens_equal(exp, obs) def check_tokens_subproc(inp, exp): l = Lexer() l.input('$[{}]'.format(inp)) obs = list(l)[1:-1] return assert_tokens_equal(exp, obs) def test_int_literal(): assert check_token('42', ['NUMBER', '42', 0]) def test_hex_literal(): assert check_token('0x42', ['NUMBER', '0x42', 0]) def test_oct_o_literal(): assert check_token('0o42', ['NUMBER', '0o42', 0]) def test_bin_literal(): assert check_token('0b101010', ['NUMBER', '0b101010', 0]) def test_indent(): exp = [('INDENT', ' \t ', 0), ('NUMBER', '42', 5), ('DEDENT', '', 0)] assert check_tokens(' \t 42', exp) def test_post_whitespace(): inp = '42 \t ' exp = [('NUMBER', '42', 0)] assert check_tokens(inp, exp) def test_internal_whitespace(): inp = '42 +\t65' exp = [('NUMBER', '42', 0), ('PLUS', '+', 4), ('NUMBER', '65', 6),] assert check_tokens(inp, exp) def test_indent_internal_whitespace(): inp = ' 42 +\t65' exp = [('INDENT', ' ', 0), ('NUMBER', '42', 1), ('PLUS', '+', 5), ('NUMBER', '65', 7), ('DEDENT', '', 0)] assert check_tokens(inp, exp) def test_assignment(): inp = 'x = 42' exp = [('NAME', 'x', 0), ('EQUALS', '=', 2), ('NUMBER', '42', 4),] assert check_tokens(inp, exp) def test_multiline(): inp = 'x\ny' exp = [('NAME', 'x', 0), ('NEWLINE', '\n', 1), ('NAME', 'y', 0),] assert check_tokens(inp, exp) def test_atdollar_expression(): inp = '@$(which python)' exp = [('ATDOLLAR_LPAREN', '@$(', 0), ('NAME', 'which', 3), ('WS', ' ', 8), ('NAME', 'python', 9), ('RPAREN', ')', 15)] assert check_tokens(inp, exp) def test_and(): assert check_token('and', ['AND', 'and', 0]) def test_ampersand(): assert check_token('&', ['AMPERSAND', '&', 0]) def test_atdollar(): assert check_token('@$', ['ATDOLLAR', '@$', 0]) def test_doubleamp(): assert check_token('&&', ['AND', 'and', 0]) def test_pipe(): assert check_token('|', ['PIPE', '|', 0]) def test_doublepipe(): assert check_token('||', ['OR', 'or', 0]) def test_single_quote_literal(): assert check_token("'yo'", ['STRING', "'yo'", 0]) def test_double_quote_literal(): assert check_token('"yo"', ['STRING', '"yo"', 0]) def test_triple_single_quote_literal(): assert check_token("'''yo'''", ['STRING', "'''yo'''", 0]) def test_triple_double_quote_literal(): assert check_token('"""yo"""', ['STRING', '"""yo"""', 0]) def test_single_raw_string_literal(): assert check_token("r'yo'", ['STRING', "r'yo'", 0]) def test_double_raw_string_literal(): assert check_token('r"yo"', ['STRING', 'r"yo"', 0]) def test_single_unicode_literal(): assert check_token("u'yo'", ['STRING', "u'yo'", 0]) def test_double_unicode_literal(): assert check_token('u"yo"', ['STRING', 'u"yo"', 0]) def test_single_bytes_literal(): assert check_token("b'yo'", ['STRING', "b'yo'", 0]) def test_regex_globs(): for i in ('.*', r'\d*', '.*#{1,2}'): for p in ('', 'r', 'g', '@somethingelse'): c = '{}`{}`'.format(p,i) assert check_token(c, ['SEARCHPATH', c, 0]) @pytest.mark.parametrize('case', [ '0.0', '.0', '0.', '1e10', '1.e42', '0.1e42', '0.5e-42', '5E10', '5e+42']) def test_float_literals(case): assert check_token(case, ['NUMBER', case, 0]) def test_ioredir(): cases = ['2>1', 'err>out', 'o>', 'all>', 'e>o', 'e>', 'out>', '2>&1'] for s in cases: assert check_tokens_subproc(s, [('IOREDIRECT', s, 2)])