Mirror of https://github.com/xonsh/xonsh.git (synced 2025-03-05 17:00:58 +01:00)
improved error handling, and some cleanup
commit 34d2367768
parent 7ad7d81420
1 changed file with 31 additions and 319 deletions
xonsh/lexer.py (350 changed lines: +31, -319)
@@ -67,14 +67,16 @@ def handle_dollar(state, token, stream):
     try:
         n = next(stream)
     except:
-        raise Exception("missing token after $")
+        m = "missing token after $"
+        yield _new_token("ERRORTOKEN", m, token.start)

     if n.start != token.end:
-        raise Exception("unexpected whitespace after $")
+        m = "unexpected whitespace after $"
+        yield _new_token("ERRORTOKEN", m, token.start)

     if n.type == tokenize.NAME:
-        state['last'] = n
         yield _new_token('DOLLAR_NAME', '$' + n.string, token.start)
+        state['last'] = token
     elif n.type == tokenize.OP and n.string == '(':
         state['pymode'].append(False)
         state['last'] = n
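Note: throughout this commit the handlers stop raising and instead yield an ERRORTOKEN, so one bad "$" no longer aborts lexing; the caller decides how to report it. As committed, though, the except branch falls through after yielding, so n would be unbound on the very next line. A minimal sketch of the pattern (Tok and _new_token below are stand-ins for the module's real PLY-token helper, and the explicit return is an addition to avoid the fall-through):

    # Stand-ins for the module's token type and _new_token helper (assumed shape).
    from collections import namedtuple

    Tok = namedtuple('Tok', ['type', 'string', 'start'])

    def _new_token(type_, string, start):
        return Tok(type_, string, start)

    def handle_dollar_sketch(stream):
        # Yield an ERRORTOKEN instead of raising, as in the diff.
        try:
            n = next(stream)
        except StopIteration:
            yield _new_token('ERRORTOKEN', 'missing token after $', (0, 0))
            return  # without this, n is unbound below
        yield _new_token('DOLLAR_NAME', '$' + n.string, n.start)

    print(list(handle_dollar_sketch(iter([]))))
    print(list(handle_dollar_sketch(iter([Tok('NAME', 'HOME', (1, 1))]))))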
@@ -89,13 +91,15 @@ def handle_dollar(state, token, stream):
         yield _new_token('DOLLAR_LBRACE', '${', token.start)
     else:
         e = 'expected NAME, (, [, or {{ after $, but got {0}'
-        raise Exception(e.format(n))
+        m = e.format(n)
+        yield _new_token("ERRORTOKEN", m, token.start)

 def handle_at(state, token, stream):
     try:
         n = next(stream)
     except:
-        raise Exception("missing token after @")
+        m = "missing token after @"
+        yield _new_token("ERRORTOKEN", m, token.start)

     if n.type == tokenize.OP and n.string == '(' and \
             n.start == token.end:
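Note: handle_at only rewrites "@(" when the paren starts exactly where the "@" ends, and the (row, column) start/end tuples that tokenize attaches to every token make that check cheap. A quick standard-library demonstration of the coordinates being compared:

    # Show the start/end coordinates the adjacency check relies on.
    import tokenize
    from io import BytesIO

    def toks(s):
        return list(tokenize.tokenize(BytesIO(s.encode('utf-8')).readline))

    for src in ('@(x)', '@ (x)'):
        ts = toks(src)
        at, nxt = ts[1], ts[2]
        print(src, at.end, nxt.start,
              'adjacent' if nxt.start == at.end else 'separated')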
@@ -146,7 +150,8 @@ def handle_backtick(state, token, stream):
         state['last'] = n
     else:
         e = "Could not find matching backtick for regex on line {0}"
-        raise Exception(e.format(token.start[0]))
+        m = e.format(token.start[0])
+        yield _new_token("ERRORTOKEN", m, token.start)

 def handle_newline(state, token, stream):
     try:
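Note: handle_backtick scans ahead for the closing backtick of a REGEXPATH; with this change an unterminated backtick becomes an ERRORTOKEN carrying the line number instead of an exception. The scan itself amounts to a search for the matching delimiter, roughly as below (a hedged sketch, not the function's actual body):

    # Locate the closing backtick of a `regex` path on one line, or None.
    def find_matching_backtick(line, start_col):
        end = line.find('`', start_col + 1)
        return None if end < 0 else end

    print(find_matching_backtick('ls `.*\\.py`', 3))   # 10
    print(find_matching_backtick('ls `unclosed', 3))   # None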
@@ -192,6 +197,11 @@ def handle_rbracket(state, token, stream):
     state['last'] = token
     yield _new_token('RBRACKET', ']', token.start)

+def handle_error_space(state, token, stream):
+    if state['pymode'][-1]:
+        state['last'] = token
+        yield _new_token('WS', ' ', token.start)
+
 special_handlers = {
     tokenize.ENCODING: lambda s,t,st: [],
     tokenize.NEWLINE: handle_newline,
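Note: the new handle_error_space forwards a stray space as a WS token only while the innermost context (top of the pymode stack) is Python mode, and silently drops it otherwise. A sketch of that stack check:

    # WS is only emitted when the innermost mode is Python mode.
    def handle_error_space_sketch(state, start):
        if state['pymode'][-1]:
            yield ('WS', ' ', start)

    state = {'pymode': [True, False]}   # innermost context: subprocess mode
    print(list(handle_error_space_sketch(state, (1, 4))))  # [] -- dropped
    state['pymode'].pop()
    print(list(handle_error_space_sketch(state, (1, 4))))  # [('WS', ' ', (1, 4))]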
@@ -205,8 +215,9 @@ special_handlers = {
     (tokenize.ERRORTOKEN, '`'): handle_backtick,
     (tokenize.ERRORTOKEN, '?'): handle_question,
     (tokenize.OP, '@'): handle_at,
+    (tokenize.ERRORTOKEN, ' '): handle_error_space,
     tokenize.INDENT: handle_indent,
-    tokenize.DEDENT: handle_indent
+    tokenize.DEDENT: handle_indent,
 }

 def handle_token(state, token, stream):
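Note: special_handlers mixes two kinds of keys, bare tokenize types and (type, string) pairs, so a token can be dispatched either on its type alone or on an exact type/text match. The actual lookup order lives in handle_token, outside this hunk; a plausible sketch of it (the precedence of pair keys over bare type keys is an assumption):

    import tokenize
    from collections import namedtuple

    Tok = namedtuple('Tok', ['type', 'string'])

    def handle_at(state, token, stream):
        yield 'AT'

    special_handlers = {
        (tokenize.OP, '@'): handle_at,
        tokenize.NEWLINE: lambda s, t, st: iter(()),
    }

    def dispatch(token):
        # Try the exact (type, string) key first, then the bare type.
        return special_handlers.get((token.type, token.string),
                                    special_handlers.get(token.type))

    print(dispatch(Tok(tokenize.OP, '@')))        # handle_at
    print(dispatch(Tok(tokenize.NEWLINE, '\n')))  # the NEWLINE lambda
    print(dispatch(Tok(tokenize.NAME, 'x')))      # None -- ordinary token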
@@ -234,7 +245,8 @@ def handle_token(state, token, stream):
         for i in special_handlers[typ](state, token, stream):
             yield i
     else:
-        raise Exception('Unexpected token: {0}'.format(token))
+        m = "Unexpected token: {0}".format(token)
+        yield _new_token("ERRORTOKEN", m, token.start)

 def preprocess_tokens(tokstream):
     tokstream = clear_NL(tokstream)
@@ -248,9 +260,15 @@ def clear_NL(tokstream):
         if i.type != tokenize.NL:
             yield i

+def single_error(exc):
+    yield _new_token("ERRORTOKEN", "{} (line {}, column {})".format(exc.msg, exc.lineno, exc.offset), (0,0))
+
 from io import BytesIO
 def tok(s):
-    return iter(tokenize.tokenize(BytesIO(s.encode('utf-8')).readline))
+    try:
+        return iter(tokenize.tokenize(BytesIO(s.encode('utf-8')).readline))
+    except Exception as e:
+        return iter(single_error(e))


 #synthesize a new PLY token
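Note: tok() now converts a tokenizer failure into a single ERRORTOKEN via single_error instead of letting the exception escape. One caveat: single_error reads exc.msg, exc.lineno, and exc.offset, which SyntaxError subclasses such as IndentationError provide but tokenize.TokenError does not (TokenError keeps its position data in exc.args). A small demonstration of the guarded call, using str(e) as a stand-in for the message formatting:

    # A bad dedent makes tokenize raise; the guard turns it into one token.
    import tokenize
    from io import BytesIO

    def tokens_of(s):
        try:
            return list(tokenize.tokenize(BytesIO(s.encode('utf-8')).readline))
        except Exception as e:
            return [('ERRORTOKEN', str(e))]

    print(tokens_of('x = 1')[1])      # an ordinary NAME token
    print(tokens_of('  x = 1\n y'))   # IndentationError -> one ERRORTOKEN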
@@ -289,31 +307,13 @@ class Lexer(object):
         self.errfunc = errfunc
         self.fname = ''
         self.last = None
-        self.lexer = None
-        self.indent = ''
-        self.in_py_mode = [True]

     def build(self, **kwargs):
         """Part of the PLY lexer API."""
-        self.lexer = lex.lex(object=self, **kwargs)
-        self.reset()
+        pass

     def reset(self):
-        #self.lexer.lineno = 1
-        self.indent = ''
-        self.last = None
-        self.in_py_mode = [True]
-        self.in_parens = [False]
-
-    @property
-    def lineno(self):
-        if self.lexer is not None:
-            return self.lexer.lineno
-
-    @lineno.setter
-    def lineno(self, value):
-        if self.lexer is not None:
-            self.lexer.lineno = value
+        pass

     def input(self, s):
         """Calls the lexer on the string s."""
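Note: build() and reset() survive as no-op stubs because token production has moved to the tokenize-based preprocessing above; what the class still has to honor is PLY's lexer protocol, input() to feed source and token() returning the next token or None. A minimal sketch of that contract (the split-based stream is a stand-in, not the real pipeline):

    # The protocol PLY's parser drives: input(), then token() until None.
    class SketchLexer(object):
        def build(self, **kwargs):
            pass    # no PLY tables to compile any more

        def reset(self):
            pass

        def input(self, s):
            self._it = iter(s.split())   # stand-in for the real token stream

        def token(self):
            try:
                return next(self._it)
            except StopIteration:
                return None

    lexer = SketchLexer()
    lexer.input('echo $HOME')
    t = lexer.token()
    while t is not None:
        print(t)
        t = lexer.token()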
@@ -324,24 +324,11 @@ class Lexer(object):
         """Retrieves the next token."""
         try:
             self.last = next(self.token_stream)
-            #print(self.last)
+            print(self.last)
             return self.last
-        except:
+        except StopIteration:
             return None

-    def token_col(self, token):
-        """Discovers the token column number."""
-        offset = self.lexer.lexdata.rfind('\n', 0, token.lexpos)
-        return token.lexpos - offset
-
-    def _error(self, msg, token):
-        location = self._make_tok_location(token)
-        self.errfunc(msg, location[0], location[1])
-        self.lexer.skip(1)
-
-    def _make_tok_location(self, token):
-        return (token.lineno, self.token_col(token))
-
     def __iter__(self):
         t = self.token()
         while t is not None:
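Note: narrowing the bare except to except StopIteration is the right call here: a bare except also swallows genuine bugs raised inside the token stream and misreports them as end-of-input. Compare:

    # With a bare except, a handler bug silently looks like end-of-input.
    def buggy_stream():
        yield 'NAME'
        raise TypeError('bug in a handler')

    it = buggy_stream()

    def token_old_style():
        try:
            return next(it)
        except:                  # old behaviour
            return None

    print(token_old_style())     # 'NAME'
    print(token_old_style())     # None -- the TypeError never surfaces

With except StopIteration, the TypeError would propagate and be debuggable. Separately, the uncommented print(self.last) looks like a leftover debugging aid rather than intended behaviour.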
@@ -407,278 +394,3 @@ class Lexer(object):
         # Ellipsis (...)
         'ELLIPSIS',
     )
-
-    #
-    # Token Regexes
-    #
-    identifier = r'[a-zA-Z_][0-9a-zA-Z_]*'
-    dollar = r'\$'
-
-    int_literal = '\d+'
-    hex_literal = '0[xX][0-9a-fA-F]+'
-    oct_literal = '0[oO]?[0-7]+'
-    bin_literal = '0[bB]?[0-1]+'
-
-    # string literals
-    triple_single_string = r"'''((\\(.|\n))|([^'\\])|('(?!''))|\n)*'''"
-    triple_double_string = r'"""((\\(.|\n))|([^"\\])|("(?!""))|\n)*"""'
-    single_single_string = r"'((\\(.|\n))|([^'\\]))*'"
-    single_double_string = r'"((\\(.|\n))|([^"\\]))*"'
-    triple_string = anyof(triple_single_string, triple_double_string)
-    single_string = anyof(single_single_string, single_double_string)
-    string_literal = anyof(triple_string, single_string)
-    raw_string_literal = '[Rr]' + string_literal
-    unicode_literal = '[Uu]' + string_literal
-    bytes_literal = '[Bb]' + string_literal
-
-    # floating point
-    float_exponent = r"(?:[eE][-+]?[0-9]+)"
-    float_mantissa = r"(?:[0-9]*\.[0-9]+)|(?:[0-9]+\.)"
-    float_literal = ('((((' + float_mantissa + ')' + float_exponent +
-                     '?)|([0-9]+' + float_exponent + ')))')
-    imag_literal = '(' + r'[0-9]+[jJ]' + '|' + float_literal + r'[jJ]' + ')'
-
-    #
-    # Rules
-    #
-
-    # Command line
-    def t_INDENT(self, t):
-        r'[ \t]+'
-        last = self.last
-        if not self.in_py_mode[-1]:
-            return t
-        elif last is not None and last.type != 'NEWLINE':
-            return  # returns None to skip internal whitespace
-        i = self.indent
-        v = t.value
-        if len(i) > len(v):
-            if not i.startswith(v):
-                self._error("indentation level does not match previous level", t)
-            t.type = 'DEDENT'
-        elif not v.startswith(i):
-            self._error("indentation level does not match previous level", t)
-        self.indent = v
-        t.lexer.lineno += 1
-        return t
-
-    t_ENDMARKER = r'\x03'
-
-    # Newlines
-    def t_NEWLINE(self, t):
-        r'\n'
-        if self.in_parens[-1]:
-            t.lexer.lineno += 1
-            return None
-        else:
-            return t
-
-    #
-    # Ignore internal whitespace based on parentherical scope
-    #
-
-    def t_AT_LPAREN(self, t):
-        r'@\('
-        self.in_parens.append(True)
-        self.in_py_mode.append(True)
-        return t
-
-    def t_DOLLAR_LPAREN(self, t):
-        r'\$\('
-        self.in_parens.append(True)
-        self.in_py_mode.append(False)
-        return t
-
-    def t_LPAREN(self, t):
-        r'\('
-        self.in_parens.append(True)
-        self.in_py_mode.append(True)
-        return t
-
-    def t_RPAREN(self, t):
-        r'\)'
-        self.in_parens.pop()
-        self.in_py_mode.pop()
-        return t
-
-    def t_DOLLAR_LBRACE(self, t):
-        r'\$\{'
-        self.in_parens.append(True)
-        self.in_py_mode.append(True)
-        return t
-
-    def t_LBRACE(self, t):
-        r'\{'
-        self.in_parens.append(True)
-        self.in_py_mode.append(True)
-        return t
-
-    def t_RBRACE(self, t):
-        r'\}'
-        self.in_parens.pop()
-        self.in_py_mode.pop()
-        return t
-
-    def t_DOLLAR_LBRACKET(self, t):
-        r'\$\['
-        self.in_parens.append(True)
-        self.in_py_mode.append(False)
-        return t
-
-    def t_LBRACKET(self, t):
-        r'\['
-        self.in_parens.append(True)
-        self.in_py_mode.append(True)
-        return t
-
-    def t_RBRACKET(self, t):
-        r'\]'
-        self.in_parens.pop()
-        self.in_py_mode.pop()
-        return t
-
-    # Basic Operators
-    t_PLUS = r'\+'
-    t_MINUS = r'-'
-    t_TIMES = r'\*'
-    t_DIVIDE = r'/'
-    t_DOUBLEDIV = r'//'
-    t_MOD = r'%'
-    t_POW = r'\*\*'
-    t_PIPE = r'\|'
-    t_AMPERSAND = r'&'
-    t_TILDE = r'~'
-    t_XOR = r'\^'
-    t_LSHIFT = r'<<'
-    t_RSHIFT = r'>>'
-    #t_LOGIC_OR = r'\|\|'
-    #t_LOGIC_AND = r'&&'
-    t_LT = r'<'
-    t_GT = r'>'
-    t_LE = r'<='
-    t_GE = r'>='
-    t_EQ = r'=='
-    t_NE = r'!='
-    #t_LARROW = r'<-'
-    t_RARROW = r'->'
-
-    # Assignment Operators
-    t_EQUALS = r'='
-    t_PLUSEQUAL = r'\+='
-    t_MINUSEQUAL = r'-='
-    t_TIMESEQUAL = r'\*='
-    t_DIVEQUAL = r'/='
-    t_MODEQUAL = r'%='
-    t_POWEQUAL = r'\*\*='
-    t_LSHIFTEQUAL = r'<<='
-    t_RSHIFTEQUAL = r'>>='
-    t_AMPERSANDEQUAL = r'&='
-    t_PIPEEQUAL = r'\|='
-    t_XOREQUAL = r'\^='
-    t_DOUBLEDIVEQUAL = r'//='
-    t_DOLLAR = dollar
-    t_REGEXPATH = r'`[^`]*`'
-
-    def t_DOUBLE_QUESTION(self, t):
-        r'\?\?'
-        return t
-
-    t_QUESTION = r'\?'
-
-    # Delimeters
-    #t_LPAREN = r'\('
-    #t_RPAREN = r'\)'
-    #t_LBRACKET = r'\['
-    #t_RBRACKET = r'\]'
-    #t_LBRACE = r'\{'
-    #t_RBRACE = r'\}'
-    t_COMMA = r','
-    t_PERIOD = r'\.'
-    t_SEMI = r';'
-    t_COLON = r':'
-    t_AT = r'@'
-    t_ELLIPSIS = r'\.\.\.'
-
-    def t_COMMENT(self, t):
-        r'\#.*'
-        return
-
-    #
-    # Literals
-    #
-
-    # strings, functions to ensure correct ordering
-
-    @TOKEN(string_literal)
-    def t_STRING_LITERAL(self, t):
-        return t
-
-    # float literal must come before int literals
-
-    @TOKEN(imag_literal)
-    def t_IMAG_LITERAL(self, t):
-        if self.in_py_mode[-1]:
-            t.value = eval(t.value)
-        return t
-
-    @TOKEN(float_literal)
-    def t_FLOAT_LITERAL(self, t):
-        if self.in_py_mode[-1]:
-            t.value = float(t.value)
-        return t
-
-    # ints, functions to ensure correct ordering
-
-    @TOKEN(hex_literal)
-    def t_HEX_LITERAL(self, t):
-        if self.in_py_mode[-1]:
-            t.value = int(t.value, 16)
-        return t
-
-    @TOKEN(oct_literal)
-    def t_OCT_LITERAL(self, t):
-        if self.in_py_mode[-1]:
-            t.value = int(t.value, 8)
-        return t
-
-    @TOKEN(bin_literal)
-    def t_BIN_LITERAL(self, t):
-        if self.in_py_mode[-1]:
-            t.value = int(t.value, 2)
-        return t
-
-    @TOKEN(int_literal)
-    def t_INT_LITERAL(self, t):
-        if self.in_py_mode[-1]:
-            t.value = int(t.value)
-        return t
-
-    def t_NONE(self, t):
-        r'None'
-        if self.in_py_mode[-1]:
-            t.value = None
-        return t
-
-    def t_TRUE(self, t):
-        r'True'
-        if self.in_py_mode[-1]:
-            t.value = True
-        return t
-
-    def t_FALSE(self, t):
-        r'False'
-        if self.in_py_mode[-1]:
-            t.value = False
-        return t
-
-    # Extra
-    @TOKEN(identifier)
-    def t_NAME(self, t):
-        if self.in_py_mode[-1] and t.value in self.pykeyword_map:
-            t.type = self.pykeyword_map[t.value]
-        return t
-
-    def t_error(self, t):
-        msg = 'Invalid token {0!r}'.format(t.value[0])
-        self._error(msg, t)
-
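Note: the deleted block is the whole PLY rule set, literal and operator regexes, paren-tracking t_* rules, and the literal-conversion rules, now superseded by the tokenize-based pipeline above. For reference, the deleted string regexes compose via an anyof() helper defined elsewhere in the module; reconstructed here under the assumption that it is plain alternation, they still behave as expected:

    # Re-assembled string_literal regex from the deleted rules; anyof() is
    # an assumption (simple alternation), not the module's verbatim helper.
    import re

    def anyof(*regexes):
        return '(' + '|'.join(regexes) + ')'

    triple_single_string = r"'''((\\(.|\n))|([^'\\])|('(?!''))|\n)*'''"
    triple_double_string = r'"""((\\(.|\n))|([^"\\])|("(?!""))|\n)*"""'
    single_single_string = r"'((\\(.|\n))|([^'\\]))*'"
    single_double_string = r'"((\\(.|\n))|([^"\\]))*"'
    triple_string = anyof(triple_single_string, triple_double_string)
    single_string = anyof(single_single_string, single_double_string)
    string_literal = anyof(triple_string, single_string)

    for s in ('"hi"', "'''multi\nline'''", '"unterminated'):
        print(repr(s), bool(re.fullmatch(string_literal, s)))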