refactoring per suggestions

adam j hartz 2015-03-25 03:58:28 -04:00
parent 667c3695e4
commit 66292854ec
2 changed files with 58 additions and 97 deletions

View file

@@ -43,6 +43,7 @@ for (op, type) in op_map.items():
token_map[tokenize.NAME] = 'NAME'
token_map[tokenize.NUMBER] = 'NUMBER'
token_map[tokenize.STRING] = 'STRING'
token_map[tokenize.NEWLINE] = 'NEWLINE'
def handle_indent(state, token, stream):
@@ -55,28 +56,17 @@ def handle_indent(state, token, stream):
state['indents'].append(level)
yield _new_token('INDENT', token.string, token.start)
try:
n = next(stream)
except:
n = None
if n is not None:
if n.type != tokenize.ENDMARKER:
for i in handle_token(state, n, stream):
yield i
def handle_dollar(state, token, stream):
try:
n = next(stream)
except:
n = next(stream, None)
if n is None:
m = "missing token after $"
yield _new_token("ERRORTOKEN", m, token.start)
if n.start != token.end:
elif n.start != token.end:
m = "unexpected whitespace after $"
yield _new_token("ERRORTOKEN", m, token.start)
if n.type == tokenize.NAME:
elif n.type == tokenize.NAME:
state['last'] = n
yield _new_token('DOLLAR_NAME', '$' + n.string, token.start)
elif n.type == tokenize.OP and n.string == '(':
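
The handlers above drop their try/except around next(stream) in favor of the two-argument form of next(), which returns a default instead of raising StopIteration. A minimal sketch of that idiom, using an illustrative token list rather than the real tokenize stream:

    # next() with a default replaces a try/except StopIteration block.
    tokens = iter(['$', 'HOME'])

    n = next(tokens, None)       # '$'
    while n is not None:
        print(n)
        n = next(tokens, None)   # None once the stream is exhausted
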
@@ -98,46 +88,39 @@ def handle_dollar(state, token, stream):
def handle_at(state, token, stream):
try:
n = next(stream)
except:
n = next(stream, None)
if n is None:
state['last'] = token
m = "missing token after @"
yield _new_token("ERRORTOKEN", m, token.start)
if n.type == tokenize.OP and n.string == '(' and \
elif n.type == tokenize.OP and n.string == '(' and \
n.start == token.end:
state['pymode'].append(True)
yield _new_token('AT_LPAREN', '@(', token.start)
state['last'] = n
yield _new_token('AT_LPAREN', '@(', token.start)
else:
yield _new_token('AT', '@', token.start)
state['last'] = token
for i in handle_token(state, n, stream):
yield i
yield _new_token('AT', '@', token.start)
yield from handle_token(state, n, stream)
def handle_question(state, token, stream):
try:
n = next(stream)
except:
n = None
n = next(stream, None)
if n.type == tokenize.ERRORTOKEN and n.string == '?' and \
n.start == token.end:
yield _new_token('DOUBLE_QUESTION', '??', token.start)
if n is not None and n.type == tokenize.ERRORTOKEN and \
n.string == '?' and n.start == token.end:
state['last'] = n
yield _new_token('DOUBLE_QUESTION', '??', token.start)
else:
yield _new_token('QUESTION', '?', token.start)
state['last'] = token
for i in handle_token(state, n, stream):
yield i
yield _new_token('QUESTION', '?', token.start)
if n is not None:
yield from handle_token(state, n, stream)
def handle_backtick(state, token, stream):
try:
n = next(stream)
except:
n = None
n = next(stream, None)
found_match = False
sofar = '`'
@@ -151,29 +134,15 @@ def handle_backtick(state, token, stream):
except:
n = None
if found_match:
yield _new_token('REGEXPATH', sofar, token.start)
state['last'] = n
yield _new_token('REGEXPATH', sofar, token.start)
else:
state['last'] = token
e = "Could not find matching backtick for regex on line {0}"
m = e.format(token.start[0])
yield _new_token("ERRORTOKEN", m, token.start)
def handle_newline(state, token, stream):
try:
n = next(stream)
except:
n = None
yield _new_token('NEWLINE', '\n', token.start)
state['last'] = token
if n is not None:
if n.type != tokenize.ENDMARKER:
for i in handle_token(state, n, stream):
yield i
def handle_lparen(state, token, stream):
state['pymode'].append(True)
state['last'] = token
@@ -213,15 +182,19 @@ def handle_rbracket(state, token, stream):
def handle_error_space(state, token, stream):
if not state['pymode'][-1]:
state['last'] = token
yield _new_token('WS', ' ', token.start)
yield _new_token('WS', token.string, token.start)
else:
yield from []
def handle_ignore(state, token, stream):
yield from []
special_handlers = {
tokenize.ENCODING: lambda s, t, st: [],
tokenize.COMMENT: lambda s, t, st: [],
tokenize.ENDMARKER: lambda s, t, st: [],
tokenize.NEWLINE: handle_newline,
tokenize.COMMENT: handle_ignore,
tokenize.ENCODING: handle_ignore,
tokenize.ENDMARKER: handle_ignore,
(tokenize.OP, '('): handle_lparen,
(tokenize.OP, ')'): handle_rparen,
(tokenize.OP, '['): handle_lbracket,
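
special_handlers maps either a bare token type or a (type, string) pair to a handler, and handle_token in the next hunk tries the more specific key first. A self-contained sketch of that two-level dispatch, with illustrative handler names that are not part of the code above:

    # Look up the most specific key first, then fall back to the generic one.
    def handle_plus(tok):
        return ('PLUS', tok[1])

    def handle_op(tok):
        return ('OP', tok[1])

    handlers = {
        ('OP', '+'): handle_plus,   # keyed on (type, string)
        'OP': handle_op,            # keyed on type alone
    }

    def dispatch(tok):
        typ, st = tok
        if (typ, st) in handlers:
            return handlers[(typ, st)](tok)
        elif typ in handlers:
            return handlers[typ](tok)
        return ('ERRORTOKEN', st)

    print(dispatch(('OP', '+')))   # ('PLUS', '+')
    print(dispatch(('OP', '-')))   # ('OP', '-')
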
@@ -253,25 +226,22 @@ def handle_token(state, token, stream):
state['last'] = token
yield _new_token(token_map[typ], st, token.start)
elif (typ, st) in special_handlers:
for i in special_handlers[(typ, st)](state, token, stream):
yield i
yield from special_handlers[(typ, st)](state, token, stream)
elif typ in special_handlers:
for i in special_handlers[typ](state, token, stream):
yield i
yield from special_handlers[typ](state, token, stream)
else:
m = "Unexpected token: {0}".format(token)
yield _new_token("ERRORTOKEN", m, token.start)
def preprocess_tokens(tokstream):
tokstream = clear_NL(tokstream)
tokstream = clear_nl(tokstream)
state = {'indents': [0], 'pymode': [True], 'last': None}
for token in tokstream:
for i in handle_token(state, token, tokstream):
yield i
yield from handle_token(state, token, tokstream)
def clear_NL(tokstream):
def clear_nl(tokstream):
for i in tokstream:
if i.type != tokenize.NL:
yield i
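
handle_token and preprocess_tokens now delegate with yield from rather than looping and re-yielding each item, which is equivalent for simple re-yielding like this. A minimal sketch of the equivalence:

    def inner():
        yield 1
        yield 2

    def outer_loop():
        # the pattern being replaced
        for i in inner():
            yield i

    def outer_delegate():
        # the replacement used in this commit
        yield from inner()

    assert list(outer_loop()) == list(outer_delegate()) == [1, 2]
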
@@ -284,9 +254,9 @@ def single_error(exc):
def tok(s):
try:
return iter(tokenize.tokenize(BytesIO(s.encode('utf-8')).readline))
return tokenize.tokenize(BytesIO(s.encode('utf-8')).readline)
except Exception as e:
return iter(single_error(e))
return single_error(e)
# synthesize a new PLY token
@@ -298,8 +268,7 @@ def _new_token(type, value, pos):
return o
def anyof(*regexes):
return '(' + '|'.join(regexes) + ')'
COMMENT_REGEX = re.compile(r'#.*')
class Lexer(object):
@@ -337,16 +306,13 @@ class Lexer(object):
def input(self, s):
"""Calls the lexer on the string s."""
s = re.sub(r'#.*', '', s)
s = re.sub(COMMENT_REGEX, '', s)
self.token_stream = preprocess_tokens(tok(s))
def token(self):
"""Retrieves the next token."""
try:
self.last = next(self.token_stream)
self.last = next(self.token_stream, None)
return self.last
except StopIteration:
return None
def __iter__(self):
t = self.token()
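
Lexer.input now strips comments with a pattern compiled once at module level instead of passing the raw string to re.sub on every call; re.sub accepts either form, and the compiled object also exposes its own .sub() method. A small sketch reusing the COMMENT_REGEX name from above:

    import re

    COMMENT_REGEX = re.compile(r'#.*')

    def strip_comments(s):
        # equivalent to re.sub(COMMENT_REGEX, '', s)
        return COMMENT_REGEX.sub('', s)

    assert strip_comments('echo hi  # comment').rstrip() == 'echo hi'
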

View file

@@ -335,14 +335,10 @@ class Parser(object):
@property
def lineno(self):
try:
return self.lexer.last.lineno
except:
if self.lexer.last is None:
return 0
@lineno.setter
def lineno(self, value):
pass
else:
return self.lexer.last.lineno
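
The lineno property now checks self.lexer.last against None explicitly rather than catching the failure of the attribute access. A hedged, self-contained sketch of that pattern with stand-in classes (not the real Lexer or token objects):

    class Tok:
        def __init__(self, lineno):
            self.lineno = lineno

    class FakeLexer:
        def __init__(self):
            self.last = None   # no token seen yet

    def current_lineno(lexer):
        # explicit None check, as in the property above
        if lexer.last is None:
            return 0
        return lexer.last.lineno

    lex = FakeLexer()
    assert current_lineno(lex) == 0
    lex.last = Tok(7)
    assert current_lineno(lex) == 7
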
@property
def col(self):
@@ -412,15 +408,12 @@ class Parser(object):
"""newline_or_stmt : NEWLINE
| stmt
"""
if p[1] == '\n':
self.lineno += 1
p[0] = p[1]
def p_newlines(self, p):
"""newlines : NEWLINE
| newlines NEWLINE
"""
self.lineno += 1
p[0] = p[1] if len(p) == 2 else p[1] + p[2]
def p_eval_input(self, p):
@@ -470,7 +463,6 @@ class Parser(object):
else:
p0 = ast.Call(func=name, lineno=self.lineno, col_offset=self.col,
**p3)
self.lineno += 1 # needs to be at the end
p[0] = p0
def p_decorators(self, p):
@@ -1208,8 +1200,6 @@ class Parser(object):
| NEWLINE INDENT stmt_list DEDENT
"""
p[0] = p[1] if len(p) == 2 else p[3]
if len(p) < 4:
self.lineno += 1 # needs to be at the end
def p_test(self, p):
"""test : or_test
@@ -1520,8 +1510,14 @@ class Parser(object):
bt = '`'
if isinstance(p1, (ast.Num, ast.Str, ast.Bytes)):
pass
elif (p1 == 'True') or (p1 == 'False') or (p1 == 'None'):
p1 = ast.NameConstant(value=eval(p1), lineno=self.lineno,
elif p1 == 'True':
p1 = ast.NameConstant(value=True, lineno=self.lineno,
col_offset=self.col)
elif p1 == 'False':
p1 = ast.NameConstant(value=False, lineno=self.lineno,
col_offset=self.col)
elif p1 == 'None':
p1 = ast.NameConstant(value=None, lineno=self.lineno,
col_offset=self.col)
elif p1 == '...':
p1 = ast.Ellipsis(lineno=self.lineno, col_offset=self.col)
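
The parser hunk above builds ast.NameConstant nodes for True, False and None directly instead of eval()'ing the literal text. A small sketch of the same mapping; ast.NameConstant was the node available when this commit was made (Python 3.4), while current Pythons spell it ast.Constant:

    import ast

    VALUES = {'True': True, 'False': False, 'None': None}

    def name_constant(text, lineno=1, col=0):
        # map the literal text to the singleton instead of eval()'ing it
        return ast.Constant(value=VALUES[text], lineno=lineno, col_offset=col)

    node = name_constant('None')
    print(type(node).__name__, node.value)   # Constant None
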
@@ -2066,7 +2062,7 @@ class Parser(object):
lineno=self.lineno, col=self.col)
p0._cliarg_action = 'extend'
elif p1.startswith('$'):
p0 = self._envvar_by_name(p[1][1:], lineno=self.lineno, col=self.col)
p0 = self._envvar_by_name(p1[1:], lineno=self.lineno, col=self.col)
p0._cliarg_action = 'ensure_list'
else:
p0._cliarg_action = 'append'
@@ -2149,8 +2145,7 @@ class Parser(object):
self._parse_error('no further code', None)
elif p.type == 'ERRORTOKEN':
self._parse_error(p.value,
self.currloc(lineno=p.lineno,
column=p.lexpos))
self.currloc(lineno=p.lineno, column=p.lexpos))
else:
msg = 'code: {0}'.format(p.value),
self._parse_error(msg, self.currloc(lineno=p.lineno,