Mirror of https://github.com/xonsh/xonsh.git, synced 2025-03-04 08:24:40 +01:00
Refactor: reduce API surface of lexer (#4535)
* Refactor: code can *only* be None in else case.
* Refactor: make xonsh_code private as `_source`
* Refactor: make "token_stream" private
* Refactor: make "tolerant" private
* Refactor: add default for _token_stream
* Feat: implement Lexer.reset
* Refactor: cleanup local names in Lexer
* Refactor: make `source_slice` private. Again, this is just a helper function, so for now make it private.
* Refactor: use chained comparison
* Docs: add news
* Update refactor-lexer-parser.rst

Co-authored-by: Noorhteen Raja NJ <jnoortheen@gmail.com>
This commit is contained in:
parent b76d6f994f
commit a1d94b822e
4 changed files with 65 additions and 36 deletions
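
The recurring pattern in these changes is the same throughout: state that callers never needed to mutate gets a leading underscore, and anything that should stay readable (such as `tolerant`) is re-exposed through a read-only property. A minimal sketch of that pattern, using a made-up class rather than the actual xonsh Lexer:

    class TolerantThing:
        """Hypothetical example: private storage plus a read-only property
        keeps the public API surface small."""

        def __init__(self, tolerant=False):
            self._tolerant = tolerant  # private: internal detail

        @property
        def tolerant(self):
            # readable from the outside, but not assignable
            return self._tolerant


    thing = TolerantThing(tolerant=True)
    print(thing.tolerant)        # True
    try:
        thing.tolerant = False   # a property with no setter rejects this
    except AttributeError:
        print("tolerant is read-only")

External code that only reads `tolerant` keeps working; the (unused) ability to reassign it goes away, which is the point of shrinking the API surface.
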
news/refactor-lexer-parser.rst (new file, 23 additions)

@@ -0,0 +1,23 @@
+**Added:**
+
+* <news item>
+
+**Changed:**
+
+* Privatise certain attributes of lexer/parser to minimise API surface
+
+**Deprecated:**
+
+* <news item>
+
+**Removed:**
+
+* <news item>
+
+**Fixed:**
+
+* <news item>
+
+**Security:**
+
+* <news item>

@@ -153,8 +153,8 @@ class Execer:
                 filename=filename,
                 transform=transform,
             )
-        if code is None:
-            return None  # handles comment only input
+            if code is None:
+                return None  # handles comment only input
         return eval(code, glbs, locs)
 
     def exec(

@@ -186,8 +186,8 @@ class Execer:
                 filename=filename,
                 transform=transform,
             )
-        if code is None:
-            return None  # handles comment only input
+            if code is None:
+                return None  # handles comment only input
         return exec(code, glbs, locs)
 
     def _print_debug_wrapping(

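Both Execer hunks above implement the first commit-message item: `code` can only be `None` when it came back from `compile()` (for example, comment-only input), never when the caller already passed in a compiled code object, so the guard now sits inside the `else` branch. A simplified sketch of that control flow, assuming an Execer-like object with a `compile()` method; this is not the verbatim xonsh source:

    import types


    def eval_like(execer, inp, glbs=None, locs=None):
        # Simplified illustration of the method's shape after the change.
        if isinstance(inp, types.CodeType):
            code = inp  # already compiled by the caller, never None
        else:
            code = execer.compile(inp, glbs=glbs, locs=locs, mode="eval")
            # Only compile() can produce None (comment-only input),
            # so the early return belongs to this branch.
            if code is None:
                return None
        return eval(code, glbs, locs)
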
@@ -430,23 +430,29 @@ class Lexer:
         self.fname = ""
         self.last = None
         self.beforelast = None
-        self.tolerant = tolerant
+        self._tolerant = tolerant
+        self._token_stream = iter(())
+
+    @property
+    def tolerant(self):
+        return self._tolerant
 
     def build(self, **kwargs):
         """Part of the PLY lexer API."""
         pass
 
     def reset(self):
-        pass
+        self._token_stream = iter(())
+        self.last = None
+        self.beforelast = None
 
     def input(self, s):
         """Calls the lexer on the string s."""
-        self.token_stream = get_tokens(s, self.tolerant)
+        self._token_stream = get_tokens(s, self._tolerant)
 
     def token(self):
         """Retrieves the next token."""
-        self.beforelast = self.last
-        self.last = next(self.token_stream, None)
+        self.beforelast, self.last = self.last, next(self._token_stream, None)
         return self.last
 
     def __iter__(self):

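With `reset()` implemented, a single Lexer instance can be reused between inputs instead of carrying the previous token stream around. A usage sketch, assuming a xonsh checkout is importable; the exact token types produced are illustrative, not guaranteed:

    from xonsh.lexer import Lexer

    lex = Lexer()
    lex.input("echo hi\n")
    print([tok.type for tok in lex])  # e.g. ['NAME', 'WS', 'NAME', 'NEWLINE']

    lex.reset()  # drop the exhausted token stream and the last/beforelast tokens
    assert lex.last is None and lex.beforelast is None
    lex.input("ls -l\n")              # ready for the next round
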
@@ -459,28 +465,28 @@ class Lexer:
         """Splits a string into a list of strings which are whitespace-separated
         tokens.
         """
-        vals = []
         self.input(s)
+        elements = []
         l = c = -1
         ws = "WS"
         nl = "\n"
-        for t in self:
-            if t.type == ws:
+        for token in self:
+            if token.type == ws:
                 continue
-            elif l < t.lineno:
-                vals.append(t.value)
-            elif len(vals) > 0 and c == t.lexpos:
-                vals[-1] = vals[-1] + t.value
+            elif l < token.lineno:
+                elements.append(token.value)
+            elif len(elements) > 0 and c == token.lexpos:
+                elements[-1] = elements[-1] + token.value
             else:
-                vals.append(t.value)
-            nnl = t.value.count(nl)
+                elements.append(token.value)
+            nnl = token.value.count(nl)
             if nnl == 0:
-                l = t.lineno
-                c = t.lexpos + len(t.value)
+                l = token.lineno
+                c = token.lexpos + len(token.value)
             else:
-                l = t.lineno + nnl
-                c = len(t.value.rpartition(nl)[-1])
-        return vals
+                l = token.lineno + nnl
+                c = len(token.value.rpartition(nl)[-1])
+        return elements
 
     #
     # All the tokens recognized by the lexer

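The `split()` hunk above only renames locals (`vals` becomes `elements`, `t` becomes `token`); the behaviour is unchanged: whitespace tokens separate elements, and tokens that start exactly where the previous one ended are glued onto it. A standalone sketch of that merging rule with mock tokens, restricted to single-line input so the line-number bookkeeping can be left out:

    from collections import namedtuple

    Tok = namedtuple("Tok", "type value lineno lexpos")


    def split_like(tokens):
        elements = []
        last_end = -1
        for token in tokens:
            if token.type == "WS":
                continue                      # whitespace ends an element
            if elements and token.lexpos == last_end:
                elements[-1] += token.value   # adjacent: glue onto previous
            else:
                elements.append(token.value)  # gap before it: new element
            last_end = token.lexpos + len(token.value)
        return elements


    # "ls -lh" as an illustrative token stream; "-" and "lh" touch each other.
    toks = [
        Tok("NAME", "ls", 1, 0),
        Tok("WS", " ", 1, 2),
        Tok("OP", "-", 1, 3),
        Tok("NAME", "lh", 1, 4),
    ]
    print(split_like(toks))  # ['ls', '-lh']
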
@@ -489,7 +495,7 @@ class Lexer:
     def tokens(self):
         if self._tokens is None:
             kwlist = kwmod.kwlist[:]
-            if PYTHON_VERSION_INFO >= (3, 9, 0) and PYTHON_VERSION_INFO < (3, 10):
+            if (3, 9, 0) <= PYTHON_VERSION_INFO < (3, 10):
                 kwlist.remove("__peg_parser__")
             t = (
                 tuple(token_map.values())

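The version check above is the "use chained comparison" item: `(3, 9, 0) <= PYTHON_VERSION_INFO < (3, 10)` reads like the interval it describes and names the version tuple only once. A quick check of the equivalence, using `sys.version_info` in place of xonsh's `PYTHON_VERSION_INFO` constant:

    import sys

    version = tuple(sys.version_info[:3])

    # The two spellings are equivalent for tuples; the chained form is
    # evaluated left to right and short-circuits on the first failure.
    verbose = version >= (3, 9, 0) and version < (3, 10)
    chained = (3, 9, 0) <= version < (3, 10)
    assert verbose == chained
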
@@ -277,7 +277,7 @@ class BaseParser:
         self.tokens = lexer.tokens
 
         self._lines = None
-        self.xonsh_code = None
+        self._source = None
         self._attach_nocomma_tok_rules()
         self._attach_nocloser_base_rules()
         self._attach_nodedent_base_rules()

@@ -493,7 +493,7 @@ class BaseParser:
         self.lexer.reset()
         self._last_yielded_token = None
         self._lines = None
-        self.xonsh_code = None
+        self._source = None
         self._error = None
 
     def parse(self, s, filename="<code>", mode="exec", debug_level=0):

@@ -515,7 +515,7 @@ class BaseParser:
         tree : AST
         """
         self.reset()
-        self.xonsh_code = s
+        self._source = s
         self.lexer.fname = filename
         while self.parser is None:
             time.sleep(0.01)  # block until the parser is ready

@@ -617,11 +617,11 @@ class BaseParser:
 
     @property
     def lines(self):
-        if self._lines is None and self.xonsh_code is not None:
-            self._lines = self.xonsh_code.splitlines(keepends=True)
+        if self._lines is None and self._source is not None:
+            self._lines = self._source.splitlines(keepends=True)
         return self._lines
 
-    def source_slice(self, start, stop):
+    def _source_slice(self, start, stop):
         """Gets the original source code from two (line, col) tuples in
         source-space (i.e. lineno start at 1).
         """

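`source_slice()` is only renamed to `_source_slice()` here, but since every remaining hunk is a call-site update it is worth recalling what it does: return the raw source text between two (line, column) positions, with line numbers starting at 1, as the docstring says. A rough sketch of those semantics; this is a hypothetical helper, not the xonsh implementation:

    def source_slice_sketch(lines, start, stop):
        """Slice source text between two (lineno, col) positions in 1-based
        source space, given lines from splitlines(keepends=True)."""
        (bline, bcol), (eline, ecol) = start, stop
        bline -= 1  # convert 1-based line numbers to 0-based indices
        eline -= 1
        if bline == eline:
            return lines[bline][bcol:ecol]
        pieces = [lines[bline][bcol:]]
        pieces.extend(lines[bline + 1:eline])
        pieces.append(lines[eline][:ecol])
        return "".join(pieces)


    lines = ["echo hello\n", "echo world\n"]
    print(source_slice_sketch(lines, (1, 5), (2, 4)))  # 'hello\necho'

The parser feeds this kind of helper from its cached `lines` property, which is why `_source` and `_lines` are cleared together in `reset()`.
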
@@ -646,7 +646,7 @@ class BaseParser:
         raise SyntaxError()
 
     def _parse_error(self, msg, loc):
-        raise_parse_error(msg, loc, self.xonsh_code, self.lines)
+        raise_parse_error(msg, loc, self._source, self.lines)
 
     #
     # Precedence of operators

@@ -1758,7 +1758,7 @@ class BaseParser:
         p3, p5 = p[3], p[5]
         beg = (p3.lineno, p3.lexpos)
         end = (p5.lineno, p5.lexpos)
-        s = self.source_slice(beg, end)
+        s = self._source_slice(beg, end)
         s = textwrap.dedent(s)
         p[0] = ast.Str(s=s, lineno=beg[0], col_offset=beg[1])
 
@@ -1767,7 +1767,7 @@ class BaseParser:
         p1, p3 = p[1], p[3]
         beg = (p1.lineno, p1.lexpos + 1)
         end = (p3.lineno, p3.lexpos)
-        s = self.source_slice(beg, end).strip()
+        s = self._source_slice(beg, end).strip()
         p[0] = ast.Str(s=s, lineno=beg[0], col_offset=beg[1])
 
     def _attach_nodedent_base_rules(self):

@@ -2572,7 +2572,7 @@ class BaseParser:
             ends = p2 + ends
         elts = []
         for beg, end in zip(begins, ends):
-            s = self.source_slice(beg, end).strip()
+            s = self._source_slice(beg, end).strip()
             if not s:
                 if len(begins) == 1:
                     break

@@ -3146,7 +3146,7 @@ class BaseParser:
         p3 = p[3]
         l = p1.lineno
         c = p1.lexpos + 1
-        subcmd = self.source_slice((l, c), (p3.lineno, p3.lexpos))
+        subcmd = self._source_slice((l, c), (p3.lineno, p3.lexpos))
         subcmd = subcmd.strip() + "\n"
         p0 = [
             ast.Str(s="xonsh", lineno=l, col_offset=c),

@@ -3186,7 +3186,7 @@ class BaseParser:
         p3, p5 = p[3], p[5]
         beg = (p3.lineno, p3.lexpos + 1)
         end = (p5.lineno, p5.lexpos)
-        s = self.source_slice(beg, end).strip()
+        s = self._source_slice(beg, end).strip()
         node = ast.Str(s=s, lineno=beg[0], col_offset=beg[1])
         p[2][-1].elts.append(node)
 
|
Loading…
Add table
Reference in a new issue