Refactor: reduce API surface of lexer (#4535)

* Refactor: code can *only* be None in the else case.

* Refactor: make xonsh_code private as `_source`

* Refactor: make "token_stream" private

* Refactor: make "tolerant" private

* Refactor: add default for _token_stream

* Feat: implement Lexer.reset

* Refactor: cleanup local names in Lexer

* Refactor: make `source_slice` private.

Again, this is just a helper function, so for now make it private.

* Refactor: use chained comparison

* Docs: add news

* Update refactor-lexer-parser.rst

Co-authored-by: Noorhteen Raja NJ <jnoortheen@gmail.com>
Angus Hollands 2022-01-17 11:23:14 +00:00 committed by GitHub
parent b76d6f994f
commit a1d94b822e
4 changed files with 65 additions and 36 deletions
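
The privatisation bullets above all apply the same pattern: the value moves to a leading-underscore attribute, and where read access is still useful (as with `tolerant`) it is re-exposed through a read-only property. A minimal standalone sketch of that pattern (illustrative only, not xonsh's actual Lexer class):

class TolerantThing:
    """Sketch of the private-attribute + read-only-property pattern used here."""

    def __init__(self, tolerant=False):
        # Store the flag privately; callers are not expected to rebind it.
        self._tolerant = tolerant

    @property
    def tolerant(self):
        # Read-only view over the private attribute.
        return self._tolerant


thing = TolerantThing(tolerant=True)
print(thing.tolerant)       # True
# thing.tolerant = False    # AttributeError: the property has no setter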


@@ -0,0 +1,23 @@
+**Added:**
+
+* <news item>
+
+**Changed:**
+
+* Privatise certain attributes of lexer/parser to minimise API surface
+
+**Deprecated:**
+
+* <news item>
+
+**Removed:**
+
+* <news item>
+
+**Fixed:**
+
+* <news item>
+
+**Security:**
+
+* <news item>


@@ -430,23 +430,29 @@ class Lexer:
         self.fname = ""
         self.last = None
         self.beforelast = None
-        self.tolerant = tolerant
+        self._tolerant = tolerant
+        self._token_stream = iter(())
+
+    @property
+    def tolerant(self):
+        return self._tolerant
 
     def build(self, **kwargs):
         """Part of the PLY lexer API."""
         pass
 
     def reset(self):
-        pass
+        self._token_stream = iter(())
+        self.last = None
+        self.beforelast = None
 
     def input(self, s):
         """Calls the lexer on the string s."""
-        self.token_stream = get_tokens(s, self.tolerant)
+        self._token_stream = get_tokens(s, self._tolerant)
 
     def token(self):
         """Retrieves the next token."""
-        self.beforelast = self.last
-        self.last = next(self.token_stream, None)
+        self.beforelast, self.last = self.last, next(self._token_stream, None)
         return self.last
 
     def __iter__(self):
@@ -459,28 +465,28 @@ class Lexer:
         """Splits a string into a list of strings which are whitespace-separated
         tokens.
         """
-        vals = []
         self.input(s)
+        elements = []
         l = c = -1
         ws = "WS"
         nl = "\n"
-        for t in self:
-            if t.type == ws:
+        for token in self:
+            if token.type == ws:
                 continue
-            elif l < t.lineno:
-                vals.append(t.value)
-            elif len(vals) > 0 and c == t.lexpos:
-                vals[-1] = vals[-1] + t.value
+            elif l < token.lineno:
+                elements.append(token.value)
+            elif len(elements) > 0 and c == token.lexpos:
+                elements[-1] = elements[-1] + token.value
             else:
-                vals.append(t.value)
-            nnl = t.value.count(nl)
+                elements.append(token.value)
+            nnl = token.value.count(nl)
             if nnl == 0:
-                l = t.lineno
-                c = t.lexpos + len(t.value)
+                l = token.lineno
+                c = token.lexpos + len(token.value)
             else:
-                l = t.lineno + nnl
-                c = len(t.value.rpartition(nl)[-1])
-        return vals
+                l = token.lineno + nnl
+                c = len(token.value.rpartition(nl)[-1])
+        return elements
 
     #
     # All the tokens recognized by the lexer
@@ -489,7 +495,7 @@ class Lexer:
     def tokens(self):
         if self._tokens is None:
             kwlist = kwmod.kwlist[:]
-            if PYTHON_VERSION_INFO >= (3, 9, 0) and PYTHON_VERSION_INFO < (3, 10):
+            if (3, 9, 0) <= PYTHON_VERSION_INFO < (3, 10):
                 kwlist.remove("__peg_parser__")
             t = (
                 tuple(token_map.values())
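
Taken together, the lexer's public surface is now input(), token(), __iter__, split(), reset(), the tokens property and the read-only tolerant property, while _token_stream and _tolerant are implementation details. A rough usage sketch of that surface (assumes xonsh is installed; the tolerant=False constructor default is an assumption here, not something this diff shows):

# Illustrative only -- exercises the post-refactor Lexer surface.
from xonsh.lexer import Lexer

lexer = Lexer()                      # assumed default: tolerant=False
lexer.input("x = 1 + 2\n")           # feed a source string
for tok in lexer:                    # __iter__ drives token() until it returns None
    print(tok.type, repr(tok.value))

print(lexer.tolerant)                # read-only property backed by _tolerant

lexer.reset()                        # new in this PR: clears _token_stream, last, beforelast
lexer.input("y = 3\n")               # the same instance can then be reused
print(lexer.split("echo hi there"))  # whitespace-separated elements of the string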


@@ -277,7 +277,7 @@ class BaseParser:
         self.tokens = lexer.tokens
 
         self._lines = None
-        self.xonsh_code = None
+        self._source = None
         self._attach_nocomma_tok_rules()
         self._attach_nocloser_base_rules()
         self._attach_nodedent_base_rules()
@@ -493,7 +493,7 @@ class BaseParser:
         self.lexer.reset()
         self._last_yielded_token = None
         self._lines = None
-        self.xonsh_code = None
+        self._source = None
         self._error = None
 
     def parse(self, s, filename="<code>", mode="exec", debug_level=0):
@@ -515,7 +515,7 @@ class BaseParser:
         tree : AST
         """
         self.reset()
-        self.xonsh_code = s
+        self._source = s
         self.lexer.fname = filename
         while self.parser is None:
             time.sleep(0.01)  # block until the parser is ready
@@ -617,11 +617,11 @@ class BaseParser:
 
     @property
     def lines(self):
-        if self._lines is None and self.xonsh_code is not None:
-            self._lines = self.xonsh_code.splitlines(keepends=True)
+        if self._lines is None and self._source is not None:
+            self._lines = self._source.splitlines(keepends=True)
         return self._lines
 
-    def source_slice(self, start, stop):
+    def _source_slice(self, start, stop):
         """Gets the original source code from two (line, col) tuples in
         source-space (i.e. lineno start at 1).
         """
@@ -646,7 +646,7 @@ class BaseParser:
             raise SyntaxError()
 
     def _parse_error(self, msg, loc):
-        raise_parse_error(msg, loc, self.xonsh_code, self.lines)
+        raise_parse_error(msg, loc, self._source, self.lines)
 
     #
     # Precedence of operators
@@ -1758,7 +1758,7 @@ class BaseParser:
         p3, p5 = p[3], p[5]
         beg = (p3.lineno, p3.lexpos)
         end = (p5.lineno, p5.lexpos)
-        s = self.source_slice(beg, end)
+        s = self._source_slice(beg, end)
         s = textwrap.dedent(s)
         p[0] = ast.Str(s=s, lineno=beg[0], col_offset=beg[1])
 
@@ -1767,7 +1767,7 @@ class BaseParser:
         p1, p3 = p[1], p[3]
         beg = (p1.lineno, p1.lexpos + 1)
         end = (p3.lineno, p3.lexpos)
-        s = self.source_slice(beg, end).strip()
+        s = self._source_slice(beg, end).strip()
         p[0] = ast.Str(s=s, lineno=beg[0], col_offset=beg[1])
 
     def _attach_nodedent_base_rules(self):
@@ -2572,7 +2572,7 @@ class BaseParser:
         ends = p2 + ends
         elts = []
         for beg, end in zip(begins, ends):
-            s = self.source_slice(beg, end).strip()
+            s = self._source_slice(beg, end).strip()
             if not s:
                 if len(begins) == 1:
                     break
@@ -3146,7 +3146,7 @@ class BaseParser:
         p3 = p[3]
         l = p1.lineno
         c = p1.lexpos + 1
-        subcmd = self.source_slice((l, c), (p3.lineno, p3.lexpos))
+        subcmd = self._source_slice((l, c), (p3.lineno, p3.lexpos))
         subcmd = subcmd.strip() + "\n"
         p0 = [
             ast.Str(s="xonsh", lineno=l, col_offset=c),
@@ -3186,7 +3186,7 @@ class BaseParser:
         p3, p5 = p[3], p[5]
         beg = (p3.lineno, p3.lexpos + 1)
         end = (p5.lineno, p5.lexpos)
-        s = self.source_slice(beg, end).strip()
+        s = self._source_slice(beg, end).strip()
         node = ast.Str(s=s, lineno=beg[0], col_offset=beg[1])
         p[2][-1].elts.append(node)
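
On the parser side the change is a mechanical rename: xonsh_code becomes _source and source_slice becomes _source_slice, marking both as internal. For orientation, here is a rough standalone approximation of what such a (line, col)-based slice helper does over splitlines(keepends=True) output; it illustrates the idea and is not xonsh's exact implementation:

def source_slice(source: str, start, stop) -> str:
    """Cut `source` between two 1-based (line, column) positions (sketch)."""
    lines = source.splitlines(keepends=True)
    (bline, bcol), (eline, ecol) = start, stop
    bline -= 1          # 1-based line numbers -> 0-based list indices
    eline -= 1
    if bline == eline:
        return lines[bline][bcol:ecol]
    pieces = [lines[bline][bcol:]]           # tail of the first line
    pieces.extend(lines[bline + 1 : eline])  # full middle lines, newlines kept
    pieces.append(lines[eline][:ecol])       # head of the last line
    return "".join(pieces)


src = "x = 1 + 2\nprint(x)\n"
print(source_slice(src, (1, 4), (1, 9)))   # -> "1 + 2"
print(source_slice(src, (1, 0), (2, 5)))   # -> "x = 1 + 2\nprint"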