From ee16178a6e0cd87c8b960fabe52c66d969956294 Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Fri, 7 Oct 2016 14:14:08 -0400 Subject: [PATCH] remove copy-pasta PLY --- xonsh/ply/LICENSE | 34 - xonsh/ply/__init__.py | 5 - xonsh/ply/cpp.py | 908 ----------- xonsh/ply/ctokens.py | 133 -- xonsh/ply/lex.py | 1097 ------------- xonsh/ply/yacc.py | 3465 ----------------------------------------- xonsh/ply/ygen.py | 74 - 7 files changed, 5716 deletions(-) delete mode 100644 xonsh/ply/LICENSE delete mode 100644 xonsh/ply/__init__.py delete mode 100644 xonsh/ply/cpp.py delete mode 100644 xonsh/ply/ctokens.py delete mode 100644 xonsh/ply/lex.py delete mode 100644 xonsh/ply/yacc.py delete mode 100644 xonsh/ply/ygen.py diff --git a/xonsh/ply/LICENSE b/xonsh/ply/LICENSE deleted file mode 100644 index f71c7d550..000000000 --- a/xonsh/ply/LICENSE +++ /dev/null @@ -1,34 +0,0 @@ -PLY (Python Lex-Yacc) Version 3.8 - -Copyright (C) 2001-2015, -David M. Beazley (Dabeaz LLC) -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -* Neither the name of the David Beazley or Dabeaz LLC may be used to - endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -More information about PLY can be obtained on the PLY webpage at: - - http://www.dabeaz.com/ply \ No newline at end of file diff --git a/xonsh/ply/__init__.py b/xonsh/ply/__init__.py deleted file mode 100644 index 65c5e9cec..000000000 --- a/xonsh/ply/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# PLY package -# Author: David Beazley (dave@dabeaz.com) - -__version__ = '3.8' -__all__ = ['lex','yacc'] diff --git a/xonsh/ply/cpp.py b/xonsh/ply/cpp.py deleted file mode 100644 index 2f6a0300d..000000000 --- a/xonsh/ply/cpp.py +++ /dev/null @@ -1,908 +0,0 @@ -# ----------------------------------------------------------------------------- -# cpp.py -# -# Author: David Beazley (http://www.dabeaz.com) -# Copyright (C) 2007 -# All rights reserved -# -# This module implements an ANSI-C style lexical preprocessor for PLY. -# ----------------------------------------------------------------------------- -from __future__ import generators - -# ----------------------------------------------------------------------------- -# Default preprocessor lexer definitions. These tokens are enough to get -# a basic preprocessor working. 
Other modules may import these if they want -# ----------------------------------------------------------------------------- - -tokens = ( - 'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT1', 'CPP_COMMENT2', 'CPP_POUND','CPP_DPOUND' -) - -literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\"" - -# Whitespace -def t_CPP_WS(t): - r'\s+' - t.lexer.lineno += t.value.count("\n") - return t - -t_CPP_POUND = r'\#' -t_CPP_DPOUND = r'\#\#' - -# Identifier -t_CPP_ID = r'[A-Za-z_][\w_]*' - -# Integer literal -def CPP_INTEGER(t): - r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU][lL]|[lL][uU]|[uU]|[lL])?)' - return t - -t_CPP_INTEGER = CPP_INTEGER - -# Floating literal -t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?' - -# String literal -def t_CPP_STRING(t): - r'\"([^\\\n]|(\\(.|\n)))*?\"' - t.lexer.lineno += t.value.count("\n") - return t - -# Character constant 'c' or L'c' -def t_CPP_CHAR(t): - r'(L)?\'([^\\\n]|(\\(.|\n)))*?\'' - t.lexer.lineno += t.value.count("\n") - return t - -# Comment -def t_CPP_COMMENT1(t): - r'(/\*(.|\n)*?\*/)' - ncr = t.value.count("\n") - t.lexer.lineno += ncr - # replace with one space or a number of '\n' - t.type = 'CPP_WS'; t.value = '\n' * ncr if ncr else ' ' - return t - -# Line comment -def t_CPP_COMMENT2(t): - r'(//.*?(\n|$))' - # replace with '/n' - t.type = 'CPP_WS'; t.value = '\n' - -def t_error(t): - t.type = t.value[0] - t.value = t.value[0] - t.lexer.skip(1) - return t - -import re -import copy -import time -import os.path - -# ----------------------------------------------------------------------------- -# trigraph() -# -# Given an input string, this function replaces all trigraph sequences. -# The following mapping is used: -# -# ??= # -# ??/ \ -# ??' ^ -# ??( [ -# ??) ] -# ??! | -# ??< { -# ??> } -# ??- ~ -# ----------------------------------------------------------------------------- - -_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''') -_trigraph_rep = { - '=':'#', - '/':'\\', - "'":'^', - '(':'[', - ')':']', - '!':'|', - '<':'{', - '>':'}', - '-':'~' -} - -def trigraph(input): - return _trigraph_pat.sub(lambda g: _trigraph_rep[g.group()[-1]],input) - -# ------------------------------------------------------------------ -# Macro object -# -# This object holds information about preprocessor macros -# -# .name - Macro name (string) -# .value - Macro value (a list of tokens) -# .arglist - List of argument names -# .variadic - Boolean indicating whether or not variadic macro -# .vararg - Name of the variadic parameter -# -# When a macro is created, the macro replacement token sequence is -# pre-scanned and used to create patch lists that are later used -# during macro expansion -# ------------------------------------------------------------------ - -class Macro(object): - def __init__(self,name,value,arglist=None,variadic=False): - self.name = name - self.value = value - self.arglist = arglist - self.variadic = variadic - if variadic: - self.vararg = arglist[-1] - self.source = None - -# ------------------------------------------------------------------ -# Preprocessor object -# -# Object representing a preprocessor. 
Contains macro definitions, -# include directories, and other information -# ------------------------------------------------------------------ - -class Preprocessor(object): - def __init__(self,lexer=None): - if lexer is None: - lexer = lex.lexer - self.lexer = lexer - self.macros = { } - self.path = [] - self.temp_path = [] - - # Probe the lexer for selected tokens - self.lexprobe() - - tm = time.localtime() - self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y",tm)) - self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S",tm)) - self.parser = None - - # ----------------------------------------------------------------------------- - # tokenize() - # - # Utility function. Given a string of text, tokenize into a list of tokens - # ----------------------------------------------------------------------------- - - def tokenize(self,text): - tokens = [] - self.lexer.input(text) - while True: - tok = self.lexer.token() - if not tok: break - tokens.append(tok) - return tokens - - # --------------------------------------------------------------------- - # error() - # - # Report a preprocessor error/warning of some kind - # ---------------------------------------------------------------------- - - def error(self,file,line,msg): - print("%s:%d %s" % (file,line,msg)) - - # ---------------------------------------------------------------------- - # lexprobe() - # - # This method probes the preprocessor lexer object to discover - # the token types of symbols that are important to the preprocessor. - # If this works right, the preprocessor will simply "work" - # with any suitable lexer regardless of how tokens have been named. - # ---------------------------------------------------------------------- - - def lexprobe(self): - - # Determine the token type for identifiers - self.lexer.input("identifier") - tok = self.lexer.token() - if not tok or tok.value != "identifier": - print("Couldn't determine identifier type") - else: - self.t_ID = tok.type - - # Determine the token type for integers - self.lexer.input("12345") - tok = self.lexer.token() - if not tok or int(tok.value) != 12345: - print("Couldn't determine integer type") - else: - self.t_INTEGER = tok.type - self.t_INTEGER_TYPE = type(tok.value) - - # Determine the token type for strings enclosed in double quotes - self.lexer.input("\"filename\"") - tok = self.lexer.token() - if not tok or tok.value != "\"filename\"": - print("Couldn't determine string type") - else: - self.t_STRING = tok.type - - # Determine the token type for whitespace--if any - self.lexer.input(" ") - tok = self.lexer.token() - if not tok or tok.value != " ": - self.t_SPACE = None - else: - self.t_SPACE = tok.type - - # Determine the token type for newlines - self.lexer.input("\n") - tok = self.lexer.token() - if not tok or tok.value != "\n": - self.t_NEWLINE = None - print("Couldn't determine token for newlines") - else: - self.t_NEWLINE = tok.type - - self.t_WS = (self.t_SPACE, self.t_NEWLINE) - - # Check for other characters used by the preprocessor - chars = [ '<','>','#','##','\\','(',')',',','.'] - for c in chars: - self.lexer.input(c) - tok = self.lexer.token() - if not tok or tok.value != c: - print("Unable to lex '%s' required for preprocessor" % c) - - # ---------------------------------------------------------------------- - # add_path() - # - # Adds a search path to the preprocessor. 
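    # A hedged usage sketch (module and path names here are illustrative
    # only, assuming the standalone PLY distribution):
    #
    #     import ply.lex as lex
    #     import ply.cpp as cpp
    #     p = cpp.Preprocessor(lex.lex(module=cpp))
    #     p.add_path('/usr/local/include')   # consulted by '#include <...>'
    #     p.parse('#include <config.h>\n', source='demo.c')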
- # ---------------------------------------------------------------------- - - def add_path(self,path): - self.path.append(path) - - # ---------------------------------------------------------------------- - # group_lines() - # - # Given an input string, this function splits it into lines. Trailing whitespace - # is removed. Any line ending with \ is grouped with the next line. This - # function forms the lowest level of the preprocessor---grouping into text into - # a line-by-line format. - # ---------------------------------------------------------------------- - - def group_lines(self,input): - lex = self.lexer.clone() - lines = [x.rstrip() for x in input.splitlines()] - for i in xrange(len(lines)): - j = i+1 - while lines[i].endswith('\\') and (j < len(lines)): - lines[i] = lines[i][:-1]+lines[j] - lines[j] = "" - j += 1 - - input = "\n".join(lines) - lex.input(input) - lex.lineno = 1 - - current_line = [] - while True: - tok = lex.token() - if not tok: - break - current_line.append(tok) - if tok.type in self.t_WS and '\n' in tok.value: - yield current_line - current_line = [] - - if current_line: - yield current_line - - # ---------------------------------------------------------------------- - # tokenstrip() - # - # Remove leading/trailing whitespace tokens from a token list - # ---------------------------------------------------------------------- - - def tokenstrip(self,tokens): - i = 0 - while i < len(tokens) and tokens[i].type in self.t_WS: - i += 1 - del tokens[:i] - i = len(tokens)-1 - while i >= 0 and tokens[i].type in self.t_WS: - i -= 1 - del tokens[i+1:] - return tokens - - - # ---------------------------------------------------------------------- - # collect_args() - # - # Collects comma separated arguments from a list of tokens. The arguments - # must be enclosed in parenthesis. Returns a tuple (tokencount,args,positions) - # where tokencount is the number of tokens consumed, args is a list of arguments, - # and positions is a list of integers containing the starting index of each - # argument. Each argument is represented by a list of tokens. - # - # When collecting arguments, leading and trailing whitespace is removed - # from each argument. - # - # This function properly handles nested parenthesis and commas---these do not - # define new arguments. - # ---------------------------------------------------------------------- - - def collect_args(self,tokenlist): - args = [] - positions = [] - current_arg = [] - nesting = 1 - tokenlen = len(tokenlist) - - # Search for the opening '('. 
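        # Worked example (hypothetical one-character tokens, whitespace
        # omitted): for the token text
        #     (a, f(b, c), d)
        # this returns tokencount=12, positions=[1, 3, 10, 11], and
        # args=[[a], [f, (, b, ,, c, )], [d]]; the comma inside f(b, c)
        # does not start a new argument because nesting > 1 at that point.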
- i = 0 - while (i < tokenlen) and (tokenlist[i].type in self.t_WS): - i += 1 - - if (i < tokenlen) and (tokenlist[i].value == '('): - positions.append(i+1) - else: - self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments") - return 0, [], [] - - i += 1 - - while i < tokenlen: - t = tokenlist[i] - if t.value == '(': - current_arg.append(t) - nesting += 1 - elif t.value == ')': - nesting -= 1 - if nesting == 0: - if current_arg: - args.append(self.tokenstrip(current_arg)) - positions.append(i) - return i+1,args,positions - current_arg.append(t) - elif t.value == ',' and nesting == 1: - args.append(self.tokenstrip(current_arg)) - positions.append(i+1) - current_arg = [] - else: - current_arg.append(t) - i += 1 - - # Missing end argument - self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments") - return 0, [],[] - - # ---------------------------------------------------------------------- - # macro_prescan() - # - # Examine the macro value (token sequence) and identify patch points - # This is used to speed up macro expansion later on---we'll know - # right away where to apply patches to the value to form the expansion - # ---------------------------------------------------------------------- - - def macro_prescan(self,macro): - macro.patch = [] # Standard macro arguments - macro.str_patch = [] # String conversion expansion - macro.var_comma_patch = [] # Variadic macro comma patch - i = 0 - while i < len(macro.value): - if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist: - argnum = macro.arglist.index(macro.value[i].value) - # Conversion of argument to a string - if i > 0 and macro.value[i-1].value == '#': - macro.value[i] = copy.copy(macro.value[i]) - macro.value[i].type = self.t_STRING - del macro.value[i-1] - macro.str_patch.append((argnum,i-1)) - continue - # Concatenation - elif (i > 0 and macro.value[i-1].value == '##'): - macro.patch.append(('c',argnum,i-1)) - del macro.value[i-1] - continue - elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'): - macro.patch.append(('c',argnum,i)) - i += 1 - continue - # Standard expansion - else: - macro.patch.append(('e',argnum,i)) - elif macro.value[i].value == '##': - if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \ - ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \ - (macro.value[i+1].value == macro.vararg): - macro.var_comma_patch.append(i-1) - i += 1 - macro.patch.sort(key=lambda x: x[2],reverse=True) - - # ---------------------------------------------------------------------- - # macro_expand_args() - # - # Given a Macro and list of arguments (each a token list), this method - # returns an expanded version of a macro. The return value is a token sequence - # representing the replacement macro tokens - # ---------------------------------------------------------------------- - - def macro_expand_args(self,macro,args): - # Make a copy of the macro token sequence - rep = [copy.copy(_x) for _x in macro.value] - - # Make string expansion patches. These do not alter the length of the replacement sequence - - str_expansion = {} - for argnum, i in macro.str_patch: - if argnum not in str_expansion: - str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\") - rep[i] = copy.copy(rep[i]) - rep[i].value = str_expansion[argnum] - - # Make the variadic macro comma patch. 
If the variadic macro argument is empty, we get rid - comma_patch = False - if macro.variadic and not args[-1]: - for i in macro.var_comma_patch: - rep[i] = None - comma_patch = True - - # Make all other patches. The order of these matters. It is assumed that the patch list - # has been sorted in reverse order of patch location since replacements will cause the - # size of the replacement sequence to expand from the patch point. - - expanded = { } - for ptype, argnum, i in macro.patch: - # Concatenation. Argument is left unexpanded - if ptype == 'c': - rep[i:i+1] = args[argnum] - # Normal expansion. Argument is macro expanded first - elif ptype == 'e': - if argnum not in expanded: - expanded[argnum] = self.expand_macros(args[argnum]) - rep[i:i+1] = expanded[argnum] - - # Get rid of removed comma if necessary - if comma_patch: - rep = [_i for _i in rep if _i] - - return rep - - - # ---------------------------------------------------------------------- - # expand_macros() - # - # Given a list of tokens, this function performs macro expansion. - # The expanded argument is a dictionary that contains macros already - # expanded. This is used to prevent infinite recursion. - # ---------------------------------------------------------------------- - - def expand_macros(self,tokens,expanded=None): - if expanded is None: - expanded = {} - i = 0 - while i < len(tokens): - t = tokens[i] - if t.type == self.t_ID: - if t.value in self.macros and t.value not in expanded: - # Yes, we found a macro match - expanded[t.value] = True - - m = self.macros[t.value] - if not m.arglist: - # A simple macro - ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded) - for e in ex: - e.lineno = t.lineno - tokens[i:i+1] = ex - i += len(ex) - else: - # A macro with arguments - j = i + 1 - while j < len(tokens) and tokens[j].type in self.t_WS: - j += 1 - if tokens[j].value == '(': - tokcount,args,positions = self.collect_args(tokens[j:]) - if not m.variadic and len(args) != len(m.arglist): - self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist))) - i = j + tokcount - elif m.variadic and len(args) < len(m.arglist)-1: - if len(m.arglist) > 2: - self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1)) - else: - self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1)) - i = j + tokcount - else: - if m.variadic: - if len(args) == len(m.arglist)-1: - args.append([]) - else: - args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1] - del args[len(m.arglist):] - - # Get macro replacement text - rep = self.macro_expand_args(m,args) - rep = self.expand_macros(rep,expanded) - for r in rep: - r.lineno = t.lineno - tokens[i:j+tokcount] = rep - i += len(rep) - del expanded[t.value] - continue - elif t.value == '__LINE__': - t.type = self.t_INTEGER - t.value = self.t_INTEGER_TYPE(t.lineno) - - i += 1 - return tokens - - # ---------------------------------------------------------------------- - # evalexpr() - # - # Evaluate an expression token sequence for the purposes of evaluating - # integral expressions. 
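    # A sketch of the rewriting performed below: with FOO defined and BAR a
    # macro for 3, the controlling expression of
    #     #if defined(FOO) && BAR >= 2
    # becomes the string "1 and 3 >= 2" -- defined(...) collapses to an
    # integer token, macros are expanded, integer suffixes are stripped, and
    # && / || / ! are mapped to Python's and / or / not before eval().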
- # ---------------------------------------------------------------------- - - def evalexpr(self,tokens): - # tokens = tokenize(line) - # Search for defined macros - i = 0 - while i < len(tokens): - if tokens[i].type == self.t_ID and tokens[i].value == 'defined': - j = i + 1 - needparen = False - result = "0L" - while j < len(tokens): - if tokens[j].type in self.t_WS: - j += 1 - continue - elif tokens[j].type == self.t_ID: - if tokens[j].value in self.macros: - result = "1L" - else: - result = "0L" - if not needparen: break - elif tokens[j].value == '(': - needparen = True - elif tokens[j].value == ')': - break - else: - self.error(self.source,tokens[i].lineno,"Malformed defined()") - j += 1 - tokens[i].type = self.t_INTEGER - tokens[i].value = self.t_INTEGER_TYPE(result) - del tokens[i+1:j+1] - i += 1 - tokens = self.expand_macros(tokens) - for i,t in enumerate(tokens): - if t.type == self.t_ID: - tokens[i] = copy.copy(t) - tokens[i].type = self.t_INTEGER - tokens[i].value = self.t_INTEGER_TYPE("0L") - elif t.type == self.t_INTEGER: - tokens[i] = copy.copy(t) - # Strip off any trailing suffixes - tokens[i].value = str(tokens[i].value) - while tokens[i].value[-1] not in "0123456789abcdefABCDEF": - tokens[i].value = tokens[i].value[:-1] - - expr = "".join([str(x.value) for x in tokens]) - expr = expr.replace("&&"," and ") - expr = expr.replace("||"," or ") - expr = expr.replace("!"," not ") - try: - result = eval(expr) - except StandardError: - self.error(self.source,tokens[0].lineno,"Couldn't evaluate expression") - result = 0 - return result - - # ---------------------------------------------------------------------- - # parsegen() - # - # Parse an input string/ - # ---------------------------------------------------------------------- - def parsegen(self,input,source=None): - - # Replace trigraph sequences - t = trigraph(input) - lines = self.group_lines(t) - - if not source: - source = "" - - self.define("__FILE__ \"%s\"" % source) - - self.source = source - chunk = [] - enable = True - iftrigger = False - ifstack = [] - - for x in lines: - for i,tok in enumerate(x): - if tok.type not in self.t_WS: break - if tok.value == '#': - # Preprocessor directive - - # insert necessary whitespace instead of eaten tokens - for tok in x: - if tok.type in self.t_WS and '\n' in tok.value: - chunk.append(tok) - - dirtokens = self.tokenstrip(x[i+1:]) - if dirtokens: - name = dirtokens[0].value - args = self.tokenstrip(dirtokens[1:]) - else: - name = "" - args = [] - - if name == 'define': - if enable: - for tok in self.expand_macros(chunk): - yield tok - chunk = [] - self.define(args) - elif name == 'include': - if enable: - for tok in self.expand_macros(chunk): - yield tok - chunk = [] - oldfile = self.macros['__FILE__'] - for tok in self.include(args): - yield tok - self.macros['__FILE__'] = oldfile - self.source = source - elif name == 'undef': - if enable: - for tok in self.expand_macros(chunk): - yield tok - chunk = [] - self.undef(args) - elif name == 'ifdef': - ifstack.append((enable,iftrigger)) - if enable: - if not args[0].value in self.macros: - enable = False - iftrigger = False - else: - iftrigger = True - elif name == 'ifndef': - ifstack.append((enable,iftrigger)) - if enable: - if args[0].value in self.macros: - enable = False - iftrigger = False - else: - iftrigger = True - elif name == 'if': - ifstack.append((enable,iftrigger)) - if enable: - result = self.evalexpr(args) - if not result: - enable = False - iftrigger = False - else: - iftrigger = True - elif name == 'elif': - if ifstack: 
- if ifstack[-1][0]: # We only pay attention if outer "if" allows this - if enable: # If already true, we flip enable False - enable = False - elif not iftrigger: # If False, but not triggered yet, we'll check expression - result = self.evalexpr(args) - if result: - enable = True - iftrigger = True - else: - self.error(self.source,dirtokens[0].lineno,"Misplaced #elif") - - elif name == 'else': - if ifstack: - if ifstack[-1][0]: - if enable: - enable = False - elif not iftrigger: - enable = True - iftrigger = True - else: - self.error(self.source,dirtokens[0].lineno,"Misplaced #else") - - elif name == 'endif': - if ifstack: - enable,iftrigger = ifstack.pop() - else: - self.error(self.source,dirtokens[0].lineno,"Misplaced #endif") - else: - # Unknown preprocessor directive - pass - - else: - # Normal text - if enable: - chunk.extend(x) - - for tok in self.expand_macros(chunk): - yield tok - chunk = [] - - # ---------------------------------------------------------------------- - # include() - # - # Implementation of file-inclusion - # ---------------------------------------------------------------------- - - def include(self,tokens): - # Try to extract the filename and then process an include file - if not tokens: - return - if tokens: - if tokens[0].value != '<' and tokens[0].type != self.t_STRING: - tokens = self.expand_macros(tokens) - - if tokens[0].value == '<': - # Include <...> - i = 1 - while i < len(tokens): - if tokens[i].value == '>': - break - i += 1 - else: - print("Malformed #include <...>") - return - filename = "".join([x.value for x in tokens[1:i]]) - path = self.path + [""] + self.temp_path - elif tokens[0].type == self.t_STRING: - filename = tokens[0].value[1:-1] - path = self.temp_path + [""] + self.path - else: - print("Malformed #include statement") - return - for p in path: - iname = os.path.join(p,filename) - try: - data = open(iname,"r").read() - dname = os.path.dirname(iname) - if dname: - self.temp_path.insert(0,dname) - for tok in self.parsegen(data,filename): - yield tok - if dname: - del self.temp_path[0] - break - except IOError: - pass - else: - print("Couldn't find '%s'" % filename) - - # ---------------------------------------------------------------------- - # define() - # - # Define a new macro - # ---------------------------------------------------------------------- - - def define(self,tokens): - if isinstance(tokens,(str,unicode)): - tokens = self.tokenize(tokens) - - linetok = tokens - try: - name = linetok[0] - if len(linetok) > 1: - mtype = linetok[1] - else: - mtype = None - if not mtype: - m = Macro(name.value,[]) - self.macros[name.value] = m - elif mtype.type in self.t_WS: - # A normal macro - m = Macro(name.value,self.tokenstrip(linetok[2:])) - self.macros[name.value] = m - elif mtype.value == '(': - # A macro with arguments - tokcount, args, positions = self.collect_args(linetok[1:]) - variadic = False - for a in args: - if variadic: - print("No more arguments may follow a variadic argument") - break - astr = "".join([str(_i.value) for _i in a]) - if astr == "...": - variadic = True - a[0].type = self.t_ID - a[0].value = '__VA_ARGS__' - variadic = True - del a[1:] - continue - elif astr[-3:] == "..." and a[0].type == self.t_ID: - variadic = True - del a[1:] - # If, for some reason, "." 
is part of the identifier, strip off the name for the purposes - # of macro expansion - if a[0].value[-3:] == '...': - a[0].value = a[0].value[:-3] - continue - if len(a) > 1 or a[0].type != self.t_ID: - print("Invalid macro argument") - break - else: - mvalue = self.tokenstrip(linetok[1+tokcount:]) - i = 0 - while i < len(mvalue): - if i+1 < len(mvalue): - if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##': - del mvalue[i] - continue - elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS: - del mvalue[i+1] - i += 1 - m = Macro(name.value,mvalue,[x[0].value for x in args],variadic) - self.macro_prescan(m) - self.macros[name.value] = m - else: - print("Bad macro definition") - except LookupError: - print("Bad macro definition") - - # ---------------------------------------------------------------------- - # undef() - # - # Undefine a macro - # ---------------------------------------------------------------------- - - def undef(self,tokens): - id = tokens[0].value - try: - del self.macros[id] - except LookupError: - pass - - # ---------------------------------------------------------------------- - # parse() - # - # Parse input text. - # ---------------------------------------------------------------------- - def parse(self,input,source=None,ignore={}): - self.ignore = ignore - self.parser = self.parsegen(input,source) - - # ---------------------------------------------------------------------- - # token() - # - # Method to return individual tokens - # ---------------------------------------------------------------------- - def token(self): - try: - while True: - tok = next(self.parser) - if tok.type not in self.ignore: return tok - except StopIteration: - self.parser = None - return None - -if __name__ == '__main__': - import ply.lex as lex - lexer = lex.lex() - - # Run a preprocessor - import sys - f = open(sys.argv[1]) - input = f.read() - - p = Preprocessor(lexer) - p.parse(input,sys.argv[1]) - while True: - tok = p.token() - if not tok: break - print(p.source, tok) - - - - - - - - - - - diff --git a/xonsh/ply/ctokens.py b/xonsh/ply/ctokens.py deleted file mode 100644 index f6f6952d6..000000000 --- a/xonsh/ply/ctokens.py +++ /dev/null @@ -1,133 +0,0 @@ -# ---------------------------------------------------------------------- -# ctokens.py -# -# Token specifications for symbols in ANSI C and C++. This file is -# meant to be used as a library in other tokenizers. -# ---------------------------------------------------------------------- - -# Reserved words - -tokens = [ - # Literals (identifier, integer constant, float constant, string constant, char const) - 'ID', 'TYPEID', 'INTEGER', 'FLOAT', 'STRING', 'CHARACTER', - - # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=) - 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MODULO', - 'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT', - 'LOR', 'LAND', 'LNOT', - 'LT', 'LE', 'GT', 'GE', 'EQ', 'NE', - - # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=) - 'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL', - 'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL', - - # Increment/decrement (++,--) - 'INCREMENT', 'DECREMENT', - - # Structure dereference (->) - 'ARROW', - - # Ternary operator (?) - 'TERNARY', - - # Delimeters ( ) [ ] { } , . ; : - 'LPAREN', 'RPAREN', - 'LBRACKET', 'RBRACKET', - 'LBRACE', 'RBRACE', - 'COMMA', 'PERIOD', 'SEMI', 'COLON', - - # Ellipsis (...) 
- 'ELLIPSIS', -] - -# Operators -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_MODULO = r'%' -t_OR = r'\|' -t_AND = r'&' -t_NOT = r'~' -t_XOR = r'\^' -t_LSHIFT = r'<<' -t_RSHIFT = r'>>' -t_LOR = r'\|\|' -t_LAND = r'&&' -t_LNOT = r'!' -t_LT = r'<' -t_GT = r'>' -t_LE = r'<=' -t_GE = r'>=' -t_EQ = r'==' -t_NE = r'!=' - -# Assignment operators - -t_EQUALS = r'=' -t_TIMESEQUAL = r'\*=' -t_DIVEQUAL = r'/=' -t_MODEQUAL = r'%=' -t_PLUSEQUAL = r'\+=' -t_MINUSEQUAL = r'-=' -t_LSHIFTEQUAL = r'<<=' -t_RSHIFTEQUAL = r'>>=' -t_ANDEQUAL = r'&=' -t_OREQUAL = r'\|=' -t_XOREQUAL = r'\^=' - -# Increment/decrement -t_INCREMENT = r'\+\+' -t_DECREMENT = r'--' - -# -> -t_ARROW = r'->' - -# ? -t_TERNARY = r'\?' - -# Delimeters -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_LBRACKET = r'\[' -t_RBRACKET = r'\]' -t_LBRACE = r'\{' -t_RBRACE = r'\}' -t_COMMA = r',' -t_PERIOD = r'\.' -t_SEMI = r';' -t_COLON = r':' -t_ELLIPSIS = r'\.\.\.' - -# Identifiers -t_ID = r'[A-Za-z_][A-Za-z0-9_]*' - -# Integer literal -t_INTEGER = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?' - -# Floating literal -t_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?' - -# String literal -t_STRING = r'\"([^\\\n]|(\\.))*?\"' - -# Character constant 'c' or L'c' -t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\'' - -# Comment (C-Style) -def t_COMMENT(t): - r'/\*(.|\n)*?\*/' - t.lexer.lineno += t.value.count('\n') - return t - -# Comment (C++-Style) -def t_CPPCOMMENT(t): - r'//.*\n' - t.lexer.lineno += 1 - return t - - - - - - diff --git a/xonsh/ply/lex.py b/xonsh/ply/lex.py deleted file mode 100644 index 3a40f62d9..000000000 --- a/xonsh/ply/lex.py +++ /dev/null @@ -1,1097 +0,0 @@ -# ----------------------------------------------------------------------------- -# ply: lex.py -# -# Copyright (C) 2001-2015, -# David M. Beazley (Dabeaz LLC) -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# * Neither the name of the David Beazley or Dabeaz LLC may be used to -# endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# ----------------------------------------------------------------------------- - -__version__ = '3.8' -__tabversion__ = '3.8' - -import re -import sys -import types -import copy -import os -import inspect - -# This tuple contains known string types -try: - # Python 2.6 - StringTypes = (types.StringType, types.UnicodeType) -except AttributeError: - # Python 3.0 - StringTypes = (str, bytes) - -# This regular expression is used to match valid token names -_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') - -# Exception thrown when invalid token encountered and no default error -# handler is defined. -class LexError(Exception): - def __init__(self, message, s): - self.args = (message,) - self.text = s - - -# Token class. This class is used to represent the tokens produced. -class LexToken(object): - def __str__(self): - return 'LexToken(%s,%r,%d,%d)' % (self.type, self.value, self.lineno, self.lexpos) - - def __repr__(self): - return str(self) - - -# This object is a stand-in for a logging object created by the -# logging module. - -class PlyLogger(object): - def __init__(self, f): - self.f = f - - def critical(self, msg, *args, **kwargs): - self.f.write((msg % args) + '\n') - - def warning(self, msg, *args, **kwargs): - self.f.write('WARNING: ' + (msg % args) + '\n') - - def error(self, msg, *args, **kwargs): - self.f.write('ERROR: ' + (msg % args) + '\n') - - info = critical - debug = critical - - -# Null logger is used when no output is generated. Does nothing. -class NullLogger(object): - def __getattribute__(self, name): - return self - - def __call__(self, *args, **kwargs): - return self - - -# ----------------------------------------------------------------------------- -# === Lexing Engine === -# -# The following Lexer class implements the lexer runtime. There are only -# a few public methods and attributes: -# -# input() - Store a new string in the lexer -# token() - Get the next token -# clone() - Clone the lexer -# -# lineno - Current line number -# lexpos - Current position in the input string -# ----------------------------------------------------------------------------- - -class Lexer: - def __init__(self): - self.lexre = None # Master regular expression. 
This is a list of - # tuples (re, findex) where re is a compiled - # regular expression and findex is a list - # mapping regex group numbers to rules - self.lexretext = None # Current regular expression strings - self.lexstatere = {} # Dictionary mapping lexer states to master regexs - self.lexstateretext = {} # Dictionary mapping lexer states to regex strings - self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names - self.lexstate = 'INITIAL' # Current lexer state - self.lexstatestack = [] # Stack of lexer states - self.lexstateinfo = None # State information - self.lexstateignore = {} # Dictionary of ignored characters for each state - self.lexstateerrorf = {} # Dictionary of error functions for each state - self.lexstateeoff = {} # Dictionary of eof functions for each state - self.lexreflags = 0 # Optional re compile flags - self.lexdata = None # Actual input data (as a string) - self.lexpos = 0 # Current position in input text - self.lexlen = 0 # Length of the input text - self.lexerrorf = None # Error rule (if any) - self.lexeoff = None # EOF rule (if any) - self.lextokens = None # List of valid tokens - self.lexignore = '' # Ignored characters - self.lexliterals = '' # Literal characters that can be passed through - self.lexmodule = None # Module - self.lineno = 1 # Current line number - self.lexoptimize = False # Optimized mode - - def clone(self, object=None): - c = copy.copy(self) - - # If the object parameter has been supplied, it means we are attaching the - # lexer to a new object. In this case, we have to rebind all methods in - # the lexstatere and lexstateerrorf tables. - - if object: - newtab = {} - for key, ritem in self.lexstatere.items(): - newre = [] - for cre, findex in ritem: - newfindex = [] - for f in findex: - if not f or not f[0]: - newfindex.append(f) - continue - newfindex.append((getattr(object, f[0].__name__), f[1])) - newre.append((cre, newfindex)) - newtab[key] = newre - c.lexstatere = newtab - c.lexstateerrorf = {} - for key, ef in self.lexstateerrorf.items(): - c.lexstateerrorf[key] = getattr(object, ef.__name__) - c.lexmodule = object - return c - - # ------------------------------------------------------------ - # writetab() - Write lexer information to a table file - # ------------------------------------------------------------ - def writetab(self, lextab, outputdir=''): - if isinstance(lextab, types.ModuleType): - raise IOError("Won't overwrite existing lextab module") - basetabmodule = lextab.split('.')[-1] - filename = os.path.join(outputdir, basetabmodule) + '.py' - with open(filename, 'w') as tf: - tf.write('# %s.py. This file automatically created by PLY (version %s). 
Don\'t edit!\n' % (basetabmodule, __version__)) - tf.write('_tabversion = %s\n' % repr(__tabversion__)) - tf.write('_lextokens = %s\n' % repr(self.lextokens)) - tf.write('_lexreflags = %s\n' % repr(self.lexreflags)) - tf.write('_lexliterals = %s\n' % repr(self.lexliterals)) - tf.write('_lexstateinfo = %s\n' % repr(self.lexstateinfo)) - - # Rewrite the lexstatere table, replacing function objects with function names - tabre = {} - for statename, lre in self.lexstatere.items(): - titem = [] - for (pat, func), retext, renames in zip(lre, self.lexstateretext[statename], self.lexstaterenames[statename]): - titem.append((retext, _funcs_to_names(func, renames))) - tabre[statename] = titem - - tf.write('_lexstatere = %s\n' % repr(tabre)) - tf.write('_lexstateignore = %s\n' % repr(self.lexstateignore)) - - taberr = {} - for statename, ef in self.lexstateerrorf.items(): - taberr[statename] = ef.__name__ if ef else None - tf.write('_lexstateerrorf = %s\n' % repr(taberr)) - - tabeof = {} - for statename, ef in self.lexstateeoff.items(): - tabeof[statename] = ef.__name__ if ef else None - tf.write('_lexstateeoff = %s\n' % repr(tabeof)) - - # ------------------------------------------------------------ - # readtab() - Read lexer information from a tab file - # ------------------------------------------------------------ - def readtab(self, tabfile, fdict): - if isinstance(tabfile, types.ModuleType): - lextab = tabfile - else: - exec('import %s' % tabfile) - lextab = sys.modules[tabfile] - - if getattr(lextab, '_tabversion', '0.0') != __tabversion__: - raise ImportError('Inconsistent PLY version') - - self.lextokens = lextab._lextokens - self.lexreflags = lextab._lexreflags - self.lexliterals = lextab._lexliterals - self.lextokens_all = self.lextokens | set(self.lexliterals) - self.lexstateinfo = lextab._lexstateinfo - self.lexstateignore = lextab._lexstateignore - self.lexstatere = {} - self.lexstateretext = {} - for statename, lre in lextab._lexstatere.items(): - titem = [] - txtitem = [] - for pat, func_name in lre: - titem.append((re.compile(pat, lextab._lexreflags | re.VERBOSE), _names_to_funcs(func_name, fdict))) - - self.lexstatere[statename] = titem - self.lexstateretext[statename] = txtitem - - self.lexstateerrorf = {} - for statename, ef in lextab._lexstateerrorf.items(): - self.lexstateerrorf[statename] = fdict[ef] - - self.lexstateeoff = {} - for statename, ef in lextab._lexstateeoff.items(): - self.lexstateeoff[statename] = fdict[ef] - - self.begin('INITIAL') - - # ------------------------------------------------------------ - # input() - Push a new string into the lexer - # ------------------------------------------------------------ - def input(self, s): - # Pull off the first character to see if s looks like a string - c = s[:1] - if not isinstance(c, StringTypes): - raise ValueError('Expected a string') - self.lexdata = s - self.lexpos = 0 - self.lexlen = len(s) - - # ------------------------------------------------------------ - # begin() - Changes the lexing state - # ------------------------------------------------------------ - def begin(self, state): - if state not in self.lexstatere: - raise ValueError('Undefined state') - self.lexre = self.lexstatere[state] - self.lexretext = self.lexstateretext[state] - self.lexignore = self.lexstateignore.get(state, '') - self.lexerrorf = self.lexstateerrorf.get(state, None) - self.lexeoff = self.lexstateeoff.get(state, None) - self.lexstate = state - - # ------------------------------------------------------------ - # push_state() - Changes the 
lexing state and saves old on stack - # ------------------------------------------------------------ - def push_state(self, state): - self.lexstatestack.append(self.lexstate) - self.begin(state) - - # ------------------------------------------------------------ - # pop_state() - Restores the previous state - # ------------------------------------------------------------ - def pop_state(self): - self.begin(self.lexstatestack.pop()) - - # ------------------------------------------------------------ - # current_state() - Returns the current lexing state - # ------------------------------------------------------------ - def current_state(self): - return self.lexstate - - # ------------------------------------------------------------ - # skip() - Skip ahead n characters - # ------------------------------------------------------------ - def skip(self, n): - self.lexpos += n - - # ------------------------------------------------------------ - # opttoken() - Return the next token from the Lexer - # - # Note: This function has been carefully implemented to be as fast - # as possible. Don't make changes unless you really know what - # you are doing - # ------------------------------------------------------------ - def token(self): - # Make local copies of frequently referenced attributes - lexpos = self.lexpos - lexlen = self.lexlen - lexignore = self.lexignore - lexdata = self.lexdata - - while lexpos < lexlen: - # This code provides some short-circuit code for whitespace, tabs, and other ignored characters - if lexdata[lexpos] in lexignore: - lexpos += 1 - continue - - # Look for a regular expression match - for lexre, lexindexfunc in self.lexre: - m = lexre.match(lexdata, lexpos) - if not m: - continue - - # Create a token for return - tok = LexToken() - tok.value = m.group() - tok.lineno = self.lineno - tok.lexpos = lexpos - - i = m.lastindex - func, tok.type = lexindexfunc[i] - - if not func: - # If no token type was set, it's an ignored token - if tok.type: - self.lexpos = m.end() - return tok - else: - lexpos = m.end() - break - - lexpos = m.end() - - # If token is processed by a function, call it - - tok.lexer = self # Set additional attributes useful in token rules - self.lexmatch = m - self.lexpos = lexpos - - newtok = func(tok) - - # Every function must return a token, if nothing, we just move to next token - if not newtok: - lexpos = self.lexpos # This is here in case user has updated lexpos. - lexignore = self.lexignore # This is here in case there was a state change - break - - # Verify type of the token. If not in the token map, raise an error - if not self.lexoptimize: - if newtok.type not in self.lextokens_all: - raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( - func.__code__.co_filename, func.__code__.co_firstlineno, - func.__name__, newtok.type), lexdata[lexpos:]) - - return newtok - else: - # No match, see if in literals - if lexdata[lexpos] in self.lexliterals: - tok = LexToken() - tok.value = lexdata[lexpos] - tok.lineno = self.lineno - tok.type = tok.value - tok.lexpos = lexpos - self.lexpos = lexpos + 1 - return tok - - # No match. Call t_error() if defined. - if self.lexerrorf: - tok = LexToken() - tok.value = self.lexdata[lexpos:] - tok.lineno = self.lineno - tok.type = 'error' - tok.lexer = self - tok.lexpos = lexpos - self.lexpos = lexpos - newtok = self.lexerrorf(tok) - if lexpos == self.lexpos: - # Error method didn't change text position at all. This is an error. - raise LexError("Scanning error. 
Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) - lexpos = self.lexpos - if not newtok: - continue - return newtok - - self.lexpos = lexpos - raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos], lexpos), lexdata[lexpos:]) - - if self.lexeoff: - tok = LexToken() - tok.type = 'eof' - tok.value = '' - tok.lineno = self.lineno - tok.lexpos = lexpos - tok.lexer = self - self.lexpos = lexpos - newtok = self.lexeoff(tok) - return newtok - - self.lexpos = lexpos + 1 - if self.lexdata is None: - raise RuntimeError('No input string given with input()') - return None - - # Iterator interface - def __iter__(self): - return self - - def next(self): - t = self.token() - if t is None: - raise StopIteration - return t - - __next__ = next - -# ----------------------------------------------------------------------------- -# ==== Lex Builder === -# -# The functions and classes below are used to collect lexing information -# and build a Lexer object from it. -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# _get_regex(func) -# -# Returns the regular expression assigned to a function either as a doc string -# or as a .regex attribute attached by the @TOKEN decorator. -# ----------------------------------------------------------------------------- -def _get_regex(func): - return getattr(func, 'regex', func.__doc__) - -# ----------------------------------------------------------------------------- -# get_caller_module_dict() -# -# This function returns a dictionary containing all of the symbols defined within -# a caller further down the call stack. This is used to get the environment -# associated with the yacc() call if none was provided. -# ----------------------------------------------------------------------------- -def get_caller_module_dict(levels): - f = sys._getframe(levels) - ldict = f.f_globals.copy() - if f.f_globals != f.f_locals: - ldict.update(f.f_locals) - return ldict - -# ----------------------------------------------------------------------------- -# _funcs_to_names() -# -# Given a list of regular expression functions, this converts it to a list -# suitable for output to a table file -# ----------------------------------------------------------------------------- -def _funcs_to_names(funclist, namelist): - result = [] - for f, name in zip(funclist, namelist): - if f and f[0]: - result.append((name, f[1])) - else: - result.append(f) - return result - -# ----------------------------------------------------------------------------- -# _names_to_funcs() -# -# Given a list of regular expression function names, this converts it back to -# functions. -# ----------------------------------------------------------------------------- -def _names_to_funcs(namelist, fdict): - result = [] - for n in namelist: - if n and n[0]: - result.append((fdict[n[0]], n[1])) - else: - result.append(n) - return result - -# ----------------------------------------------------------------------------- -# _form_master_re() -# -# This function takes a list of all of the regex components and attempts to -# form the master regular expression. Given limitations in the Python re -# module, it may be necessary to break the master regex into separate expressions. 
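A minimal, self-contained illustration of the named-group dispatch built
here (the two rules are hypothetical stand-ins, not PLY's own):

    import re

    # Each pattern is wrapped in a named group, as lex() does with
    # '(?P<%s>%s)' % (fname, regex); the group name identifies the rule.
    rules = [('NUMBER', r'\d+'), ('NAME', r'[A-Za-z_][A-Za-z0-9_]*')]
    master = re.compile('|'.join('(?P<%s>%s)' % (n, p) for n, p in rules))

    m = master.match('count42')
    print(m.lastgroup, m.group())   # -> NAME count42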
-# ----------------------------------------------------------------------------- -def _form_master_re(relist, reflags, ldict, toknames): - if not relist: - return [] - regex = '|'.join(relist) - try: - lexre = re.compile(regex, re.VERBOSE | reflags) - - # Build the index to function map for the matching engine - lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1) - lexindexnames = lexindexfunc[:] - - for f, i in lexre.groupindex.items(): - handle = ldict.get(f, None) - if type(handle) in (types.FunctionType, types.MethodType): - lexindexfunc[i] = (handle, toknames[f]) - lexindexnames[i] = f - elif handle is not None: - lexindexnames[i] = f - if f.find('ignore_') > 0: - lexindexfunc[i] = (None, None) - else: - lexindexfunc[i] = (None, toknames[f]) - - return [(lexre, lexindexfunc)], [regex], [lexindexnames] - except Exception: - m = int(len(relist)/2) - if m == 0: - m = 1 - llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames) - rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames) - return (llist+rlist), (lre+rre), (lnames+rnames) - -# ----------------------------------------------------------------------------- -# def _statetoken(s,names) -# -# Given a declaration name s of the form "t_" and a dictionary whose keys are -# state names, this function returns a tuple (states,tokenname) where states -# is a tuple of state names and tokenname is the name of the token. For example, -# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') -# ----------------------------------------------------------------------------- -def _statetoken(s, names): - nonstate = 1 - parts = s.split('_') - for i, part in enumerate(parts[1:], 1): - if part not in names and part != 'ANY': - break - - if i > 1: - states = tuple(parts[1:i]) - else: - states = ('INITIAL',) - - if 'ANY' in states: - states = tuple(names) - - tokenname = '_'.join(parts[i:]) - return (states, tokenname) - - -# ----------------------------------------------------------------------------- -# LexerReflect() -# -# This class represents information needed to build a lexer as extracted from a -# user's input file. 
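For reference, a minimal example of the kind of module this class reflects
over (hypothetical token set, following PLY's documented conventions):

    import ply.lex as lex

    tokens = ('NUMBER', 'PLUS')   # found by get_tokens()
    literals = '()'               # found by get_literals()

    t_PLUS = r'\+'                # string rule, collected into strsym

    def t_NUMBER(t):              # function rule, collected into funcsym
        r'\d+'
        t.value = int(t.value)
        return t

    t_ignore = ' \t'              # ignore set for the INITIAL state

    def t_error(t):               # error handler, stored in errorf
        t.lexer.skip(1)

    lexer = lex.lex()             # triggers reflection over this module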
-# ----------------------------------------------------------------------------- -class LexerReflect(object): - def __init__(self, ldict, log=None, reflags=0): - self.ldict = ldict - self.error_func = None - self.tokens = [] - self.reflags = reflags - self.stateinfo = {'INITIAL': 'inclusive'} - self.modules = set() - self.error = False - self.log = PlyLogger(sys.stderr) if log is None else log - - # Get all of the basic information - def get_all(self): - self.get_tokens() - self.get_literals() - self.get_states() - self.get_rules() - - # Validate all of the information - def validate_all(self): - self.validate_tokens() - self.validate_literals() - self.validate_rules() - return self.error - - # Get the tokens map - def get_tokens(self): - tokens = self.ldict.get('tokens', None) - if not tokens: - self.log.error('No token list is defined') - self.error = True - return - - if not isinstance(tokens, (list, tuple)): - self.log.error('tokens must be a list or tuple') - self.error = True - return - - if not tokens: - self.log.error('tokens is empty') - self.error = True - return - - self.tokens = tokens - - # Validate the tokens - def validate_tokens(self): - terminals = {} - for n in self.tokens: - if not _is_identifier.match(n): - self.log.error("Bad token name '%s'", n) - self.error = True - if n in terminals: - self.log.warning("Token '%s' multiply defined", n) - terminals[n] = 1 - - # Get the literals specifier - def get_literals(self): - self.literals = self.ldict.get('literals', '') - if not self.literals: - self.literals = '' - - # Validate literals - def validate_literals(self): - try: - for c in self.literals: - if not isinstance(c, StringTypes) or len(c) > 1: - self.log.error('Invalid literal %s. Must be a single character', repr(c)) - self.error = True - - except TypeError: - self.log.error('Invalid literals specification. literals must be a sequence of characters') - self.error = True - - def get_states(self): - self.states = self.ldict.get('states', None) - # Build statemap - if self.states: - if not isinstance(self.states, (tuple, list)): - self.log.error('states must be defined as a tuple or list') - self.error = True - else: - for s in self.states: - if not isinstance(s, tuple) or len(s) != 2: - self.log.error("Invalid state specifier %s. 
Must be a tuple (statename,'exclusive|inclusive')", repr(s)) - self.error = True - continue - name, statetype = s - if not isinstance(name, StringTypes): - self.log.error('State name %s must be a string', repr(name)) - self.error = True - continue - if not (statetype == 'inclusive' or statetype == 'exclusive'): - self.log.error("State type for state %s must be 'inclusive' or 'exclusive'", name) - self.error = True - continue - if name in self.stateinfo: - self.log.error("State '%s' already defined", name) - self.error = True - continue - self.stateinfo[name] = statetype - - # Get all of the symbols with a t_ prefix and sort them into various - # categories (functions, strings, error functions, and ignore characters) - - def get_rules(self): - tsymbols = [f for f in self.ldict if f[:2] == 't_'] - - # Now build up a list of functions and a list of strings - self.toknames = {} # Mapping of symbols to token names - self.funcsym = {} # Symbols defined as functions - self.strsym = {} # Symbols defined as strings - self.ignore = {} # Ignore strings by state - self.errorf = {} # Error functions by state - self.eoff = {} # EOF functions by state - - for s in self.stateinfo: - self.funcsym[s] = [] - self.strsym[s] = [] - - if len(tsymbols) == 0: - self.log.error('No rules of the form t_rulename are defined') - self.error = True - return - - for f in tsymbols: - t = self.ldict[f] - states, tokname = _statetoken(f, self.stateinfo) - self.toknames[f] = tokname - - if hasattr(t, '__call__'): - if tokname == 'error': - for s in states: - self.errorf[s] = t - elif tokname == 'eof': - for s in states: - self.eoff[s] = t - elif tokname == 'ignore': - line = t.__code__.co_firstlineno - file = t.__code__.co_filename - self.log.error("%s:%d: Rule '%s' must be defined as a string", file, line, t.__name__) - self.error = True - else: - for s in states: - self.funcsym[s].append((f, t)) - elif isinstance(t, StringTypes): - if tokname == 'ignore': - for s in states: - self.ignore[s] = t - if '\\' in t: - self.log.warning("%s contains a literal backslash '\\'", f) - - elif tokname == 'error': - self.log.error("Rule '%s' must be defined as a function", f) - self.error = True - else: - for s in states: - self.strsym[s].append((f, t)) - else: - self.log.error('%s not defined as a function or string', f) - self.error = True - - # Sort the functions by line number - for f in self.funcsym.values(): - f.sort(key=lambda x: x[1].__code__.co_firstlineno) - - # Sort the strings by regular expression length - for s in self.strsym.values(): - s.sort(key=lambda x: len(x[1]), reverse=True) - - # Validate all of the t_rules collected - def validate_rules(self): - for state in self.stateinfo: - # Validate all rules defined by functions - - for fname, f in self.funcsym[state]: - line = f.__code__.co_firstlineno - file = f.__code__.co_filename - module = inspect.getmodule(f) - self.modules.add(module) - - tokname = self.toknames[fname] - if isinstance(f, types.MethodType): - reqargs = 2 - else: - reqargs = 1 - nargs = f.__code__.co_argcount - if nargs > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__) - self.error = True - continue - - if nargs < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__) - self.error = True - continue - - if not _get_regex(f): - self.log.error("%s:%d: No regular expression defined for rule '%s'", file, line, f.__name__) - self.error = True - continue - - try: - c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), re.VERBOSE | 
self.reflags) - if c.match(''): - self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file, line, f.__name__) - self.error = True - except re.error as e: - self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e) - if '#' in _get_regex(f): - self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'", file, line, f.__name__) - self.error = True - - # Validate all rules defined by strings - for name, r in self.strsym[state]: - tokname = self.toknames[name] - if tokname == 'error': - self.log.error("Rule '%s' must be defined as a function", name) - self.error = True - continue - - if tokname not in self.tokens and tokname.find('ignore_') < 0: - self.log.error("Rule '%s' defined for an unspecified token %s", name, tokname) - self.error = True - continue - - try: - c = re.compile('(?P<%s>%s)' % (name, r), re.VERBOSE | self.reflags) - if (c.match('')): - self.log.error("Regular expression for rule '%s' matches empty string", name) - self.error = True - except re.error as e: - self.log.error("Invalid regular expression for rule '%s'. %s", name, e) - if '#' in r: - self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'", name) - self.error = True - - if not self.funcsym[state] and not self.strsym[state]: - self.log.error("No rules defined for state '%s'", state) - self.error = True - - # Validate the error function - efunc = self.errorf.get(state, None) - if efunc: - f = efunc - line = f.__code__.co_firstlineno - file = f.__code__.co_filename - module = inspect.getmodule(f) - self.modules.add(module) - - if isinstance(f, types.MethodType): - reqargs = 2 - else: - reqargs = 1 - nargs = f.__code__.co_argcount - if nargs > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__) - self.error = True - - if nargs < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__) - self.error = True - - for module in self.modules: - self.validate_module(module) - - # ----------------------------------------------------------------------------- - # validate_module() - # - # This checks to see if there are duplicated t_rulename() functions or strings - # in the parser input file. This is done using a simple regular expression - # match on each line in the source code of the given module. - # ----------------------------------------------------------------------------- - - def validate_module(self, module): - lines, linen = inspect.getsourcelines(module) - - fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') - sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') - - counthash = {} - linen += 1 - for line in lines: - m = fre.match(line) - if not m: - m = sre.match(line) - if m: - name = m.group(1) - prev = counthash.get(name) - if not prev: - counthash[name] = linen - else: - filename = inspect.getsourcefile(module) - self.log.error('%s:%d: Rule %s redefined. 
Previously defined on line %d', filename, linen, name, prev) - self.error = True - linen += 1 - -# ----------------------------------------------------------------------------- -# lex(module) -# -# Build all of the regular expression rules from definitions in the supplied module -# ----------------------------------------------------------------------------- -def lex(module=None, object=None, debug=False, optimize=False, lextab='lextab', - reflags=0, nowarn=False, outputdir=None, debuglog=None, errorlog=None): - - if lextab is None: - lextab = 'lextab' - - global lexer - - ldict = None - stateinfo = {'INITIAL': 'inclusive'} - lexobj = Lexer() - lexobj.lexoptimize = optimize - global token, input - - if errorlog is None: - errorlog = PlyLogger(sys.stderr) - - if debug: - if debuglog is None: - debuglog = PlyLogger(sys.stderr) - - # Get the module dictionary used for the lexer - if object: - module = object - - # Get the module dictionary used for the parser - if module: - _items = [(k, getattr(module, k)) for k in dir(module)] - ldict = dict(_items) - # If no __file__ attribute is available, try to obtain it from the __module__ instead - if '__file__' not in ldict: - ldict['__file__'] = sys.modules[ldict['__module__']].__file__ - else: - ldict = get_caller_module_dict(2) - - # Determine if the module is package of a package or not. - # If so, fix the tabmodule setting so that tables load correctly - pkg = ldict.get('__package__') - if pkg and isinstance(lextab, str): - if '.' not in lextab: - lextab = pkg + '.' + lextab - - # Collect parser information from the dictionary - linfo = LexerReflect(ldict, log=errorlog, reflags=reflags) - linfo.get_all() - if not optimize: - if linfo.validate_all(): - raise SyntaxError("Can't build lexer") - - if optimize and lextab: - try: - lexobj.readtab(lextab, ldict) - token = lexobj.token - input = lexobj.input - lexer = lexobj - return lexobj - - except ImportError: - pass - - # Dump some basic debugging information - if debug: - debuglog.info('lex: tokens = %r', linfo.tokens) - debuglog.info('lex: literals = %r', linfo.literals) - debuglog.info('lex: states = %r', linfo.stateinfo) - - # Build a dictionary of valid token names - lexobj.lextokens = set() - for n in linfo.tokens: - lexobj.lextokens.add(n) - - # Get literals specification - if isinstance(linfo.literals, (list, tuple)): - lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) - else: - lexobj.lexliterals = linfo.literals - - lexobj.lextokens_all = lexobj.lextokens | set(lexobj.lexliterals) - - # Get the stateinfo dictionary - stateinfo = linfo.stateinfo - - regexs = {} - # Build the master regular expressions - for state in stateinfo: - regex_list = [] - - # Add rules defined by functions first - for fname, f in linfo.funcsym[state]: - line = f.__code__.co_firstlineno - file = f.__code__.co_filename - regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f))) - if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _get_regex(f), state) - - # Now add all of the simple rules - for name, r in linfo.strsym[state]: - regex_list.append('(?P<%s>%s)' % (name, r)) - if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state) - - regexs[state] = regex_list - - # Build the master regular expressions - - if debug: - debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====') - - for state in regexs: - lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames) - lexobj.lexstatere[state] = lexre - 
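
The machinery above turns ordinary module-level definitions into a lexer: lex() harvests every name with a t_ prefix from the calling module, sorts function rules by definition order and string rules by pattern length, and compiles one master regex per state. A minimal sketch of the conventional usage follows; the token names are illustrative, and ply refers to the external package this patch makes xonsh depend on rather than the vendored copy being deleted.

    import ply.lex as lex

    tokens = ('NUMBER', 'PLUS')      # declared token names

    t_PLUS = r'\+'                   # string rule: ranked by pattern length
    t_ignore = ' \t'                 # characters skipped between tokens

    def t_NUMBER(t):                 # function rule: ranked by line number
        r'\d+'
        t.value = int(t.value)
        return t

    def t_error(t):                  # called when no rule matches
        print('Illegal character %r' % t.value[0])
        t.lexer.skip(1)

    lexer = lex.lex()                # runs LexerReflect over this module
    lexer.input('1 + 2')
    for tok in lexer:                # yields LexToken objects until EOF
        print(tok.type, tok.value)
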
lexobj.lexstateretext[state] = re_text - lexobj.lexstaterenames[state] = re_names - if debug: - for i, text in enumerate(re_text): - debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text) - - # For inclusive states, we need to add the regular expressions from the INITIAL state - for state, stype in stateinfo.items(): - if state != 'INITIAL' and stype == 'inclusive': - lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) - lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) - lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) - - lexobj.lexstateinfo = stateinfo - lexobj.lexre = lexobj.lexstatere['INITIAL'] - lexobj.lexretext = lexobj.lexstateretext['INITIAL'] - lexobj.lexreflags = reflags - - # Set up ignore variables - lexobj.lexstateignore = linfo.ignore - lexobj.lexignore = lexobj.lexstateignore.get('INITIAL', '') - - # Set up error functions - lexobj.lexstateerrorf = linfo.errorf - lexobj.lexerrorf = linfo.errorf.get('INITIAL', None) - if not lexobj.lexerrorf: - errorlog.warning('No t_error rule is defined') - - # Set up eof functions - lexobj.lexstateeoff = linfo.eoff - lexobj.lexeoff = linfo.eoff.get('INITIAL', None) - - # Check state information for ignore and error rules - for s, stype in stateinfo.items(): - if stype == 'exclusive': - if s not in linfo.errorf: - errorlog.warning("No error rule is defined for exclusive state '%s'", s) - if s not in linfo.ignore and lexobj.lexignore: - errorlog.warning("No ignore rule is defined for exclusive state '%s'", s) - elif stype == 'inclusive': - if s not in linfo.errorf: - linfo.errorf[s] = linfo.errorf.get('INITIAL', None) - if s not in linfo.ignore: - linfo.ignore[s] = linfo.ignore.get('INITIAL', '') - - # Create global versions of the token() and input() functions - token = lexobj.token - input = lexobj.input - lexer = lexobj - - # If in optimize mode, we write the lextab - if lextab and optimize: - if outputdir is None: - # If no output directory is set, the location of the output files - # is determined according to the following rules: - # - If lextab specifies a package, files go into that package directory - # - Otherwise, files go in the same directory as the specifying module - if isinstance(lextab, types.ModuleType): - srcfile = lextab.__file__ - else: - if '.' not in lextab: - srcfile = ldict['__file__'] - else: - parts = lextab.split('.') - pkgname = '.'.join(parts[:-1]) - exec('import %s' % pkgname) - srcfile = getattr(sys.modules[pkgname], '__file__', '') - outputdir = os.path.dirname(srcfile) - try: - lexobj.writetab(lextab, outputdir) - except IOError as e: - errorlog.warning("Couldn't write lextab module %r. 
%s" % (lextab, e)) - - return lexobj - -# ----------------------------------------------------------------------------- -# runmain() -# -# This runs the lexer as a main program -# ----------------------------------------------------------------------------- - -def runmain(lexer=None, data=None): - if not data: - try: - filename = sys.argv[1] - f = open(filename) - data = f.read() - f.close() - except IndexError: - sys.stdout.write('Reading from standard input (type EOF to end):\n') - data = sys.stdin.read() - - if lexer: - _input = lexer.input - else: - _input = input - _input(data) - if lexer: - _token = lexer.token - else: - _token = token - - while True: - tok = _token() - if not tok: - break - sys.stdout.write('(%s,%r,%d,%d)\n' % (tok.type, tok.value, tok.lineno, tok.lexpos)) - -# ----------------------------------------------------------------------------- -# @TOKEN(regex) -# -# This decorator function can be used to set the regex expression on a function -# when its docstring might need to be set in an alternative way -# ----------------------------------------------------------------------------- - -def TOKEN(r): - def set_regex(f): - if hasattr(r, '__call__'): - f.regex = _get_regex(r) - else: - f.regex = r - return f - return set_regex - -# Alternative spelling of the TOKEN decorator -Token = TOKEN - diff --git a/xonsh/ply/yacc.py b/xonsh/ply/yacc.py deleted file mode 100644 index 3597c0082..000000000 --- a/xonsh/ply/yacc.py +++ /dev/null @@ -1,3465 +0,0 @@ -# ----------------------------------------------------------------------------- -# ply: yacc.py -# -# Copyright (C) 2001-2015, -# David M. Beazley (Dabeaz LLC) -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# * Neither the name of the David Beazley or Dabeaz LLC may be used to -# endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ----------------------------------------------------------------------------- -# -# This implements an LR parser that is constructed from grammar rules defined -# as Python functions. The grammer is specified by supplying the BNF inside -# Python documentation strings. The inspiration for this technique was borrowed -# from John Aycock's Spark parsing system. 
PLY might be viewed as cross between -# Spark and the GNU bison utility. -# -# The current implementation is only somewhat object-oriented. The -# LR parser itself is defined in terms of an object (which allows multiple -# parsers to co-exist). However, most of the variables used during table -# construction are defined in terms of global variables. Users shouldn't -# notice unless they are trying to define multiple parsers at the same -# time using threads (in which case they should have their head examined). -# -# This implementation supports both SLR and LALR(1) parsing. LALR(1) -# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu), -# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles, -# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced -# by the more efficient DeRemer and Pennello algorithm. -# -# :::::::: WARNING ::::::: -# -# Construction of LR parsing tables is fairly complicated and expensive. -# To make this module run fast, a *LOT* of work has been put into -# optimization---often at the expensive of readability and what might -# consider to be good Python "coding style." Modify the code at your -# own risk! -# ---------------------------------------------------------------------------- - -import re -import types -import sys -import os.path -import inspect -import base64 -import warnings - -__version__ = '3.8' -__tabversion__ = '3.8' - -#----------------------------------------------------------------------------- -# === User configurable parameters === -# -# Change these to modify the default behavior of yacc (if you wish) -#----------------------------------------------------------------------------- - -yaccdebug = True # Debugging mode. If set, yacc generates a - # a 'parser.out' file in the current directory - -debug_file = 'parser.out' # Default name of the debugging file -tab_module = 'parsetab' # Default name of the table module -default_lr = 'LALR' # Default LR table generation method - -error_count = 3 # Number of symbols that must be shifted to leave recovery mode - -yaccdevel = False # Set to True if developing yacc. This turns off optimized - # implementations of certain functions. - -resultlimit = 40 # Size limit of results when running in debug mode. - -pickle_protocol = 0 # Protocol to use when writing pickle files - -MAXINT = sys.maxsize - -# This object is a stand-in for a logging object created by the -# logging module. PLY will use this by default to create things -# such as the parser.out file. If a user wants more detailed -# information, they can create their own logging object and pass -# it into PLY. - -class PlyLogger(object): - def __init__(self, f): - self.f = f - - def debug(self, msg, *args, **kwargs): - self.f.write((msg % args) + '\n') - - info = debug - - def warning(self, msg, *args, **kwargs): - self.f.write('WARNING: ' + (msg % args) + '\n') - - def error(self, msg, *args, **kwargs): - self.f.write('ERROR: ' + (msg % args) + '\n') - - critical = debug - -# Null logger is used when no output is generated. Does nothing. -class NullLogger(object): - def __getattribute__(self, name): - return self - - def __call__(self, *args, **kwargs): - return self - -# Exception raised for yacc-related errors -class YaccError(Exception): - pass - -# Format the result message that the parser produces when running in debug mode. 
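
Any object exposing debug/info/warning/error methods can stand in for a logger, so callers who want diagnostics somewhere other than stderr can supply their own. A small sketch using the PlyLogger wrapper defined above (imported from the external ply package):

    import sys
    from ply.yacc import PlyLogger
    import ply.yacc as yacc

    log = PlyLogger(sys.stderr)
    log.warning('table file is out of date')    # WARNING: table file is out of date
    log.error('%d shift/reduce conflicts', 3)   # ERROR: 3 shift/reduce conflicts

    # A file-backed logger works the same way; handing one to yacc()
    # redirects its warnings instead of printing them to stderr.
    parser = yacc.yacc(errorlog=log)
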
-def format_result(r): - repr_str = repr(r) - if '\n' in repr_str: - repr_str = repr(repr_str) - if len(repr_str) > resultlimit: - repr_str = repr_str[:resultlimit] + ' ...' - result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str) - return result - -# Format stack entries when the parser is running in debug mode -def format_stack_entry(r): - repr_str = repr(r) - if '\n' in repr_str: - repr_str = repr(repr_str) - if len(repr_str) < 16: - return repr_str - else: - return '<%s @ 0x%x>' % (type(r).__name__, id(r)) - -# Panic mode error recovery support. This feature is being reworked--much of the -# code here is to offer a deprecation/backwards compatible transition - -_errok = None -_token = None -_restart = None -_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error(). -Instead, invoke the methods on the associated parser instance: - - def p_error(p): - ... - # Use parser.errok(), parser.token(), parser.restart() - ... - - parser = yacc.yacc() -''' - -def errok(): - warnings.warn(_warnmsg) - return _errok() - -def restart(): - warnings.warn(_warnmsg) - return _restart() - -def token(): - warnings.warn(_warnmsg) - return _token() - -# Utility function to call the p_error() function with some deprecation hacks -def call_errorfunc(errorfunc, token, parser): - global _errok, _token, _restart - _errok = parser.errok - _token = parser.token - _restart = parser.restart - r = errorfunc(token) - try: - del _errok, _token, _restart - except NameError: - pass - return r - -#----------------------------------------------------------------------------- -# === LR Parsing Engine === -# -# The following classes are used for the LR parser itself. These are not -# used during table construction and are independent of the actual LR -# table generation algorithm -#----------------------------------------------------------------------------- - -# This class is used to hold non-terminal grammar symbols during parsing. -# It normally has the following attributes set: -# .type = Grammar symbol type -# .value = Symbol value -# .lineno = Starting line number -# .endlineno = Ending line number (optional, set automatically) -# .lexpos = Starting lex position -# .endlexpos = Ending lex position (optional, set automatically) - -class YaccSymbol: - def __str__(self): - return self.type - - def __repr__(self): - return str(self) - -# This class is a wrapper around the objects actually passed to each -# grammar rule. Index lookup and assignment actually assign the -# .value attribute of the underlying YaccSymbol object. -# The lineno() method returns the line number of a given -# item (or 0 if not defined). The linespan() method returns -# a tuple of (startline,endline) representing the range of lines -# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) -# representing the range of positional information for a symbol. 
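
The replacement for the deprecated globals is spelled out in the warning text above: invoke the methods on the parser instance. A sketch of panic-mode recovery in that style, assuming the usual tokens and p_ rules exist in the module; the SEMI synchronization token is illustrative.

    import ply.yacc as yacc

    def p_error(p):
        if p is None:
            print('Syntax error at EOF')
            return
        print('Syntax error at token', p.type)
        # Discard input up to a plausible synchronization point.
        while True:
            tok = parser.token()      # the instance method, not the global
            if not tok or tok.type == 'SEMI':
                break
        parser.errok()                # leave error-recovery mode
        return tok                    # becomes the new lookahead symbol

    parser = yacc.yacc()
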
- -class YaccProduction: - def __init__(self, s, stack=None): - self.slice = s - self.stack = stack - self.lexer = None - self.parser = None - - def __getitem__(self, n): - if isinstance(n, slice): - return [s.value for s in self.slice[n]] - elif n >= 0: - return self.slice[n].value - else: - return self.stack[n].value - - def __setitem__(self, n, v): - self.slice[n].value = v - - def __getslice__(self, i, j): - return [s.value for s in self.slice[i:j]] - - def __len__(self): - return len(self.slice) - - def lineno(self, n): - return getattr(self.slice[n], 'lineno', 0) - - def set_lineno(self, n, lineno): - self.slice[n].lineno = lineno - - def linespan(self, n): - startline = getattr(self.slice[n], 'lineno', 0) - endline = getattr(self.slice[n], 'endlineno', startline) - return startline, endline - - def lexpos(self, n): - return getattr(self.slice[n], 'lexpos', 0) - - def lexspan(self, n): - startpos = getattr(self.slice[n], 'lexpos', 0) - endpos = getattr(self.slice[n], 'endlexpos', startpos) - return startpos, endpos - - def error(self): - raise SyntaxError - -# ----------------------------------------------------------------------------- -# == LRParser == -# -# The LR Parsing engine. -# ----------------------------------------------------------------------------- - -class LRParser: - def __init__(self, lrtab, errorf): - self.productions = lrtab.lr_productions - self.action = lrtab.lr_action - self.goto = lrtab.lr_goto - self.errorfunc = errorf - self.set_defaulted_states() - self.errorok = True - - def errok(self): - self.errorok = True - - def restart(self): - del self.statestack[:] - del self.symstack[:] - sym = YaccSymbol() - sym.type = '$end' - self.symstack.append(sym) - self.statestack.append(0) - - # Defaulted state support. - # This method identifies parser states where there is only one possible reduction action. - # For such states, the parser can make a choose to make a rule reduction without consuming - # the next look-ahead token. This delayed invocation of the tokenizer can be useful in - # certain kinds of advanced parsing situations where the lexer and parser interact with - # each other or change states (i.e., manipulation of scope, lexer states, etc.). - # - # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions - def set_defaulted_states(self): - self.defaulted_states = {} - for state, actions in self.action.items(): - rules = list(actions.values()) - if len(rules) == 1 and rules[0] < 0: - self.defaulted_states[state] = rules[0] - - def disable_defaulted_states(self): - self.defaulted_states = {} - - def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): - if debug or yaccdevel: - if isinstance(debug, int): - debug = PlyLogger(sys.stderr) - return self.parsedebug(input, lexer, debug, tracking, tokenfunc) - elif tracking: - return self.parseopt(input, lexer, debug, tracking, tokenfunc) - else: - return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc) - - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # parsedebug(). - # - # This is the debugging enabled version of parse(). All changes made to the - # parsing engine should be made here. Optimized versions of this function - # are automatically created by the ply/ygen.py script. This script cuts out - # sections enclosed in markers such as this: - # - # #--! DEBUG - # statements - # #--! DEBUG - # - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
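
Grammar rule functions receive the YaccProduction wrapper just described, so indexing reads and writes the .value attribute of the underlying symbols while the positional methods consult their line and lexpos data. A sketch of a rule body, assuming the surrounding grammar defines expr, term, and PLUS:

    def p_expr_plus(p):
        'expr : expr PLUS term'
        p[0] = p[1] + p[3]                  # values of 'expr' and 'term'
        start, end = p.linespan(1)          # line range of the left operand
        p.set_lineno(0, p.lineno(1))        # propagate position info manually
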
- - def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): - #--! parsedebug-start - lookahead = None # Current lookahead symbol - lookaheadstack = [] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - defaulted_states = self.defaulted_states # Local reference to defaulted states - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery - - #--! DEBUG - debug.info('PLY: PARSE DEBUG START') - #--! DEBUG - - # If no lexer was given, we will try to use the lex module - if not lexer: - from . import lex - lexer = lex.lexer - - # Set up the lexer and parser objects on pslice - pslice.lexer = lexer - pslice.parser = self - - # If input was supplied, pass to lexer - if input is not None: - lexer.input(input) - - if tokenfunc is None: - # Tokenize function - get_token = lexer.token - else: - get_token = tokenfunc - - # Set the parser() token method (sometimes used in error recovery) - self.token = get_token - - # Set up the state and symbol stacks - - statestack = [] # Stack of parsing states - self.statestack = statestack - symstack = [] # Stack of grammar symbols - self.symstack = symstack - - pslice.stack = symstack # Put in the production - errtoken = None # Err token - - # The start state is assumed to be (0,$end) - - statestack.append(0) - sym = YaccSymbol() - sym.type = '$end' - symstack.append(sym) - state = 0 - while True: - # Get the next symbol on the input. If a lookahead symbol - # is already set, we just use that. Otherwise, we'll pull - # the next token off of the lookaheadstack or from the lexer - - #--! DEBUG - debug.debug('') - debug.debug('State : %s', state) - #--! DEBUG - - if state not in defaulted_states: - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' - - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) - else: - t = defaulted_states[state] - #--! DEBUG - debug.debug('Defaulted state %s: Reduce using %d', state, -t) - #--! DEBUG - - #--! DEBUG - debug.debug('Stack : %s', - ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - #--! DEBUG - - if t is not None: - if t > 0: - # shift a symbol on the stack - statestack.append(t) - state = t - - #--! DEBUG - debug.debug('Action : Shift and goto state %s', t) - #--! DEBUG - - symstack.append(lookahead) - lookahead = None - - # Decrease error count on successful shift - if errorcount: - errorcount -= 1 - continue - - if t < 0: - # reduce a symbol on the stack, emit a production - p = prod[-t] - pname = p.name - plen = p.len - - # Get production function - sym = YaccSymbol() - sym.type = pname # Production name - sym.value = None - - #--! DEBUG - if plen: - debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, - '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']', - goto[statestack[-1-plen]][pname]) - else: - debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], - goto[statestack[-1]][pname]) - - #--! DEBUG - - if plen: - targ = symstack[-plen-1:] - targ[0] = sym - - #--! 
TRACKING - if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1, 'endlineno', t1.lineno) - sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) - #--! TRACKING - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # below as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - del symstack[-plen:] - del statestack[-plen:] - p.callable(pslice) - #--! DEBUG - debug.info('Result : %s', format_result(pslice[0])) - #--! DEBUG - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = False - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - else: - - #--! TRACKING - if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - #--! TRACKING - - targ = [sym] - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # above as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - p.callable(pslice) - #--! DEBUG - debug.info('Result : %s', format_result(pslice[0])) - #--! DEBUG - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = False - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - if t == 0: - n = symstack[-1] - result = getattr(n, 'value', None) - #--! DEBUG - debug.info('Done : Returning %s', format_result(result)) - debug.info('PLY: PARSE DEBUG END') - #--! DEBUG - return result - - if t is None: - - #--! DEBUG - debug.error('Error : %s', - ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - #--! DEBUG - - # We have some kind of parsing error here. To handle - # this, we are going to push the current token onto - # the tokenstack and replace it with an 'error' token. - # If there are any synchronization rules, they may - # catch it. - # - # In addition to pushing the error token, we call call - # the user defined p_error() function if this is the - # first syntax error. This function is only called if - # errorcount == 0. - if errorcount == 0 or self.errorok: - errorcount = error_count - self.errorok = False - errtoken = lookahead - if errtoken.type == '$end': - errtoken = None # End of file! - if self.errorfunc: - if errtoken and not hasattr(errtoken, 'lexer'): - errtoken.lexer = lexer - tok = call_errorfunc(self.errorfunc, errtoken, self) - if self.errorok: - # User must have done some kind of panic - # mode recovery on their own. 
The - # returned token is the next lookahead - lookahead = tok - errtoken = None - continue - else: - if errtoken: - if hasattr(errtoken, 'lineno'): - lineno = lookahead.lineno - else: - lineno = 0 - if lineno: - sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) - else: - sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) - else: - sys.stderr.write('yacc: Parse error in input. EOF\n') - return - - else: - errorcount = error_count - - # case 1: the statestack only has 1 entry on it. If we're in this state, the - # entire parse has been rolled back and we're completely hosed. The token is - # discarded and we just keep going. - - if len(statestack) <= 1 and lookahead.type != '$end': - lookahead = None - errtoken = None - state = 0 - # Nuke the pushback stack - del lookaheadstack[:] - continue - - # case 2: the statestack has a couple of entries on it, but we're - # at the end of the file. nuke the top entry and generate an error token - - # Start nuking entries on the stack - if lookahead.type == '$end': - # Whoa. We're really hosed here. Bail out - return - - if lookahead.type != 'error': - sym = symstack[-1] - if sym.type == 'error': - # Hmmm. Error is on top of stack, we'll just nuke input - # symbol and continue - #--! TRACKING - if tracking: - sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) - sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) - #--! TRACKING - lookahead = None - continue - - # Create the error symbol for the first time and make it the new lookahead symbol - t = YaccSymbol() - t.type = 'error' - - if hasattr(lookahead, 'lineno'): - t.lineno = t.endlineno = lookahead.lineno - if hasattr(lookahead, 'lexpos'): - t.lexpos = t.endlexpos = lookahead.lexpos - t.value = lookahead - lookaheadstack.append(lookahead) - lookahead = t - else: - sym = symstack.pop() - #--! TRACKING - if tracking: - lookahead.lineno = sym.lineno - lookahead.lexpos = sym.lexpos - #--! TRACKING - statestack.pop() - state = statestack[-1] - - continue - - # Call an error function here - raise RuntimeError('yacc: internal parser error!!!\n') - - #--! parsedebug-end - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # parseopt(). - # - # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY! - # This code is automatically generated by the ply/ygen.py script. Make - # changes to the parsedebug() method instead. - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): - #--! parseopt-start - lookahead = None # Current lookahead symbol - lookaheadstack = [] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - defaulted_states = self.defaulted_states # Local reference to defaulted states - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery - - - # If no lexer was given, we will try to use the lex module - if not lexer: - from . 
import lex - lexer = lex.lexer - - # Set up the lexer and parser objects on pslice - pslice.lexer = lexer - pslice.parser = self - - # If input was supplied, pass to lexer - if input is not None: - lexer.input(input) - - if tokenfunc is None: - # Tokenize function - get_token = lexer.token - else: - get_token = tokenfunc - - # Set the parser() token method (sometimes used in error recovery) - self.token = get_token - - # Set up the state and symbol stacks - - statestack = [] # Stack of parsing states - self.statestack = statestack - symstack = [] # Stack of grammar symbols - self.symstack = symstack - - pslice.stack = symstack # Put in the production - errtoken = None # Err token - - # The start state is assumed to be (0,$end) - - statestack.append(0) - sym = YaccSymbol() - sym.type = '$end' - symstack.append(sym) - state = 0 - while True: - # Get the next symbol on the input. If a lookahead symbol - # is already set, we just use that. Otherwise, we'll pull - # the next token off of the lookaheadstack or from the lexer - - - if state not in defaulted_states: - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' - - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) - else: - t = defaulted_states[state] - - - if t is not None: - if t > 0: - # shift a symbol on the stack - statestack.append(t) - state = t - - - symstack.append(lookahead) - lookahead = None - - # Decrease error count on successful shift - if errorcount: - errorcount -= 1 - continue - - if t < 0: - # reduce a symbol on the stack, emit a production - p = prod[-t] - pname = p.name - plen = p.len - - # Get production function - sym = YaccSymbol() - sym.type = pname # Production name - sym.value = None - - - if plen: - targ = symstack[-plen-1:] - targ[0] = sym - - #--! TRACKING - if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1, 'endlineno', t1.lineno) - sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) - #--! TRACKING - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # below as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - del symstack[-plen:] - del statestack[-plen:] - p.callable(pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = False - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - else: - - #--! TRACKING - if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - #--! TRACKING - - targ = [sym] - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # above as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - p.callable(pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. 
Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = False - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - if t == 0: - n = symstack[-1] - result = getattr(n, 'value', None) - return result - - if t is None: - - - # We have some kind of parsing error here. To handle - # this, we are going to push the current token onto - # the tokenstack and replace it with an 'error' token. - # If there are any synchronization rules, they may - # catch it. - # - # In addition to pushing the error token, we call call - # the user defined p_error() function if this is the - # first syntax error. This function is only called if - # errorcount == 0. - if errorcount == 0 or self.errorok: - errorcount = error_count - self.errorok = False - errtoken = lookahead - if errtoken.type == '$end': - errtoken = None # End of file! - if self.errorfunc: - if errtoken and not hasattr(errtoken, 'lexer'): - errtoken.lexer = lexer - tok = call_errorfunc(self.errorfunc, errtoken, self) - if self.errorok: - # User must have done some kind of panic - # mode recovery on their own. The - # returned token is the next lookahead - lookahead = tok - errtoken = None - continue - else: - if errtoken: - if hasattr(errtoken, 'lineno'): - lineno = lookahead.lineno - else: - lineno = 0 - if lineno: - sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) - else: - sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) - else: - sys.stderr.write('yacc: Parse error in input. EOF\n') - return - - else: - errorcount = error_count - - # case 1: the statestack only has 1 entry on it. If we're in this state, the - # entire parse has been rolled back and we're completely hosed. The token is - # discarded and we just keep going. - - if len(statestack) <= 1 and lookahead.type != '$end': - lookahead = None - errtoken = None - state = 0 - # Nuke the pushback stack - del lookaheadstack[:] - continue - - # case 2: the statestack has a couple of entries on it, but we're - # at the end of the file. nuke the top entry and generate an error token - - # Start nuking entries on the stack - if lookahead.type == '$end': - # Whoa. We're really hosed here. Bail out - return - - if lookahead.type != 'error': - sym = symstack[-1] - if sym.type == 'error': - # Hmmm. Error is on top of stack, we'll just nuke input - # symbol and continue - #--! TRACKING - if tracking: - sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) - sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) - #--! TRACKING - lookahead = None - continue - - # Create the error symbol for the first time and make it the new lookahead symbol - t = YaccSymbol() - t.type = 'error' - - if hasattr(lookahead, 'lineno'): - t.lineno = t.endlineno = lookahead.lineno - if hasattr(lookahead, 'lexpos'): - t.lexpos = t.endlexpos = lookahead.lexpos - t.value = lookahead - lookaheadstack.append(lookahead) - lookahead = t - else: - sym = symstack.pop() - #--! TRACKING - if tracking: - lookahead.lineno = sym.lineno - lookahead.lexpos = sym.lexpos - #--! TRACKING - statestack.pop() - state = statestack[-1] - - continue - - # Call an error function here - raise RuntimeError('yacc: internal parser error!!!\n') - - #--! parseopt-end - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # parseopt_notrack(). 
- # - # Optimized version of parseopt() with line number tracking removed. - # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated - # by the ply/ygen.py script. Make changes to the parsedebug() method instead. - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): - #--! parseopt-notrack-start - lookahead = None # Current lookahead symbol - lookaheadstack = [] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - defaulted_states = self.defaulted_states # Local reference to defaulted states - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery - - - # If no lexer was given, we will try to use the lex module - if not lexer: - from . import lex - lexer = lex.lexer - - # Set up the lexer and parser objects on pslice - pslice.lexer = lexer - pslice.parser = self - - # If input was supplied, pass to lexer - if input is not None: - lexer.input(input) - - if tokenfunc is None: - # Tokenize function - get_token = lexer.token - else: - get_token = tokenfunc - - # Set the parser() token method (sometimes used in error recovery) - self.token = get_token - - # Set up the state and symbol stacks - - statestack = [] # Stack of parsing states - self.statestack = statestack - symstack = [] # Stack of grammar symbols - self.symstack = symstack - - pslice.stack = symstack # Put in the production - errtoken = None # Err token - - # The start state is assumed to be (0,$end) - - statestack.append(0) - sym = YaccSymbol() - sym.type = '$end' - symstack.append(sym) - state = 0 - while True: - # Get the next symbol on the input. If a lookahead symbol - # is already set, we just use that. Otherwise, we'll pull - # the next token off of the lookaheadstack or from the lexer - - - if state not in defaulted_states: - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' - - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) - else: - t = defaulted_states[state] - - - if t is not None: - if t > 0: - # shift a symbol on the stack - statestack.append(t) - state = t - - - symstack.append(lookahead) - lookahead = None - - # Decrease error count on successful shift - if errorcount: - errorcount -= 1 - continue - - if t < 0: - # reduce a symbol on the stack, emit a production - p = prod[-t] - pname = p.name - plen = p.len - - # Get production function - sym = YaccSymbol() - sym.type = pname # Production name - sym.value = None - - - if plen: - targ = symstack[-plen-1:] - targ[0] = sym - - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # below as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - del symstack[-plen:] - del statestack[-plen:] - p.callable(pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. 
Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = False - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - else: - - - targ = [sym] - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # above as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - p.callable(pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = False - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - if t == 0: - n = symstack[-1] - result = getattr(n, 'value', None) - return result - - if t is None: - - - # We have some kind of parsing error here. To handle - # this, we are going to push the current token onto - # the tokenstack and replace it with an 'error' token. - # If there are any synchronization rules, they may - # catch it. - # - # In addition to pushing the error token, we call call - # the user defined p_error() function if this is the - # first syntax error. This function is only called if - # errorcount == 0. - if errorcount == 0 or self.errorok: - errorcount = error_count - self.errorok = False - errtoken = lookahead - if errtoken.type == '$end': - errtoken = None # End of file! - if self.errorfunc: - if errtoken and not hasattr(errtoken, 'lexer'): - errtoken.lexer = lexer - tok = call_errorfunc(self.errorfunc, errtoken, self) - if self.errorok: - # User must have done some kind of panic - # mode recovery on their own. The - # returned token is the next lookahead - lookahead = tok - errtoken = None - continue - else: - if errtoken: - if hasattr(errtoken, 'lineno'): - lineno = lookahead.lineno - else: - lineno = 0 - if lineno: - sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) - else: - sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) - else: - sys.stderr.write('yacc: Parse error in input. EOF\n') - return - - else: - errorcount = error_count - - # case 1: the statestack only has 1 entry on it. If we're in this state, the - # entire parse has been rolled back and we're completely hosed. The token is - # discarded and we just keep going. - - if len(statestack) <= 1 and lookahead.type != '$end': - lookahead = None - errtoken = None - state = 0 - # Nuke the pushback stack - del lookaheadstack[:] - continue - - # case 2: the statestack has a couple of entries on it, but we're - # at the end of the file. nuke the top entry and generate an error token - - # Start nuking entries on the stack - if lookahead.type == '$end': - # Whoa. We're really hosed here. Bail out - return - - if lookahead.type != 'error': - sym = symstack[-1] - if sym.type == 'error': - # Hmmm. 
Error is on top of stack, we'll just nuke input - # symbol and continue - lookahead = None - continue - - # Create the error symbol for the first time and make it the new lookahead symbol - t = YaccSymbol() - t.type = 'error' - - if hasattr(lookahead, 'lineno'): - t.lineno = t.endlineno = lookahead.lineno - if hasattr(lookahead, 'lexpos'): - t.lexpos = t.endlexpos = lookahead.lexpos - t.value = lookahead - lookaheadstack.append(lookahead) - lookahead = t - else: - sym = symstack.pop() - statestack.pop() - state = statestack[-1] - - continue - - # Call an error function here - raise RuntimeError('yacc: internal parser error!!!\n') - - #--! parseopt-notrack-end - -# ----------------------------------------------------------------------------- -# === Grammar Representation === -# -# The following functions, classes, and variables are used to represent and -# manipulate the rules that make up a grammar. -# ----------------------------------------------------------------------------- - -# regex matching identifiers -_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') - -# ----------------------------------------------------------------------------- -# class Production: -# -# This class stores the raw information about a single production or grammar rule. -# A grammar rule refers to a specification such as this: -# -# expr : expr PLUS term -# -# Here are the basic attributes defined on all productions -# -# name - Name of the production. For example 'expr' -# prod - A list of symbols on the right side ['expr','PLUS','term'] -# prec - Production precedence level -# number - Production number. -# func - Function that executes on reduce -# file - File where production function is defined -# lineno - Line number where production function is defined -# -# The following attributes are defined or optional. -# -# len - Length of the production (number of symbols on right hand side) -# usyms - Set of unique symbols found in the production -# ----------------------------------------------------------------------------- - -class Production(object): - reduced = 0 - def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): - self.name = name - self.prod = tuple(prod) - self.number = number - self.func = func - self.callable = None - self.file = file - self.line = line - self.prec = precedence - - # Internal settings used during table construction - - self.len = len(self.prod) # Length of the production - - # Create a list of unique production symbols used in the production - self.usyms = [] - for s in self.prod: - if s not in self.usyms: - self.usyms.append(s) - - # List of all LR items for the production - self.lr_items = [] - self.lr_next = None - - # Create a string representation - if self.prod: - self.str = '%s -> %s' % (self.name, ' '.join(self.prod)) - else: - self.str = '%s -> ' % self.name - - def __str__(self): - return self.str - - def __repr__(self): - return 'Production(' + str(self) + ')' - - def __len__(self): - return len(self.prod) - - def __nonzero__(self): - return 1 - - def __getitem__(self, index): - return self.prod[index] - - # Return the nth lr_item from the production (or None if at the end) - def lr_item(self, n): - if n > len(self.prod): - return None - p = LRItem(self, n) - # Precompute the list of productions immediately following. 
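
Grammar-level recovery hooks into the machinery above through the reserved 'error' symbol: when the engine pushes an error token, any production mentioning it can absorb the damage and let parsing resume. A sketch of such a synchronization rule, with SEMI again an illustrative token:

    def p_statement_error(p):
        'statement : error SEMI'
        print('discarded a malformed statement')
        p[0] = None
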
- try: - p.lr_after = Prodnames[p.prod[n+1]] - except (IndexError, KeyError): - p.lr_after = [] - try: - p.lr_before = p.prod[n-1] - except IndexError: - p.lr_before = None - return p - - # Bind the production function name to a callable - def bind(self, pdict): - if self.func: - self.callable = pdict[self.func] - -# This class serves as a minimal standin for Production objects when -# reading table data from files. It only contains information -# actually used by the LR parsing engine, plus some additional -# debugging information. -class MiniProduction(object): - def __init__(self, str, name, len, func, file, line): - self.name = name - self.len = len - self.func = func - self.callable = None - self.file = file - self.line = line - self.str = str - - def __str__(self): - return self.str - - def __repr__(self): - return 'MiniProduction(%s)' % self.str - - # Bind the production function name to a callable - def bind(self, pdict): - if self.func: - self.callable = pdict[self.func] - - -# ----------------------------------------------------------------------------- -# class LRItem -# -# This class represents a specific stage of parsing a production rule. For -# example: -# -# expr : expr . PLUS term -# -# In the above, the "." represents the current location of the parse. Here -# basic attributes: -# -# name - Name of the production. For example 'expr' -# prod - A list of symbols on the right side ['expr','.', 'PLUS','term'] -# number - Production number. -# -# lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term' -# then lr_next refers to 'expr -> expr PLUS . term' -# lr_index - LR item index (location of the ".") in the prod list. -# lookaheads - LALR lookahead symbols for this item -# len - Length of the production (number of symbols on right hand side) -# lr_after - List of all productions that immediately follow -# lr_before - Grammar symbol immediately before -# ----------------------------------------------------------------------------- - -class LRItem(object): - def __init__(self, p, n): - self.name = p.name - self.prod = list(p.prod) - self.number = p.number - self.lr_index = n - self.lookaheads = {} - self.prod.insert(n, '.') - self.prod = tuple(self.prod) - self.len = len(self.prod) - self.usyms = p.usyms - - def __str__(self): - if self.prod: - s = '%s -> %s' % (self.name, ' '.join(self.prod)) - else: - s = '%s -> ' % self.name - return s - - def __repr__(self): - return 'LRItem(' + str(self) + ')' - -# ----------------------------------------------------------------------------- -# rightmost_terminal() -# -# Return the rightmost terminal from a list of symbols. Used in add_production() -# ----------------------------------------------------------------------------- -def rightmost_terminal(symbols, terminals): - i = len(symbols) - 1 - while i >= 0: - if symbols[i] in terminals: - return symbols[i] - i -= 1 - return None - -# ----------------------------------------------------------------------------- -# === GRAMMAR CLASS === -# -# The following class represents the contents of the specified grammar along -# with various computed properties such as first sets, follow sets, LR items, etc. -# This data is used for critical parts of the table generation process later. -# ----------------------------------------------------------------------------- - -class GrammarError(YaccError): - pass - -class Grammar(object): - def __init__(self, terminals): - self.Productions = [None] # A list of all of the productions. 
The first - # entry is always reserved for the purpose of - # building an augmented grammar - - self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all - # productions of that nonterminal. - - self.Prodmap = {} # A dictionary that is only used to detect duplicate - # productions. - - self.Terminals = {} # A dictionary mapping the names of terminal symbols to a - # list of the rules where they are used. - - for term in terminals: - self.Terminals[term] = [] - - self.Terminals['error'] = [] - - self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list - # of rule numbers where they are used. - - self.First = {} # A dictionary of precomputed FIRST(x) symbols - - self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols - - self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the - # form ('right',level) or ('nonassoc', level) or ('left',level) - - self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer. - # This is only used to provide error checking and to generate - # a warning about unused precedence rules. - - self.Start = None # Starting symbol for the grammar - - - def __len__(self): - return len(self.Productions) - - def __getitem__(self, index): - return self.Productions[index] - - # ----------------------------------------------------------------------------- - # set_precedence() - # - # Sets the precedence for a given terminal. assoc is the associativity such as - # 'left','right', or 'nonassoc'. level is a numeric level. - # - # ----------------------------------------------------------------------------- - - def set_precedence(self, term, assoc, level): - assert self.Productions == [None], 'Must call set_precedence() before add_production()' - if term in self.Precedence: - raise GrammarError('Precedence already specified for terminal %r' % term) - if assoc not in ['left', 'right', 'nonassoc']: - raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") - self.Precedence[term] = (assoc, level) - - # ----------------------------------------------------------------------------- - # add_production() - # - # Given an action function, this function assembles a production rule and - # computes its precedence level. - # - # The production rule is supplied as a list of symbols. For example, - # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and - # symbols ['expr','PLUS','term']. - # - # Precedence is determined by the precedence of the right-most non-terminal - # or the precedence of a terminal specified by %prec. - # - # A variety of error checks are performed to make sure production symbols - # are valid and that %prec is used correctly. - # ----------------------------------------------------------------------------- - - def add_production(self, prodname, syms, func=None, file='', line=0): - - if prodname in self.Terminals: - raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname)) - if prodname == 'error': - raise GrammarError('%s:%d: Illegal rule name %r. 
error is a reserved word' % (file, line, prodname)) - if not _is_identifier.match(prodname): - raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname)) - - # Look for literal tokens - for n, s in enumerate(syms): - if s[0] in "'\"": - try: - c = eval(s) - if (len(c) > 1): - raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' % - (file, line, s, prodname)) - if c not in self.Terminals: - self.Terminals[c] = [] - syms[n] = c - continue - except SyntaxError: - pass - if not _is_identifier.match(s) and s != '%prec': - raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname)) - - # Determine the precedence level - if '%prec' in syms: - if syms[-1] == '%prec': - raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line)) - if syms[-2] != '%prec': - raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' % - (file, line)) - precname = syms[-1] - prodprec = self.Precedence.get(precname) - if not prodprec: - raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname)) - else: - self.UsedPrecedence.add(precname) - del syms[-2:] # Drop %prec from the rule - else: - # If no %prec, precedence is determined by the rightmost terminal symbol - precname = rightmost_terminal(syms, self.Terminals) - prodprec = self.Precedence.get(precname, ('right', 0)) - - # See if the rule is already in the rulemap - map = '%s -> %s' % (prodname, syms) - if map in self.Prodmap: - m = self.Prodmap[map] - raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) + - 'Previous definition at %s:%d' % (m.file, m.line)) - - # From this point on, everything is valid. Create a new Production instance - pnumber = len(self.Productions) - if prodname not in self.Nonterminals: - self.Nonterminals[prodname] = [] - - # Add the production number to Terminals and Nonterminals - for t in syms: - if t in self.Terminals: - self.Terminals[t].append(pnumber) - else: - if t not in self.Nonterminals: - self.Nonterminals[t] = [] - self.Nonterminals[t].append(pnumber) - - # Create a production and add it to the list of productions - p = Production(pnumber, prodname, syms, prodprec, func, file, line) - self.Productions.append(p) - self.Prodmap[map] = p - - # Add to the global productions list - try: - self.Prodnames[prodname].append(p) - except KeyError: - self.Prodnames[prodname] = [p] - - # ----------------------------------------------------------------------------- - # set_start() - # - # Sets the starting symbol and creates the augmented grammar. Production - # rule 0 is S' -> start where start is the start symbol. - # ----------------------------------------------------------------------------- - - def set_start(self, start=None): - if not start: - start = self.Productions[1].name - if start not in self.Nonterminals: - raise GrammarError('start symbol %s undefined' % start) - self.Productions[0] = Production(0, "S'", [start]) - self.Nonterminals[start].append(0) - self.Start = start - - # ----------------------------------------------------------------------------- - # find_unreachable() - # - # Find all of the nonterminal symbols that can't be reached from the starting - # symbol. Returns a list of nonterminals that can't be reached. 
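
In practice %prec is written at the end of a rule to borrow the precedence of a terminal, possibly a fictitious one declared only for this purpose, in the module-level precedence table that feeds set_precedence() above. A sketch of the classic unary-minus case:

    precedence = (
        ('left', 'PLUS', 'MINUS'),
        ('left', 'TIMES', 'DIVIDE'),
        ('right', 'UMINUS'),          # pseudo-token used only via %prec
    )

    def p_expr_uminus(p):
        'expr : MINUS expr %prec UMINUS'
        p[0] = -p[2]
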
- # ----------------------------------------------------------------------------- - - def find_unreachable(self): - - # Mark all symbols that are reachable from a symbol s - def mark_reachable_from(s): - if s in reachable: - return - reachable.add(s) - for p in self.Prodnames.get(s, []): - for r in p.prod: - mark_reachable_from(r) - - reachable = set() - mark_reachable_from(self.Productions[0].prod[0]) - return [s for s in self.Nonterminals if s not in reachable] - - # ----------------------------------------------------------------------------- - # infinite_cycles() - # - # This function looks at the various parsing rules and tries to detect - # infinite recursion cycles (grammar rules where there is no possible way - # to derive a string of only terminals). - # ----------------------------------------------------------------------------- - - def infinite_cycles(self): - terminates = {} - - # Terminals: - for t in self.Terminals: - terminates[t] = True - - terminates['$end'] = True - - # Nonterminals: - - # Initialize to false: - for n in self.Nonterminals: - terminates[n] = False - - # Then propagate termination until no change: - while True: - some_change = False - for (n, pl) in self.Prodnames.items(): - # Nonterminal n terminates iff any of its productions terminates. - for p in pl: - # Production p terminates iff all of its rhs symbols terminate. - for s in p.prod: - if not terminates[s]: - # The symbol s does not terminate, - # so production p does not terminate. - p_terminates = False - break - else: - # didn't break from the loop, - # so every symbol s terminates - # so production p terminates. - p_terminates = True - - if p_terminates: - # symbol n terminates! - if not terminates[n]: - terminates[n] = True - some_change = True - # Don't need to consider any more productions for this n. - break - - if not some_change: - break - - infinite = [] - for (s, term) in terminates.items(): - if not term: - if s not in self.Prodnames and s not in self.Terminals and s != 'error': - # s is used-but-not-defined, and we've already warned of that, - # so it would be overkill to say that it's also non-terminating. - pass - else: - infinite.append(s) - - return infinite - - # ----------------------------------------------------------------------------- - # undefined_symbols() - # - # Find all symbols that were used the grammar, but not defined as tokens or - # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol - # and prod is the production where the symbol was used. - # ----------------------------------------------------------------------------- - def undefined_symbols(self): - result = [] - for p in self.Productions: - if not p: - continue - - for s in p.prod: - if s not in self.Prodnames and s not in self.Terminals and s != 'error': - result.append((s, p)) - return result - - # ----------------------------------------------------------------------------- - # unused_terminals() - # - # Find all terminals that were defined, but not used by the grammar. Returns - # a list of all symbols. - # ----------------------------------------------------------------------------- - def unused_terminals(self): - unused_tok = [] - for s, v in self.Terminals.items(): - if s != 'error' and not v: - unused_tok.append(s) - - return unused_tok - - # ------------------------------------------------------------------------------ - # unused_rules() - # - # Find all grammar rules that were defined, but not used (maybe not reachable) - # Returns a list of productions. 
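
These checks can be exercised directly, although applications normally see their results only as warnings printed by yacc(). A sketch driving the Grammar class by hand, assuming it is imported from the external ply.yacc module:

    from ply.yacc import Grammar

    g = Grammar(['NUMBER', 'PLUS'])          # terminal names
    g.add_production('expr', ['expr', 'PLUS', 'term'])
    g.add_production('expr', ['NUMBER'])
    g.set_start('expr')                      # installs S' -> expr as rule 0
    print(g.undefined_symbols())             # 'term' is used but never defined
    print(g.unused_terminals())              # [] here: both terminals are used
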
- # ------------------------------------------------------------------------------ - - def unused_rules(self): - unused_prod = [] - for s, v in self.Nonterminals.items(): - if not v: - p = self.Prodnames[s][0] - unused_prod.append(p) - return unused_prod - - # ----------------------------------------------------------------------------- - # unused_precedence() - # - # Returns a list of tuples (term,precedence) corresponding to precedence - # rules that were never used by the grammar. term is the name of the terminal - # on which precedence was applied and precedence is a string such as 'left' or - # 'right' corresponding to the type of precedence. - # ----------------------------------------------------------------------------- - - def unused_precedence(self): - unused = [] - for termname in self.Precedence: - if not (termname in self.Terminals or termname in self.UsedPrecedence): - unused.append((termname, self.Precedence[termname][0])) - - return unused - - # ------------------------------------------------------------------------- - # _first() - # - # Compute the value of FIRST1(beta) where beta is a tuple of symbols. - # - # During execution of compute_first1, the result may be incomplete. - # Afterward (e.g., when called from compute_follow()), it will be complete. - # ------------------------------------------------------------------------- - def _first(self, beta): - - # We are computing First(x1,x2,x3,...,xn) - result = [] - for x in beta: - x_produces_empty = False - - # Add all the non- symbols of First[x] to the result. - for f in self.First[x]: - if f == '': - x_produces_empty = True - else: - if f not in result: - result.append(f) - - if x_produces_empty: - # We have to consider the next x in beta, - # i.e. stay in the loop. - pass - else: - # We don't have to consider any further symbols in beta. - break - else: - # There was no 'break' from the loop, - # so x_produces_empty was true for all x in beta, - # so beta produces empty as well. - result.append('') - - return result - - # ------------------------------------------------------------------------- - # compute_first() - # - # Compute the value of FIRST1(X) for all symbols - # ------------------------------------------------------------------------- - def compute_first(self): - if self.First: - return self.First - - # Terminals: - for t in self.Terminals: - self.First[t] = [t] - - self.First['$end'] = ['$end'] - - # Nonterminals: - - # Initialize to the empty set: - for n in self.Nonterminals: - self.First[n] = [] - - # Then propagate symbols until no change: - while True: - some_change = False - for n in self.Nonterminals: - for p in self.Prodnames[n]: - for f in self._first(p.prod): - if f not in self.First[n]: - self.First[n].append(f) - some_change = True - if not some_change: - break - - return self.First - - # --------------------------------------------------------------------- - # compute_follow() - # - # Computes all of the follow sets for every non-terminal symbol. The - # follow set is the set of all symbols that might follow a given - # non-terminal. See the Dragon book, 2nd Ed. p. 189. - # --------------------------------------------------------------------- - def compute_follow(self, start=None): - # If already computed, return the result - if self.Follow: - return self.Follow - - # If first sets not computed yet, do that first. 
- if not self.First: - self.compute_first() - - # Add '$end' to the follow list of the start symbol - for k in self.Nonterminals: - self.Follow[k] = [] - - if not start: - start = self.Productions[1].name - - self.Follow[start] = ['$end'] - - while True: - didadd = False - for p in self.Productions[1:]: - # Here is the production set - for i, B in enumerate(p.prod): - if B in self.Nonterminals: - # Okay. We got a non-terminal in a production - fst = self._first(p.prod[i+1:]) - hasempty = False - for f in fst: - if f != '' and f not in self.Follow[B]: - self.Follow[B].append(f) - didadd = True - if f == '': - hasempty = True - if hasempty or i == (len(p.prod)-1): - # Add elements of follow(a) to follow(b) - for f in self.Follow[p.name]: - if f not in self.Follow[B]: - self.Follow[B].append(f) - didadd = True - if not didadd: - break - return self.Follow - - - # ----------------------------------------------------------------------------- - # build_lritems() - # - # This function walks the list of productions and builds a complete set of the - # LR items. The LR items are stored in two ways: First, they are uniquely - # numbered and placed in the list _lritems. Second, a linked list of LR items - # is built for each production. For example: - # - # E -> E PLUS E - # - # Creates the list - # - # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] - # ----------------------------------------------------------------------------- - - def build_lritems(self): - for p in self.Productions: - lastlri = p - i = 0 - lr_items = [] - while True: - if i > len(p): - lri = None - else: - lri = LRItem(p, i) - # Precompute the list of productions immediately following - try: - lri.lr_after = self.Prodnames[lri.prod[i+1]] - except (IndexError, KeyError): - lri.lr_after = [] - try: - lri.lr_before = lri.prod[i-1] - except IndexError: - lri.lr_before = None - - lastlri.lr_next = lri - if not lri: - break - lr_items.append(lri) - lastlri = lri - i += 1 - p.lr_items = lr_items - -# ----------------------------------------------------------------------------- -# == Class LRTable == -# -# This basic class represents a basic table of LR parsing information. -# Methods for generating the tables are not defined here. They are defined -# in the derived class LRGeneratedTable. 
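build_lritems() a little above enumerates every dot position of every production, as in the E -> E PLUS E example in its comment. A throwaway sketch that generates the same item strings:

    def dotted_items(lhs, rhs):
        """Yield the string form of every LR(0) item for one production."""
        for dot in range(len(rhs) + 1):
            body = list(rhs)
            body.insert(dot, '.')
            yield '%s -> %s' % (lhs, ' '.join(body))

    for item in dotted_items('E', ['E', 'PLUS', 'E']):
        print(item)
    # E -> . E PLUS E
    # E -> E . PLUS E
    # E -> E PLUS . E
    # E -> E PLUS E .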
-# ----------------------------------------------------------------------------- - -class VersionError(YaccError): - pass - -class LRTable(object): - def __init__(self): - self.lr_action = None - self.lr_goto = None - self.lr_productions = None - self.lr_method = None - - def read_table(self, module): - if isinstance(module, types.ModuleType): - parsetab = module - else: - exec('import %s' % module) - parsetab = sys.modules[module] - - if parsetab._tabversion != __tabversion__: - raise VersionError('yacc table file version is out of date') - - self.lr_action = parsetab._lr_action - self.lr_goto = parsetab._lr_goto - - self.lr_productions = [] - for p in parsetab._lr_productions: - self.lr_productions.append(MiniProduction(*p)) - - self.lr_method = parsetab._lr_method - return parsetab._lr_signature - - def read_pickle(self, filename): - try: - import cPickle as pickle - except ImportError: - import pickle - - if not os.path.exists(filename): - raise ImportError - - in_f = open(filename, 'rb') - - tabversion = pickle.load(in_f) - if tabversion != __tabversion__: - raise VersionError('yacc table file version is out of date') - self.lr_method = pickle.load(in_f) - signature = pickle.load(in_f) - self.lr_action = pickle.load(in_f) - self.lr_goto = pickle.load(in_f) - productions = pickle.load(in_f) - - self.lr_productions = [] - for p in productions: - self.lr_productions.append(MiniProduction(*p)) - - in_f.close() - return signature - - # Bind all production function names to callable objects in pdict - def bind_callables(self, pdict): - for p in self.lr_productions: - p.bind(pdict) - - -# ----------------------------------------------------------------------------- -# === LR Generator === -# -# The following classes and functions are used to generate LR parsing tables on -# a grammar. -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# digraph() -# traverse() -# -# The following two functions are used to compute set valued functions -# of the form: -# -# F(x) = F'(x) U U{F(y) | x R y} -# -# This is used to compute the values of Read() sets as well as FOLLOW sets -# in LALR(1) generation. -# -# Inputs: X - An input set -# R - A relation -# FP - Set-valued function -# ------------------------------------------------------------------------------ - -def digraph(X, R, FP): - N = {} - for x in X: - N[x] = 0 - stack = [] - F = {} - for x in X: - if N[x] == 0: - traverse(x, N, stack, F, X, R, FP) - return F - -def traverse(x, N, stack, F, X, R, FP): - stack.append(x) - d = len(stack) - N[x] = d - F[x] = FP(x) # F(X) <- F'(x) - - rel = R(x) # Get y's related to x - for y in rel: - if N[y] == 0: - traverse(y, N, stack, F, X, R, FP) - N[x] = min(N[x], N[y]) - for a in F.get(y, []): - if a not in F[x]: - F[x].append(a) - if N[x] == d: - N[stack[-1]] = MAXINT - F[stack[-1]] = F[x] - element = stack.pop() - while element != x: - N[stack[-1]] = MAXINT - F[stack[-1]] = F[x] - element = stack.pop() - -class LALRError(YaccError): - pass - -# ----------------------------------------------------------------------------- -# == LRGeneratedTable == -# -# This class implements the LR table generation algorithm. 
There are no -# public methods except for write() -# ----------------------------------------------------------------------------- - -class LRGeneratedTable(LRTable): - def __init__(self, grammar, method='LALR', log=None): - if method not in ['SLR', 'LALR']: - raise LALRError('Unsupported method %s' % method) - - self.grammar = grammar - self.lr_method = method - - # Set up the logger - if not log: - log = NullLogger() - self.log = log - - # Internal attributes - self.lr_action = {} # Action table - self.lr_goto = {} # Goto table - self.lr_productions = grammar.Productions # Copy of grammar Production array - self.lr_goto_cache = {} # Cache of computed gotos - self.lr0_cidhash = {} # Cache of closures - - self._add_count = 0 # Internal counter used to detect cycles - - # Diagonistic information filled in by the table generator - self.sr_conflict = 0 - self.rr_conflict = 0 - self.conflicts = [] # List of conflicts - - self.sr_conflicts = [] - self.rr_conflicts = [] - - # Build the tables - self.grammar.build_lritems() - self.grammar.compute_first() - self.grammar.compute_follow() - self.lr_parse_table() - - # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. - - def lr0_closure(self, I): - self._add_count += 1 - - # Add everything in I to J - J = I[:] - didadd = True - while didadd: - didadd = False - for j in J: - for x in j.lr_after: - if getattr(x, 'lr0_added', 0) == self._add_count: - continue - # Add B --> .G to J - J.append(x.lr_next) - x.lr0_added = self._add_count - didadd = True - - return J - - # Compute the LR(0) goto function goto(I,X) where I is a set - # of LR(0) items and X is a grammar symbol. This function is written - # in a way that guarantees uniqueness of the generated goto sets - # (i.e. the same goto set will never be returned as two different Python - # objects). With uniqueness, we can later do fast set comparisons using - # id(obj) instead of element-wise comparison. - - def lr0_goto(self, I, x): - # First we look for a previously cached entry - g = self.lr_goto_cache.get((id(I), x)) - if g: - return g - - # Now we generate the goto set in a way that guarantees uniqueness - # of the result - - s = self.lr_goto_cache.get(x) - if not s: - s = {} - self.lr_goto_cache[x] = s - - gs = [] - for p in I: - n = p.lr_next - if n and n.lr_before == x: - s1 = s.get(id(n)) - if not s1: - s1 = {} - s[id(n)] = s1 - gs.append(n) - s = s1 - g = s.get('$end') - if not g: - if gs: - g = self.lr0_closure(gs) - s['$end'] = g - else: - s['$end'] = gs - self.lr_goto_cache[(id(I), x)] = g - return g - - # Compute the LR(0) sets of item function - def lr0_items(self): - C = [self.lr0_closure([self.grammar.Productions[0].lr_next])] - i = 0 - for I in C: - self.lr0_cidhash[id(I)] = i - i += 1 - - # Loop over the items in C and each grammar symbols - i = 0 - while i < len(C): - I = C[i] - i += 1 - - # Collect all of the symbols that could possibly be in the goto(I,X) sets - asyms = {} - for ii in I: - for s in ii.usyms: - asyms[s] = None - - for x in asyms: - g = self.lr0_goto(I, x) - if not g or id(g) in self.lr0_cidhash: - continue - self.lr0_cidhash[id(g)] = len(C) - C.append(g) - - return C - - # ----------------------------------------------------------------------------- - # ==== LALR(1) Parsing ==== - # - # LALR(1) parsing is almost exactly the same as SLR except that instead of - # relying upon Follow() sets when performing reductions, a more selective - # lookahead set that incorporates the state of the LR(0) machine is utilized. 
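The digraph()/traverse() pair defined a little earlier solves F(x) = F'(x) U U{F(y) | x R y} with a Tarjan-style walk that collapses cycles. A naive fixed-point sketch computes the same sets and is easier to check by hand (the relation and seed sets below are invented):

    def closure_sets(X, R, FP):
        """Naive fixed point for F(x) = FP(x) U union of F(y) for x R y.
        digraph()/traverse() compute the same sets in near-linear time."""
        F = {x: set(FP(x)) for x in X}
        changed = True
        while changed:
            changed = False
            for x in X:
                for y in R(x):
                    if not F[y] <= F[x]:
                        F[x] |= F[y]
                        changed = True
        return F

    # A three-node cycle, each node contributing one terminal of its own.
    rel = {'x': ['y'], 'y': ['z'], 'z': ['x']}
    seed = {'x': ['a'], 'y': ['b'], 'z': ['c']}
    F = closure_sets('xyz', lambda s: rel[s], lambda s: seed[s])
    # Every node in the cycle ends up with {'a', 'b', 'c'}.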
- # Thus, we mainly just have to focus on calculating the lookahead sets. - # - # The method used here is due to DeRemer and Pennello (1982). - # - # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1) - # Lookahead Sets", ACM Transactions on Programming Languages and Systems, - # Vol. 4, No. 4, Oct. 1982, pp. 615-649 - # - # Further details can also be found in: - # - # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", - # McGraw-Hill Book Company, (1985). - # - # ----------------------------------------------------------------------------- - - # ----------------------------------------------------------------------------- - # compute_nullable_nonterminals() - # - # Creates a set of all of the non-terminals that might derive an - # empty string. - # ----------------------------------------------------------------------------- - - def compute_nullable_nonterminals(self): - nullable = set() - num_nullable = 0 - while True: - for p in self.grammar.Productions[1:]: - if p.len == 0: - nullable.add(p.name) - continue - for t in p.prod: - if t not in nullable: - break - else: - nullable.add(p.name) - if len(nullable) == num_nullable: - break - num_nullable = len(nullable) - return nullable - - # ----------------------------------------------------------------------------- - # find_nonterminal_transitions(C) - # - # Given a set of LR(0) items, this function finds all of the non-terminal - # transitions. These are transitions in which a dot appears immediately before - # a non-terminal. Returns a list of tuples of the form (state,N) where state - # is the state number and N is the nonterminal symbol. - # - # The input C is the set of LR(0) items. - # ----------------------------------------------------------------------------- - - def find_nonterminal_transitions(self, C): - trans = [] - for stateno, state in enumerate(C): - for p in state: - if p.lr_index < p.len - 1: - t = (stateno, p.prod[p.lr_index+1]) - if t[1] in self.grammar.Nonterminals: - if t not in trans: - trans.append(t) - return trans - - # ----------------------------------------------------------------------------- - # dr_relation() - # - # Computes the DR(p,A) relationships for non-terminal transitions. The input - # is a tuple (state,N) where state is a number and N is a nonterminal symbol. - # - # Returns a list of terminals. - # ----------------------------------------------------------------------------- - - def dr_relation(self, C, trans, nullable): - dr_set = {} - state, N = trans - terms = [] - - g = self.lr0_goto(C[state], N) - for p in g: - if p.lr_index < p.len - 1: - a = p.prod[p.lr_index+1] - if a in self.grammar.Terminals: - if a not in terms: - terms.append(a) - - # This extra bit is to handle the start state - if state == 0 and N == self.grammar.Productions[0].prod[0]: - terms.append('$end') - - return terms - - # ----------------------------------------------------------------------------- - # reads_relation() - # - # Computes the READS() relation (p,A) READS (t,C).
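compute_nullable_nonterminals() above is another small fixed point. An equivalent standalone sketch over (lhs, rhs) pairs, with a toy grammar invented for illustration:

    def nullable_nonterminals(productions):
        """productions: list of (lhs, rhs_tuple). Returns the nullable set."""
        nullable = set()
        while True:
            before = len(nullable)
            for lhs, rhs in productions:
                if all(sym in nullable for sym in rhs):  # empty rhs passes
                    nullable.add(lhs)
            if len(nullable) == before:
                return nullable

    toy = [('A', ()), ('B', ('A', 'A')), ('C', ('B', 'x'))]
    assert nullable_nonterminals(toy) == {'A', 'B'}   # 'x' is a terminal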
- # ----------------------------------------------------------------------------- - - def reads_relation(self, C, trans, empty): - # Look for empty transitions - rel = [] - state, N = trans - - g = self.lr0_goto(C[state], N) - j = self.lr0_cidhash.get(id(g), -1) - for p in g: - if p.lr_index < p.len - 1: - a = p.prod[p.lr_index + 1] - if a in empty: - rel.append((j, a)) - - return rel - - # ----------------------------------------------------------------------------- - # compute_lookback_includes() - # - # Determines the lookback and includes relations - # - # LOOKBACK: - # - # This relation is determined by running the LR(0) state machine forward. - # For example, starting with a production "N : . A B C", we run it forward - # to obtain "N : A B C ." We then build a relationship between this final - # state and the starting state. These relationships are stored in a dictionary - # lookdict. - # - # INCLUDES: - # - # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). - # - # This relation is used to determine non-terminal transitions that occur - # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) - # if the following holds: - # - # B -> LAT, where T -> epsilon and p' -L-> p - # - # L is essentially a prefix (which may be empty), T is a suffix that must be - # able to derive an empty string. State p' must lead to state p with the string L. - # - # ----------------------------------------------------------------------------- - - def compute_lookback_includes(self, C, trans, nullable): - lookdict = {} # Dictionary of lookback relations - includedict = {} # Dictionary of include relations - - # Make a dictionary of non-terminal transitions - dtrans = {} - for t in trans: - dtrans[t] = 1 - - # Loop over all transitions and compute lookbacks and includes - for state, N in trans: - lookb = [] - includes = [] - for p in C[state]: - if p.name != N: - continue - - # Okay, we have a name match. We now follow the production all the way - # through the state machine until we get the . on the right hand side - - lr_index = p.lr_index - j = state - while lr_index < p.len - 1: - lr_index = lr_index + 1 - t = p.prod[lr_index] - - # Check to see if this symbol and state are a non-terminal transition - if (j, t) in dtrans: - # Yes. Okay, there is some chance that this is an includes relation - # the only way to know for certain is whether the rest of the - # production derives empty - - li = lr_index + 1 - while li < p.len: - if p.prod[li] in self.grammar.Terminals: - break # No forget it - if p.prod[li] not in nullable: - break - li = li + 1 - else: - # Appears to be a relation between (j,t) and (state,N) - includes.append((j, t)) - - g = self.lr0_goto(C[j], t) # Go to next set - j = self.lr0_cidhash.get(id(g), -1) # Go to next state - - # When we get here, j is the final state, now we have to locate the production - for r in C[j]: - if r.name != p.name: - continue - if r.len != p.len: - continue - i = 0 - # This look is comparing a production ". A B C" with "A B C ." - while i < r.lr_index: - if r.prod[i] != p.prod[i+1]: - break - i = i + 1 - else: - lookb.append((j, r)) - for i in includes: - if i not in includedict: - includedict[i] = [] - includedict[i].append((state, N)) - lookdict[(state, N)] = lookb - - return lookdict, includedict - - # ----------------------------------------------------------------------------- - # compute_read_sets() - # - # Given a set of LR(0) items, this function computes the read sets. 
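For reference, the read sets that dr_relation() and reads_relation() feed into satisfy

    Read(p, A) = DR(p, A) U U{ Read(r, C) | (p, A) READS (r, C) }

which is exactly the F(x) = F'(x) U U{F(y) | x R y} shape that digraph() solves; compute_read_sets() below simply passes FP = dr_relation and R = reads_relation.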
- # - # Inputs: C = Set of LR(0) items - # ntrans = Set of nonterminal transitions - # nullable = Set of empty transitions - # - # Returns a set containing the read sets - # ----------------------------------------------------------------------------- - - def compute_read_sets(self, C, ntrans, nullable): - FP = lambda x: self.dr_relation(C, x, nullable) - R = lambda x: self.reads_relation(C, x, nullable) - F = digraph(ntrans, R, FP) - return F - - # ----------------------------------------------------------------------------- - # compute_follow_sets() - # - # Given a set of LR(0) items, a set of non-terminal transitions, a readset, - # and an include set, this function computes the follow sets - # - # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} - # - # Inputs: - # ntrans = Set of nonterminal transitions - # readsets = Readset (previously computed) - # inclsets = Include sets (previously computed) - # - # Returns a set containing the follow sets - # ----------------------------------------------------------------------------- - - def compute_follow_sets(self, ntrans, readsets, inclsets): - FP = lambda x: readsets[x] - R = lambda x: inclsets.get(x, []) - F = digraph(ntrans, R, FP) - return F - - # ----------------------------------------------------------------------------- - # add_lookaheads() - # - # Attaches the lookahead symbols to grammar rules. - # - # Inputs: lookbacks - Set of lookback relations - # followset - Computed follow set - # - # This function directly attaches the lookaheads to productions contained - # in the lookbacks set - # ----------------------------------------------------------------------------- - - def add_lookaheads(self, lookbacks, followset): - for trans, lb in lookbacks.items(): - # Loop over productions in lookback - for state, p in lb: - if state not in p.lookaheads: - p.lookaheads[state] = [] - f = followset.get(trans, []) - for a in f: - if a not in p.lookaheads[state]: - p.lookaheads[state].append(a) - - # ----------------------------------------------------------------------------- - # add_lalr_lookaheads() - # - # This function does all of the work of adding lookahead information for use - # with LALR parsing - # ----------------------------------------------------------------------------- - - def add_lalr_lookaheads(self, C): - # Determine all of the nullable nonterminals - nullable = self.compute_nullable_nonterminals() - - # Find all non-terminal transitions - trans = self.find_nonterminal_transitions(C) - - # Compute read sets - readsets = self.compute_read_sets(C, trans, nullable) - - # Compute lookback/includes relations - lookd, included = self.compute_lookback_includes(C, trans, nullable) - - # Compute LALR FOLLOW sets - followsets = self.compute_follow_sets(trans, readsets, included) - - # Add all of the lookaheads - self.add_lookaheads(lookd, followsets) - - # ----------------------------------------------------------------------------- - # lr_parse_table() - # - # This function constructs the parse tables for SLR or LALR - # ----------------------------------------------------------------------------- - def lr_parse_table(self): - Productions = self.grammar.Productions - Precedence = self.grammar.Precedence - goto = self.lr_goto # Goto array - action = self.lr_action # Action array - log = self.log # Logger for output - - actionp = {} # Action production array (temporary) - - log.info('Parsing method: %s', self.lr_method) - - # Step 1: Construct C = { I0, I1, ... 
IN}, collection of LR(0) items - # This determines the number of states - - C = self.lr0_items() - - if self.lr_method == 'LALR': - self.add_lalr_lookaheads(C) - - # Build the parser table, state by state - st = 0 - for I in C: - # Loop over each production in I - actlist = [] # List of actions - st_action = {} - st_actionp = {} - st_goto = {} - log.info('') - log.info('state %d', st) - log.info('') - for p in I: - log.info(' (%d) %s', p.number, p) - log.info('') - - for p in I: - if p.len == p.lr_index + 1: - if p.name == "S'": - # Start symbol. Accept! - st_action['$end'] = 0 - st_actionp['$end'] = p - else: - # We are at the end of a production. Reduce! - if self.lr_method == 'LALR': - laheads = p.lookaheads[st] - else: - laheads = self.grammar.Follow[p.name] - for a in laheads: - actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p))) - r = st_action.get(a) - if r is not None: - # Whoa. Have a shift/reduce or reduce/reduce conflict - if r > 0: - # Need to decide on shift or reduce here - # By default we favor shifting. Need to add - # some precedence rules here. - sprec, slevel = Productions[st_actionp[a].number].prec - rprec, rlevel = Precedence.get(a, ('right', 0)) - if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): - # We really need to reduce here. - st_action[a] = -p.number - st_actionp[a] = p - if not slevel and not rlevel: - log.info(' ! shift/reduce conflict for %s resolved as reduce', a) - self.sr_conflicts.append((st, a, 'reduce')) - Productions[p.number].reduced += 1 - elif (slevel == rlevel) and (rprec == 'nonassoc'): - st_action[a] = None - else: - # Hmmm. Guess we'll keep the shift - if not rlevel: - log.info(' ! shift/reduce conflict for %s resolved as shift', a) - self.sr_conflicts.append((st, a, 'shift')) - elif r < 0: - # Reduce/reduce conflict. In this case, we favor the rule - # that was defined first in the grammar file - oldp = Productions[-r] - pp = Productions[p.number] - if oldp.line > pp.line: - st_action[a] = -p.number - st_actionp[a] = p - chosenp, rejectp = pp, oldp - Productions[p.number].reduced += 1 - Productions[oldp.number].reduced -= 1 - else: - chosenp, rejectp = oldp, pp - self.rr_conflicts.append((st, chosenp, rejectp)) - log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)', - a, st_actionp[a].number, st_actionp[a]) - else: - raise LALRError('Unknown conflict in state %d' % st) - else: - st_action[a] = -p.number - st_actionp[a] = p - Productions[p.number].reduced += 1 - else: - i = p.lr_index - a = p.prod[i+1] # Get symbol right after the "." - if a in self.grammar.Terminals: - g = self.lr0_goto(I, a) - j = self.lr0_cidhash.get(id(g), -1) - if j >= 0: - # We are in a shift state - actlist.append((a, p, 'shift and go to state %d' % j)) - r = st_action.get(a) - if r is not None: - # Whoa have a shift/reduce or shift/shift conflict - if r > 0: - if r != j: - raise LALRError('Shift/shift conflict in state %d' % st) - elif r < 0: - # Do a precedence check. - # - if precedence of reduce rule is higher, we reduce. - # - if precedence of reduce is same and left assoc, we reduce. - # - otherwise we shift - rprec, rlevel = Productions[st_actionp[a].number].prec - sprec, slevel = Precedence.get(a, ('right', 0)) - if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): - # We decide to shift here... highest precedence to shift - Productions[st_actionp[a].number].reduced -= 1 - st_action[a] = j - st_actionp[a] = p - if not rlevel: - log.info(' ! 
shift/reduce conflict for %s resolved as shift', a) - self.sr_conflicts.append((st, a, 'shift')) - elif (slevel == rlevel) and (rprec == 'nonassoc'): - st_action[a] = None - else: - # Hmmm. Guess we'll keep the reduce - if not slevel and not rlevel: - log.info(' ! shift/reduce conflict for %s resolved as reduce', a) - self.sr_conflicts.append((st, a, 'reduce')) - - else: - raise LALRError('Unknown conflict in state %d' % st) - else: - st_action[a] = j - st_actionp[a] = p - - # Print the actions associated with each terminal - _actprint = {} - for a, p, m in actlist: - if a in st_action: - if p is st_actionp[a]: - log.info(' %-15s %s', a, m) - _actprint[(a, m)] = 1 - log.info('') - # Print the actions that were not used. (debugging) - not_used = 0 - for a, p, m in actlist: - if a in st_action: - if p is not st_actionp[a]: - if not (a, m) in _actprint: - log.debug(' ! %-15s [ %s ]', a, m) - not_used = 1 - _actprint[(a, m)] = 1 - if not_used: - log.debug('') - - # Construct the goto table for this state - - nkeys = {} - for ii in I: - for s in ii.usyms: - if s in self.grammar.Nonterminals: - nkeys[s] = None - for n in nkeys: - g = self.lr0_goto(I, n) - j = self.lr0_cidhash.get(id(g), -1) - if j >= 0: - st_goto[n] = j - log.info(' %-30s shift and go to state %d', n, j) - - action[st] = st_action - actionp[st] = st_actionp - goto[st] = st_goto - st += 1 - - # ----------------------------------------------------------------------------- - # write() - # - # This function writes the LR parsing tables to a file - # ----------------------------------------------------------------------------- - - def write_table(self, tabmodule, outputdir='', signature=''): - if isinstance(tabmodule, types.ModuleType): - raise IOError("Won't overwrite existing tabmodule") - - basemodulename = tabmodule.split('.')[-1] - filename = os.path.join(outputdir, basemodulename) + '.py' - try: - f = open(filename, 'w') - - f.write(''' -# %s -# This file is automatically generated. Do not edit. 
-_tabversion = %r - -_lr_method = %r - -_lr_signature = %r - ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature)) - - # Change smaller to 0 to go back to original tables - smaller = 1 - - # Factor out names to try and make smaller - if smaller: - items = {} - - for s, nd in self.lr_action.items(): - for name, v in nd.items(): - i = items.get(name) - if not i: - i = ([], []) - items[name] = i - i[0].append(s) - i[1].append(v) - - f.write('\n_lr_action_items = {') - for k, v in items.items(): - f.write('%r:([' % k) - for i in v[0]: - f.write('%r,' % i) - f.write('],[') - for i in v[1]: - f.write('%r,' % i) - - f.write(']),') - f.write('}\n') - - f.write(''' -_lr_action = {} -for _k, _v in _lr_action_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_action: _lr_action[_x] = {} - _lr_action[_x][_k] = _y -del _lr_action_items -''') - - else: - f.write('\n_lr_action = { ') - for k, v in self.lr_action.items(): - f.write('(%r,%r):%r,' % (k[0], k[1], v)) - f.write('}\n') - - if smaller: - # Factor out names to try and make smaller - items = {} - - for s, nd in self.lr_goto.items(): - for name, v in nd.items(): - i = items.get(name) - if not i: - i = ([], []) - items[name] = i - i[0].append(s) - i[1].append(v) - - f.write('\n_lr_goto_items = {') - for k, v in items.items(): - f.write('%r:([' % k) - for i in v[0]: - f.write('%r,' % i) - f.write('],[') - for i in v[1]: - f.write('%r,' % i) - - f.write(']),') - f.write('}\n') - - f.write(''' -_lr_goto = {} -for _k, _v in _lr_goto_items.items(): - for _x, _y in zip(_v[0], _v[1]): - if not _x in _lr_goto: _lr_goto[_x] = {} - _lr_goto[_x][_k] = _y -del _lr_goto_items -''') - else: - f.write('\n_lr_goto = { ') - for k, v in self.lr_goto.items(): - f.write('(%r,%r):%r,' % (k[0], k[1], v)) - f.write('}\n') - - # Write production table - f.write('_lr_productions = [\n') - for p in self.lr_productions: - if p.func: - f.write(' (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len, - p.func, os.path.basename(p.file), p.line)) - else: - f.write(' (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len)) - f.write(']\n') - f.close() - - except IOError as e: - raise - - - # ----------------------------------------------------------------------------- - # pickle_table() - # - # This function pickles the LR parsing tables to a supplied file object - # ----------------------------------------------------------------------------- - - def pickle_table(self, filename, signature=''): - try: - import cPickle as pickle - except ImportError: - import pickle - with open(filename, 'wb') as outf: - pickle.dump(__tabversion__, outf, pickle_protocol) - pickle.dump(self.lr_method, outf, pickle_protocol) - pickle.dump(signature, outf, pickle_protocol) - pickle.dump(self.lr_action, outf, pickle_protocol) - pickle.dump(self.lr_goto, outf, pickle_protocol) - - outp = [] - for p in self.lr_productions: - if p.func: - outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line)) - else: - outp.append((str(p), p.name, p.len, None, None, None)) - pickle.dump(outp, outf, pickle_protocol) - -# ----------------------------------------------------------------------------- -# === INTROSPECTION === -# -# The following functions and classes are used to implement the PLY -# introspection features followed by the yacc() function itself. 
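The smaller=1 branch of write_table() above stores each table column-wise, one entry per symbol name with parallel lists of states and values, and the generated file rebuilds the per-state dicts at import time. The rebuild amounts to the following (toy data invented; setdefault is used here in place of the emitted "if not _x in ..." test, with the same effect):

    # Toy stand-in for what write_table() emits when smaller=1:
    _lr_action_items = {
        'NUMBER': ([0, 2], [3, 3]),   # symbol -> ([states...], [values...])
        '$end':   ([1],    [0]),
    }

    _lr_action = {}
    for _k, _v in _lr_action_items.items():
        for _x, _y in zip(_v[0], _v[1]):
            _lr_action.setdefault(_x, {})[_k] = _y

    # _lr_action == {0: {'NUMBER': 3}, 2: {'NUMBER': 3}, 1: {'$end': 0}}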
-# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# get_caller_module_dict() -# -# This function returns a dictionary containing all of the symbols defined within -# a caller further down the call stack. This is used to get the environment -# associated with the yacc() call if none was provided. -# ----------------------------------------------------------------------------- - -def get_caller_module_dict(levels): - f = sys._getframe(levels) - ldict = f.f_globals.copy() - if f.f_globals != f.f_locals: - ldict.update(f.f_locals) - return ldict - -# ----------------------------------------------------------------------------- -# parse_grammar() -# -# This takes a raw grammar rule string and parses it into production data -# ----------------------------------------------------------------------------- -def parse_grammar(doc, file, line): - grammar = [] - # Split the doc string into lines - pstrings = doc.splitlines() - lastp = None - dline = line - for ps in pstrings: - dline += 1 - p = ps.split() - if not p: - continue - try: - if p[0] == '|': - # This is a continuation of a previous rule - if not lastp: - raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) - prodname = lastp - syms = p[1:] - else: - prodname = p[0] - lastp = prodname - syms = p[2:] - assign = p[1] - if assign != ':' and assign != '::=': - raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline)) - - grammar.append((file, dline, prodname, syms)) - except SyntaxError: - raise - except Exception: - raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip())) - - return grammar - -# ----------------------------------------------------------------------------- -# ParserReflect() -# -# This class represents information extracted for building a parser including -# start symbol, error function, tokens, precedence list, action functions, -# etc. 
-# ----------------------------------------------------------------------------- -class ParserReflect(object): - def __init__(self, pdict, log=None): - self.pdict = pdict - self.start = None - self.error_func = None - self.tokens = None - self.modules = set() - self.grammar = [] - self.error = False - - if log is None: - self.log = PlyLogger(sys.stderr) - else: - self.log = log - - # Get all of the basic information - def get_all(self): - self.get_start() - self.get_error_func() - self.get_tokens() - self.get_precedence() - self.get_pfunctions() - - # Validate all of the information - def validate_all(self): - self.validate_start() - self.validate_error_func() - self.validate_tokens() - self.validate_precedence() - self.validate_pfunctions() - self.validate_modules() - return self.error - - # Compute a signature over the grammar - def signature(self): - try: - from hashlib import md5 - except ImportError: - from md5 import md5 - try: - sig = md5() - if self.start: - sig.update(self.start.encode('latin-1')) - if self.prec: - sig.update(''.join([''.join(p) for p in self.prec]).encode('latin-1')) - if self.tokens: - sig.update(' '.join(self.tokens).encode('latin-1')) - for f in self.pfuncs: - if f[3]: - sig.update(f[3].encode('latin-1')) - except (TypeError, ValueError): - pass - - digest = base64.b16encode(sig.digest()) - if sys.version_info[0] >= 3: - digest = digest.decode('latin-1') - return digest - - # ----------------------------------------------------------------------------- - # validate_modules() - # - # This method checks to see if there are duplicated p_rulename() functions - # in the parser module file. Without this function, it is really easy for - # users to make mistakes by cutting and pasting code fragments (and it's a real - # bugger to try and figure out why the resulting parser doesn't work). Therefore, - # we just do a little regular expression pattern matching of def statements - # to try and detect duplicates. - # ----------------------------------------------------------------------------- - - def validate_modules(self): - # Match def p_funcname( - fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') - - for module in self.modules: - lines, linen = inspect.getsourcelines(module) - - counthash = {} - for linen, line in enumerate(lines): - linen += 1 - m = fre.match(line) - if m: - name = m.group(1) - prev = counthash.get(name) - if not prev: - counthash[name] = linen - else: - filename = inspect.getsourcefile(module) - self.log.warning('%s:%d: Function %s redefined. 
Previously defined on line %d', - filename, linen, name, prev) - - # Get the start symbol - def get_start(self): - self.start = self.pdict.get('start') - - # Validate the start symbol - def validate_start(self): - if self.start is not None: - if not isinstance(self.start, str): - self.log.error("'start' must be a string") - - # Look for error handler - def get_error_func(self): - self.error_func = self.pdict.get('p_error') - - # Validate the error function - def validate_error_func(self): - if self.error_func: - if isinstance(self.error_func, types.FunctionType): - ismethod = 0 - elif isinstance(self.error_func, types.MethodType): - ismethod = 1 - else: - self.log.error("'p_error' defined, but is not a function or method") - self.error = True - return - - eline = self.error_func.__code__.co_firstlineno - efile = self.error_func.__code__.co_filename - module = inspect.getmodule(self.error_func) - self.modules.add(module) - - argcount = self.error_func.__code__.co_argcount - ismethod - if argcount != 1: - self.log.error('%s:%d: p_error() requires 1 argument', efile, eline) - self.error = True - - # Get the tokens map - def get_tokens(self): - tokens = self.pdict.get('tokens') - if not tokens: - self.log.error('No token list is defined') - self.error = True - return - - if not isinstance(tokens, (list, tuple)): - self.log.error('tokens must be a list or tuple') - self.error = True - return - - if not tokens: - self.log.error('tokens is empty') - self.error = True - return - - self.tokens = tokens - - # Validate the tokens - def validate_tokens(self): - # Validate the tokens. - if 'error' in self.tokens: - self.log.error("Illegal token name 'error'. Is a reserved word") - self.error = True - return - - terminals = set() - for n in self.tokens: - if n in terminals: - self.log.warning('Token %r multiply defined', n) - terminals.add(n) - - # Get the precedence map (if any) - def get_precedence(self): - self.prec = self.pdict.get('precedence') - - # Validate and parse the precedence map - def validate_precedence(self): - preclist = [] - if self.prec: - if not isinstance(self.prec, (list, tuple)): - self.log.error('precedence must be a list or tuple') - self.error = True - return - for level, p in enumerate(self.prec): - if not isinstance(p, (list, tuple)): - self.log.error('Bad precedence table') - self.error = True - return - - if len(p) < 2: - self.log.error('Malformed precedence entry %s. 
Must be (assoc, term, ..., term)', p) - self.error = True - return - assoc = p[0] - if not isinstance(assoc, str): - self.log.error('precedence associativity must be a string') - self.error = True - return - for term in p[1:]: - if not isinstance(term, str): - self.log.error('precedence items must be strings') - self.error = True - return - preclist.append((term, assoc, level+1)) - self.preclist = preclist - - # Get all p_functions from the grammar - def get_pfunctions(self): - p_functions = [] - for name, item in self.pdict.items(): - if not name.startswith('p_') or name == 'p_error': - continue - if isinstance(item, (types.FunctionType, types.MethodType)): - line = item.__code__.co_firstlineno - module = inspect.getmodule(item) - p_functions.append((line, module, name, item.__doc__)) - - # Sort all of the actions by line number; make sure to stringify - # modules to make them sortable, since `line` may not uniquely sort all - # p functions - p_functions.sort(key=lambda p_function: ( - p_function[0], - str(p_function[1]), - p_function[2], - p_function[3])) - self.pfuncs = p_functions - - # Validate all of the p_functions - def validate_pfunctions(self): - grammar = [] - # Check for non-empty symbols - if len(self.pfuncs) == 0: - self.log.error('no rules of the form p_rulename are defined') - self.error = True - return - - for line, module, name, doc in self.pfuncs: - file = inspect.getsourcefile(module) - func = self.pdict[name] - if isinstance(func, types.MethodType): - reqargs = 2 - else: - reqargs = 1 - if func.__code__.co_argcount > reqargs: - self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__) - self.error = True - elif func.__code__.co_argcount < reqargs: - self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__) - self.error = True - elif not func.__doc__: - self.log.warning('%s:%d: No documentation string specified in function %r (ignored)', - file, line, func.__name__) - else: - try: - parsed_g = parse_grammar(doc, file, line) - for g in parsed_g: - grammar.append((name, g)) - except SyntaxError as e: - self.log.error(str(e)) - self.error = True - - # Looks like a valid grammar rule - # Mark the file in which defined. - self.modules.add(module) - - # Secondary validation step that looks for p_ definitions that are not functions - # or functions that look like they might be grammar rules. 
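validate_precedence() above flattens the user-facing precedence table into (term, assoc, level) triples, numbering levels from 1 (loosest binding). A sketch with invented token names:

    precedence = (
        ('left', 'PLUS', 'MINUS'),    # level 1
        ('right', 'UMINUS'),          # level 2
    )
    # -> preclist == [('PLUS', 'left', 1), ('MINUS', 'left', 1),
    #                 ('UMINUS', 'right', 2)]
    # yacc() later feeds each triple to grammar.set_precedence(term, assoc, level).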
- - for n, v in self.pdict.items(): - if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)): - continue - if n.startswith('t_'): - continue - if n.startswith('p_') and n != 'p_error': - self.log.warning('%r not defined as a function', n) - if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or - (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)): - if v.__doc__: - try: - doc = v.__doc__.split(' ') - if doc[1] == ':': - self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix', - v.__code__.co_filename, v.__code__.co_firstlineno, n) - except IndexError: - pass - - self.grammar = grammar - -# ----------------------------------------------------------------------------- -# yacc(module) -# -# Build a parser -# ----------------------------------------------------------------------------- - -def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, - check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file, - outputdir=None, debuglog=None, errorlog=None, picklefile=None): - - if tabmodule is None: - tabmodule = tab_module - - # Reference to the parsing method of the last built parser - global parse - - # If pickling is enabled, table files are not created - if picklefile: - write_tables = 0 - - if errorlog is None: - errorlog = PlyLogger(sys.stderr) - - # Get the module dictionary used for the parser - if module: - _items = [(k, getattr(module, k)) for k in dir(module)] - pdict = dict(_items) - # If no __file__ attribute is available, try to obtain it from the __module__ instead - if '__file__' not in pdict: - pdict['__file__'] = sys.modules[pdict['__module__']].__file__ - else: - pdict = get_caller_module_dict(2) - - if outputdir is None: - # If no output directory is set, the location of the output files - # is determined according to the following rules: - # - If tabmodule specifies a package, files go into that package directory - # - Otherwise, files go in the same directory as the specifying module - if isinstance(tabmodule, types.ModuleType): - srcfile = tabmodule.__file__ - else: - if '.' not in tabmodule: - srcfile = pdict['__file__'] - else: - parts = tabmodule.split('.') - pkgname = '.'.join(parts[:-1]) - exec('import %s' % pkgname) - srcfile = getattr(sys.modules[pkgname], '__file__', '') - outputdir = os.path.dirname(srcfile) - - # Determine if the module is package of a package or not. - # If so, fix the tabmodule setting so that tables load correctly - pkg = pdict.get('__package__') - if pkg and isinstance(tabmodule, str): - if '.' not in tabmodule: - tabmodule = pkg + '.' 
+ tabmodule - - - - # Set start symbol if it's specified directly using an argument - if start is not None: - pdict['start'] = start - - # Collect parser information from the dictionary - pinfo = ParserReflect(pdict, log=errorlog) - pinfo.get_all() - - if pinfo.error: - raise YaccError('Unable to build parser') - - # Check signature against table files (if any) - signature = pinfo.signature() - - # Read the tables - try: - lr = LRTable() - if picklefile: - read_signature = lr.read_pickle(picklefile) - else: - read_signature = lr.read_table(tabmodule) - if optimize or (read_signature == signature): - try: - lr.bind_callables(pinfo.pdict) - parser = LRParser(lr, pinfo.error_func) - parse = parser.parse - return parser - except Exception as e: - errorlog.warning('There was a problem loading the table file: %r', e) - except VersionError as e: - errorlog.warning(str(e)) - except ImportError: - pass - - if debuglog is None: - if debug: - try: - debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w')) - except IOError as e: - errorlog.warning("Couldn't open %r. %s" % (debugfile, e)) - debuglog = NullLogger() - else: - debuglog = NullLogger() - - debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__) - - errors = False - - # Validate the parser information - if pinfo.validate_all(): - raise YaccError('Unable to build parser') - - if not pinfo.error_func: - errorlog.warning('no p_error() function is defined') - - # Create a grammar object - grammar = Grammar(pinfo.tokens) - - # Set precedence level for terminals - for term, assoc, level in pinfo.preclist: - try: - grammar.set_precedence(term, assoc, level) - except GrammarError as e: - errorlog.warning('%s', e) - - # Add productions to the grammar - for funcname, gram in pinfo.grammar: - file, line, prodname, syms = gram - try: - grammar.add_production(prodname, syms, funcname, file, line) - except GrammarError as e: - errorlog.error('%s', e) - errors = True - - # Set the grammar start symbols - try: - if start is None: - grammar.set_start(pinfo.start) - else: - grammar.set_start(start) - except GrammarError as e: - errorlog.error(str(e)) - errors = True - - if errors: - raise YaccError('Unable to build parser') - - # Verify the grammar structure - undefined_symbols = grammar.undefined_symbols() - for sym, prod in undefined_symbols: - errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym) - errors = True - - unused_terminals = grammar.unused_terminals() - if unused_terminals: - debuglog.info('') - debuglog.info('Unused terminals:') - debuglog.info('') - for term in unused_terminals: - errorlog.warning('Token %r defined, but not used', term) - debuglog.info(' %s', term) - - # Print out all productions to the debug log - if debug: - debuglog.info('') - debuglog.info('Grammar') - debuglog.info('') - for n, p in enumerate(grammar.Productions): - debuglog.info('Rule %-5d %s', n, p) - - # Find unused non-terminals - unused_rules = grammar.unused_rules() - for prod in unused_rules: - errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name) - - if len(unused_terminals) == 1: - errorlog.warning('There is 1 unused token') - if len(unused_terminals) > 1: - errorlog.warning('There are %d unused tokens', len(unused_terminals)) - - if len(unused_rules) == 1: - errorlog.warning('There is 1 unused rule') - if len(unused_rules) > 1: - errorlog.warning('There are %d unused rules', len(unused_rules)) - - if debug: - debuglog.info('') - 
debuglog.info('Terminals, with rules where they appear') - debuglog.info('') - terms = list(grammar.Terminals) - terms.sort() - for term in terms: - debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]])) - - debuglog.info('') - debuglog.info('Nonterminals, with rules where they appear') - debuglog.info('') - nonterms = list(grammar.Nonterminals) - nonterms.sort() - for nonterm in nonterms: - debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]])) - debuglog.info('') - - if check_recursion: - unreachable = grammar.find_unreachable() - for u in unreachable: - errorlog.warning('Symbol %r is unreachable', u) - - infinite = grammar.infinite_cycles() - for inf in infinite: - errorlog.error('Infinite recursion detected for symbol %r', inf) - errors = True - - unused_prec = grammar.unused_precedence() - for term, assoc in unused_prec: - errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term) - errors = True - - if errors: - raise YaccError('Unable to build parser') - - # Run the LRGeneratedTable on the grammar - if debug: - errorlog.debug('Generating %s tables', method) - - lr = LRGeneratedTable(grammar, method, debuglog) - - if debug: - num_sr = len(lr.sr_conflicts) - - # Report shift/reduce and reduce/reduce conflicts - if num_sr == 1: - errorlog.warning('1 shift/reduce conflict') - elif num_sr > 1: - errorlog.warning('%d shift/reduce conflicts', num_sr) - - num_rr = len(lr.rr_conflicts) - if num_rr == 1: - errorlog.warning('1 reduce/reduce conflict') - elif num_rr > 1: - errorlog.warning('%d reduce/reduce conflicts', num_rr) - - # Write out conflicts to the output file - if debug and (lr.sr_conflicts or lr.rr_conflicts): - debuglog.warning('') - debuglog.warning('Conflicts:') - debuglog.warning('') - - for state, tok, resolution in lr.sr_conflicts: - debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution) - - already_reported = set() - for state, rule, rejected in lr.rr_conflicts: - if (state, id(rule), id(rejected)) in already_reported: - continue - debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) - debuglog.warning('rejected rule (%s) in state %d', rejected, state) - errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) - errorlog.warning('rejected rule (%s) in state %d', rejected, state) - already_reported.add((state, id(rule), id(rejected))) - - warned_never = [] - for state, rule, rejected in lr.rr_conflicts: - if not rejected.reduced and (rejected not in warned_never): - debuglog.warning('Rule (%s) is never reduced', rejected) - errorlog.warning('Rule (%s) is never reduced', rejected) - warned_never.append(rejected) - - # Write the table file if requested - if write_tables: - try: - lr.write_table(tabmodule, outputdir, signature) - except IOError as e: - errorlog.warning("Couldn't create %r. %s" % (tabmodule, e)) - - # Write a pickled version of the tables - if picklefile: - try: - lr.pickle_table(picklefile, signature) - except IOError as e: - errorlog.warning("Couldn't create %r. 
%s" % (picklefile, e)) - - # Build the parser - lr.bind_callables(pinfo.pdict) - parser = LRParser(lr, pinfo.error_func) - - parse = parser.parse - return parser diff --git a/xonsh/ply/ygen.py b/xonsh/ply/ygen.py deleted file mode 100644 index acf5ca1a3..000000000 --- a/xonsh/ply/ygen.py +++ /dev/null @@ -1,74 +0,0 @@ -# ply: ygen.py -# -# This is a support program that auto-generates different versions of the YACC parsing -# function with different features removed for the purposes of performance. -# -# Users should edit the method LParser.parsedebug() in yacc.py. The source code -# for that method is then used to create the other methods. See the comments in -# yacc.py for further details. - -import os.path -import shutil - -def get_source_range(lines, tag): - srclines = enumerate(lines) - start_tag = '#--! %s-start' % tag - end_tag = '#--! %s-end' % tag - - for start_index, line in srclines: - if line.strip().startswith(start_tag): - break - - for end_index, line in srclines: - if line.strip().endswith(end_tag): - break - - return (start_index + 1, end_index) - -def filter_section(lines, tag): - filtered_lines = [] - include = True - tag_text = '#--! %s' % tag - for line in lines: - if line.strip().startswith(tag_text): - include = not include - elif include: - filtered_lines.append(line) - return filtered_lines - -def main(): - dirname = os.path.dirname(__file__) - shutil.copy2(os.path.join(dirname, 'yacc.py'), os.path.join(dirname, 'yacc.py.bak')) - with open(os.path.join(dirname, 'yacc.py'), 'r') as f: - lines = f.readlines() - - parse_start, parse_end = get_source_range(lines, 'parsedebug') - parseopt_start, parseopt_end = get_source_range(lines, 'parseopt') - parseopt_notrack_start, parseopt_notrack_end = get_source_range(lines, 'parseopt-notrack') - - # Get the original source - orig_lines = lines[parse_start:parse_end] - - # Filter the DEBUG sections out - parseopt_lines = filter_section(orig_lines, 'DEBUG') - - # Filter the TRACKING sections out - parseopt_notrack_lines = filter_section(parseopt_lines, 'TRACKING') - - # Replace the parser source sections with updated versions - lines[parseopt_notrack_start:parseopt_notrack_end] = parseopt_notrack_lines - lines[parseopt_start:parseopt_end] = parseopt_lines - - lines = [line.rstrip()+'\n' for line in lines] - with open(os.path.join(dirname, 'yacc.py'), 'w') as f: - f.writelines(lines) - - print('Updated yacc.py') - -if __name__ == '__main__': - main() - - - - -