commit 5757edb15e7c4e440d3320a821c5f90564c93eac
Author: Anthony Scopatz
Date:   Wed Jan 21 17:04:13 2015 -0500

    everything is a mess

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..7e99e367f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.pyc
\ No newline at end of file
diff --git a/scripts/xonsh b/scripts/xonsh
new file mode 100755
index 000000000..ab0b0f1f5
--- /dev/null
+++ b/scripts/xonsh
@@ -0,0 +1,3 @@
+#!/usr/bin/env python
+from xonsh.main import main
+main()
diff --git a/xonsh/__init__.py b/xonsh/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/xonsh/lexer.py b/xonsh/lexer.py
new file mode 100644
index 000000000..2d43b236e
--- /dev/null
+++ b/xonsh/lexer.py
@@ -0,0 +1,482 @@
+from __future__ import print_function, unicode_literals
+import re
+import sys
+
+from ply import lex
+from ply.lex import TOKEN
+
+
+class Lexer(object):
+    """Implements a lexer for the xonsh language.
+    """
+    def __init__(self, errfunc=lambda e, l, c: print(e),
+                 on_lbrace_func=None, on_rbrace_func=None,
+                 type_lookup_func=None):
+        """
+        Parameters
+        ----------
+        errfunc : function, optional
+            An error function callback. Accepts an error
+            message, line and column as arguments.
+        on_lbrace_func : function, optional
+            Called with no arguments each time a '{' is lexed.
+            Defaults to a no-op.
+        on_rbrace_func : function, optional
+            Called with no arguments each time a '}' is lexed.
+            Defaults to a no-op.
+        type_lookup_func : function, optional
+            Called with an identifier string; a true result makes
+            the identifier lex as TYPEID instead of ID. Defaults
+            to a function that always returns False.
+
+        Attributes
+        ----------
+        lexer : a PLY lexer bound to self
+            None until build() is called.
+        fname : str
+            Filename
+        last : token
+            The last token seen.
+        lineno : int
+            The last line number seen.
+        """
+        self.errfunc = errfunc
+        self.on_lbrace_func = on_lbrace_func or (lambda: None)
+        self.on_rbrace_func = on_rbrace_func or (lambda: None)
+        self.type_lookup_func = type_lookup_func or (lambda name: False)
+        self.fname = ''
+        self.last = None
+        self.lexer = None
+
+    def build(self, **kwargs):
+        """Part of the PLY lexer API.
+        """
+        self.lexer = lex.lex(object=self, **kwargs)
+
+    @property
+    def lineno(self):
+        # self.lexer is None until build() runs, so return None
+        # rather than raise when read early.
+        # NOTE(review): lex.lex(object=self) is believed to getattr()
+        # every attribute of self, this property included -- confirm.
+        if self.lexer is not None:
+            return self.lexer.lineno
+
+    @lineno.setter
+    def lineno(self, value):
+        if self.lexer is not None:
+            self.lexer.lineno = value
+
+    def input(self, s):
+        """Calls the lexer on the string s."""
+        self.lexer.input(s)
+
+    def token(self):
+        """Retrieves the next token."""
+        self.last = self.lexer.token()
+        return self.last
+
+    def token_col(self, token):
+        """Discovers the token column number."""
+        offset = self.lexer.lexdata.rfind('\n', 0, token.lexpos)
+        return token.lexpos - offset
+
+    ######################-- PRIVATE --######################
+
+    ##
+    ## Internal auxiliary methods
+    ##
+    def _error(self, msg, token):
+        # Report msg with the token's line and column, then skip one
+        # character so that lexing can continue.
+        location = self._make_tok_location(token)
+        self.errfunc(msg, location[0], location[1])
+        self.lexer.skip(1)
+
+    def _make_tok_location(self, token):
+        return (token.lineno, self.token_col(token))
+
+    ##
+    ## Reserved keywords
+    ##
+    keywords = (
+        '_BOOL', '_COMPLEX', 'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST',
+        'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE', 'ELSE', 'ENUM', 'EXTERN',
+        'FLOAT', 'FOR', 'GOTO', 'IF', 'INLINE', 'INT', 'LONG', 'REGISTER',
+        'RESTRICT', 'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT',
+        'SWITCH', 'TYPEDEF', 'UNION', 'UNSIGNED', 'VOID',
+        'VOLATILE', 'WHILE',
+    )
+
+    keyword_map = {}
+    for keyword in keywords:
+        if keyword == '_BOOL':
+            keyword_map['_Bool'] = keyword
+        elif keyword == '_COMPLEX':
+            keyword_map['_Complex'] = keyword
+        else:
+            keyword_map[keyword.lower()] = keyword
+
+    ##
+    ## All the tokens recognized by the lexer
+    ##
+    tokens = keywords + (
+        # Identifiers
+        'ID',
+
+        # Type identifiers (identifiers previously defined as
+        # types with typedef)
+        'TYPEID',
+
+        # constants
+        'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX',
+        'FLOAT_CONST', 'HEX_FLOAT_CONST',
+        'CHAR_CONST',
+        'WCHAR_CONST',
+
+        # String literals
+        'STRING_LITERAL',
+        'WSTRING_LITERAL',
+
+        # Operators
+        'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
+        'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
+        'LOR', 'LAND', 'LNOT',
+        'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',
+
+        # Assignment
+        'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL',
+        'PLUSEQUAL', 'MINUSEQUAL',
+        'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL',
+        'OREQUAL',
+
+        # Increment/decrement
+        'PLUSPLUS', 'MINUSMINUS',
+
+        # Structure dereference (->)
+        'ARROW',
+
+        # Conditional operator (?)
+        'CONDOP',
+
+        # Delimiters
+        'LPAREN', 'RPAREN', # ( )
+        'LBRACKET', 'RBRACKET', # [ ]
+        'LBRACE', 'RBRACE', # { }
+        'COMMA', 'PERIOD', # , .
+        'SEMI', 'COLON', # ; :
+
+        # Ellipsis (...)
+        'ELLIPSIS',
+
+        # pre-processor
+        'PPHASH', # '#'
+    )
+
+    ##
+    ## Regexes for use in tokens
+    ##
+    ##
+
+    # valid C identifiers (K&R2: A.2.3), plus '$' (supported by some compilers)
+    identifier = r'[a-zA-Z_$][0-9a-zA-Z_$]*'
+
+    hex_prefix = '0[xX]'
+    hex_digits = '[0-9a-fA-F]+'
+
+    # integer constants (K&R2: A.2.5.1)
+    integer_suffix_opt = r'(([uU]ll)|([uU]LL)|(ll[uU]?)|(LL[uU]?)|([uU][lL])|([lL][uU]?)|[uU])?'
+    decimal_constant = '(0'+integer_suffix_opt+')|([1-9][0-9]*'+integer_suffix_opt+')'
+    octal_constant = '0[0-7]*'+integer_suffix_opt
+    hex_constant = hex_prefix+hex_digits+integer_suffix_opt
+
+    bad_octal_constant = '0[0-7]*[89]'
+
+    # character constants (K&R2: A.2.5.2)
+    # Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support #line
+    # directives with Windows paths as filenames (..\..\dir\file)
+    # For the same reason, decimal_escape allows all digit sequences. We want to
+    # parse all correct code, even if it means to sometimes parse incorrect
+    # code.
+    #
+    simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
+    decimal_escape = r"""(\d+)"""
+    hex_escape = r"""(x[0-9a-fA-F]+)"""
+    bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])"""
+
+    escape_sequence = r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'
+    cconst_char = r"""([^'\\\n]|"""+escape_sequence+')'
+    char_const = "'"+cconst_char+"'"
+    wchar_const = 'L'+char_const
+    unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)"
+    bad_char_const = r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+bad_escape+r"""[^'\n]*')"""
+
+    # string literals (K&R2: A.2.6)
+    string_char = r"""([^"\\\n]|"""+escape_sequence+')'
+    string_literal = '"'+string_char+'*"'
+    wstring_literal = 'L'+string_literal
+    bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'
+
+    # floating constants (K&R2: A.2.5.3)
+    exponent_part = r"""([eE][-+]?[0-9]+)"""
+    fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
+    floating_constant = '(((('+fractional_constant+')'+exponent_part+'?)|([0-9]+'+exponent_part+'))[FfLl]?)'
+    binary_exponent_part = r'''([pP][+-]?[0-9]+)'''
+    hex_fractional_constant = '((('+hex_digits+r""")?\."""+hex_digits+')|('+hex_digits+r"""\.))"""
+    hex_floating_constant = '('+hex_prefix+'('+hex_digits+'|'+hex_fractional_constant+')'+binary_exponent_part+'[FfLl]?)'
+
+    # Text that may follow '#': a "line" keyword or a bare line number for
+    # #line directives, and "pragma" for #pragma directives (see t_PPHASH).
+    line_pattern = re.compile(r'([ \t]*line\W)|([ \t]*\d+)')
+    pragma_pattern = re.compile(r'[ \t]*pragma\W')
+
+    ##
+    ## Lexer states: used for preprocessor \n-terminated directives
+    ##
+    states = (
+        # ppline: preprocessor line directives
+        #
+        ('ppline', 'exclusive'),
+
+        # pppragma: pragma
+        #
+        ('pppragma', 'exclusive'),
+    )
+
+    def t_PPHASH(self, t):
+        r'[ \t]*\#'
+        if self.line_pattern.match(t.lexer.lexdata, pos=t.lexer.lexpos):
+            t.lexer.begin('ppline')
+            self.pp_line = self.pp_filename = None
+        elif self.pragma_pattern.match(t.lexer.lexdata, pos=t.lexer.lexpos):
+            t.lexer.begin('pppragma')
+        else:
+            t.type = 'PPHASH'
+            return t
+
+    ##
+    ## Rules for the ppline state
+    ##
+    @TOKEN(string_literal)
+    def t_ppline_FILENAME(self, t):
+        if self.pp_line is None:
+            self._error('filename before line number in #line', t)
+        else:
+            self.pp_filename = t.value.lstrip('"').rstrip('"')
+
+    @TOKEN(decimal_constant)
+    def t_ppline_LINE_NUMBER(self, t):
+        if self.pp_line is None:
+            self.pp_line = t.value
+        else:
+            # Ignore: GCC's cpp sometimes inserts a numeric flag
+            # after the file name
+            pass
+
+    def t_ppline_NEWLINE(self, t):
+        r'\n'
+
+        if self.pp_line is None:
+            self._error('line number missing in #line', t)
+        else:
+            self.lexer.lineno = int(self.pp_line)
+
+            if self.pp_filename is not None:
+                self.fname = self.pp_filename
+
+        t.lexer.begin('INITIAL')
+
+    def t_ppline_PPLINE(self, t):
+        r'line'
+        pass
+
+    t_ppline_ignore = ' \t'
+
+    def t_ppline_error(self, t):
+        self._error('invalid #line directive', t)
+
+    ##
+    ## Rules for the pppragma state
+    ##
+    def t_pppragma_NEWLINE(self, t):
+        r'\n'
+        t.lexer.lineno += 1
+        t.lexer.begin('INITIAL')
+
+    def t_pppragma_PPPRAGMA(self, t):
+        r'pragma'
+        pass
+
+    t_pppragma_ignore = ' \t<>.-{}();+-*/$%@&^~!?:,0123456789'
+
+    @TOKEN(string_literal)
+    def t_pppragma_STR(self, t): pass
+
+    @TOKEN(identifier)
+    def t_pppragma_ID(self, t): pass
+
+    def t_pppragma_error(self, t):
+        self._error('invalid #pragma directive', t)
+
+    ##
+    ## Rules for the normal state
+    ##
+    t_ignore = ' \t'
+
+    # Newlines
+    def t_NEWLINE(self, t):
+        r'\n+'
+        t.lexer.lineno += t.value.count("\n")
+
+    # Operators
+    t_PLUS = r'\+'
+    t_MINUS = r'-'
+    t_TIMES = r'\*'
+    t_DIVIDE = r'/'
+    t_MOD = r'%'
+    t_OR = r'\|'
+    t_AND = r'&'
+    t_NOT = r'~'
+    t_XOR = r'\^'
+    t_LSHIFT = r'<<'
+    t_RSHIFT = r'>>'
+    t_LOR = r'\|\|'
+    t_LAND = r'&&'
+    t_LNOT = r'!'
+    t_LT = r'<'
+    t_GT = r'>'
+    t_LE = r'<='
+    t_GE = r'>='
+    t_EQ = r'=='
+    t_NE = r'!='
+
+    # Assignment operators
+    t_EQUALS = r'='
+    t_TIMESEQUAL = r'\*='
+    t_DIVEQUAL = r'/='
+    t_MODEQUAL = r'%='
+    t_PLUSEQUAL = r'\+='
+    t_MINUSEQUAL = r'-='
+    t_LSHIFTEQUAL = r'<<='
+    t_RSHIFTEQUAL = r'>>='
+    t_ANDEQUAL = r'&='
+    t_OREQUAL = r'\|='
+    t_XOREQUAL = r'\^='
+
+    # Increment/decrement
+    t_PLUSPLUS = r'\+\+'
+    t_MINUSMINUS = r'--'
+
+    # ->
+    t_ARROW = r'->'
+
+    # ?
+    t_CONDOP = r'\?'
+
+    # Delimiters
+    t_LPAREN = r'\('
+    t_RPAREN = r'\)'
+    t_LBRACKET = r'\['
+    t_RBRACKET = r'\]'
+    t_COMMA = r','
+    t_PERIOD = r'\.'
+    t_SEMI = r';'
+    t_COLON = r':'
+    t_ELLIPSIS = r'\.\.\.'
+
+    # Scope delimiters
+    # To see why on_lbrace_func is needed, consider:
+    #   typedef char TT;
+    #   void foo(int TT) { TT = 10; }
+    #   TT x = 5;
+    # Outside the function, TT is a typedef, but inside (starting and ending
+    # with the braces) it's a parameter. The trouble begins with yacc's
+    # lookahead token. If we open a new scope in brace_open, then TT has
+    # already been read and incorrectly interpreted as TYPEID. So, we need
+    # to open and close scopes from within the lexer.
+    # Similar for the TT immediately outside the end of the function.
+    #
+    @TOKEN(r'\{')
+    def t_LBRACE(self, t):
+        self.on_lbrace_func()
+        return t
+    @TOKEN(r'\}')
+    def t_RBRACE(self, t):
+        self.on_rbrace_func()
+        return t
+
+    t_STRING_LITERAL = string_literal
+
+    # The following floating and integer constants are defined as
+    # functions to impose a strict order (otherwise, decimal
+    # is placed before the others because its regex is longer,
+    # and this is bad)
+    #
+    @TOKEN(floating_constant)
+    def t_FLOAT_CONST(self, t):
+        return t
+
+    @TOKEN(hex_floating_constant)
+    def t_HEX_FLOAT_CONST(self, t):
+        return t
+
+    @TOKEN(hex_constant)
+    def t_INT_CONST_HEX(self, t):
+        return t
+
+    @TOKEN(bad_octal_constant)
+    def t_BAD_CONST_OCT(self, t):
+        msg = "Invalid octal constant"
+        self._error(msg, t)
+
+    @TOKEN(octal_constant)
+    def t_INT_CONST_OCT(self, t):
+        return t
+
+    @TOKEN(decimal_constant)
+    def t_INT_CONST_DEC(self, t):
+        return t
+
+    # Must come before bad_char_const, to prevent it from
+    # catching valid char constants as invalid
+    #
+    @TOKEN(char_const)
+    def t_CHAR_CONST(self, t):
+        return t
+
+    @TOKEN(wchar_const)
+    def t_WCHAR_CONST(self, t):
+        return t
+
+    @TOKEN(unmatched_quote)
+    def t_UNMATCHED_QUOTE(self, t):
+        msg = "Unmatched '"
+        self._error(msg, t)
+
+    @TOKEN(bad_char_const)
+    def t_BAD_CHAR_CONST(self, t):
+        msg = "Invalid char constant %s" % t.value
+        self._error(msg, t)
+
+    @TOKEN(wstring_literal)
+    def t_WSTRING_LITERAL(self, t):
+        return t
+
+    # unmatched string literals are caught by the preprocessor
+
+    @TOKEN(bad_string_literal)
+    def t_BAD_STRING_LITERAL(self, t):
+        msg = "String contains invalid escape code"
+        self._error(msg, t)
+
+    @TOKEN(identifier)
+    def t_ID(self, t):
+        t.type = self.keyword_map.get(t.value, "ID")
+        if t.type == 'ID' and self.type_lookup_func(t.value):
+            t.type = "TYPEID"
+        return t
+
+    def t_error(self, t):
+        msg = 'Illegal character %s' % repr(t.value[0])
+        self._error(msg, t)
+
diff --git a/xonsh/main.py b/xonsh/main.py
new file mode 100644
index 000000000..a4a8dc4ac
---
/dev/null
+++ b/xonsh/main.py
@@ -0,0 +1,28 @@
+"""The main xonsh script."""
+import os
+import sys
+import shlex
+import subprocess
+from argparse import ArgumentParser, Namespace
+
+import urwid
+
+from xonsh.main_display import MainDisplay
+
+
+def main(argv=None):
+    """Creates a MainDisplay and calls its main() method.
+
+    Parameters
+    ----------
+    argv : list of str, optional
+        Command line arguments, sys.argv[1:] by default. They are
+        read here but not used for anything yet.
+    """
+    if argv is None:
+        argv = sys.argv[1:]
+    disp = MainDisplay()
+    disp.main()
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/xonsh/main_display.py b/xonsh/main_display.py
new file mode 100644
index 000000000..2a6e10c26
--- /dev/null
+++ b/xonsh/main_display.py
@@ -0,0 +1,54 @@
+"""The main xonsh display."""
+import urwid
+
+from xonsh.shell_view import ShellView
+
+class MainDisplay(object):
+    """Top-level view: a LineBox around a Pile that holds a ShellView
+    and a one-row edit widget.
+    """
+
+    def __init__(self):
+        self.shell = shell = ShellView()
+        self.view = urwid.LineBox(
+            urwid.Pile([
+                ('weight', 70, shell),
+                ('fixed', 1, urwid.Filler(urwid.Edit('focus test edit: '))),
+                ]),
+            )
+        urwid.connect_signal(shell, 'title', self.set_title)
+        urwid.connect_signal(shell, 'closed', self.quit)
+
+
+    def set_title(self, widget, title):
+        """Signal handler: shows title on the surrounding LineBox."""
+        self.view.set_title(title)
+
+    def quit(self, *args, **kwargs):
+        """Stops the urwid main loop; all arguments are ignored."""
+        raise urwid.ExitMainLoop()
+
+    def handle_key(self, key):
+        """Handles input no widget consumed: 'q' or 'Q' quits."""
+        if key in ('q', 'Q'):
+            self.quit()
+
+    def main(self, line=1, col=1):
+        """Builds the urwid main loop and runs it until it exits.
+
+        The line and col parameters are accepted but unused.
+        """
+        loop = urwid.MainLoop(self.view,
+                              handle_mouse=False,
+                              unhandled_input=self.handle_key)
+        loop.screen.set_terminal_properties(256)
+        self.loop = self.shell.main_loop = loop
+        while True:
+            try:
+                self.loop.run()
+            except KeyboardInterrupt:
+                # This class defines no reset_status(), so calling it here
+                # raised AttributeError; swallow Ctrl-C and restart instead.
+                continue
+            else:
+                break
diff --git a/xonsh/shell_view.py b/xonsh/shell_view.py
new file mode 100644
index 000000000..17be023e1
--- /dev/null
+++ b/xonsh/shell_view.py
@@ -0,0 +1,362 @@
+"""The main shell for xonsh."""
+import atexit
+import errno
+import fcntl
+import os
+import pty
+import select
+import signal
+import struct
+import sys
+import termios
+import time
+import traceback
+
+from urwid import Widget
+from urwid.display_common import RealTerminal
+from urwid.vterm import (KEY_TRANSLATIONS, KEY_TRANSLATIONS_DECCKM,
+                         TermCanvas, TermModes)
+
+# True under Python 3, where keypress() must encode keys to bytes.
+PYTHON3 = sys.version_info[0] >= 3
+
+class ShellView(Widget):
+    """A box widget that runs a command inside a pseudo-terminal."""
+
+    _selectable = True
+    _sizing = frozenset(['box'])
+    signals = ['closed', 'beep', 'leds', 'title']
+
+    def __init__(self, env=None, main_loop=None, escape_sequence=None,
+                 command=None):
+        """
+        A terminal emulator within a widget.
+        'command' is the command to execute inside the terminal, provided as a
+        list of the command followed by its arguments. If 'command' is None,
+        the command is the current user's shell. You can also provide a callable
+        instead of a command, which will be executed in the subprocess.
+        'env' can be used to pass custom environment variables. If omitted,
+        os.environ is used.
+        'main_loop' should be provided, because the canvas state machine needs
+        to act on input from the PTY master device. This object must have
+        watch_file and remove_watch_file methods.
+        'escape_sequence' is the urwid key symbol which should be used to break
+        out of the terminal widget. If it's not specified, "ctrl a" is used.
+        """
+        self.__super.__init__()
+
+        if escape_sequence is None:
+            self.escape_sequence = "ctrl a"
+        else:
+            self.escape_sequence = escape_sequence
+
+        if env is None:
+            self.env = dict(os.environ)
+        else:
+            self.env = dict(env)
+
+        if command is None:
+            # Fall back to the user's shell, as the docstring promises.
+            self.command = [self.env.get('SHELL', '/bin/sh')]
+        else:
+            self.command = command
+
+        self.keygrab = False
+        self.last_key = None
+
+        self.response_buffer = []
+
+        self.term_modes = TermModes()
+
+        self.main_loop = main_loop
+
+        self.master = None
+        self.pid = None
+
+        self.width = None
+        self.height = None
+        self.term = None
+        self.has_focus = False
+        self.terminated = False
+
+    def spawn(self):
+        """Forks a child on a new pty and runs self.command in it.
+
+        A callable command is called in the child, which then exits with
+        status 0; any other command is exec'd with TERM set to 'linux'.
+        """
+        env = self.env
+        env['TERM'] = 'linux'
+
+        self.pid, self.master = pty.fork()
+
+        if self.pid == 0:
+            if callable(self.command):
+                try:
+                    try:
+                        self.command()
+                    except:
+                        sys.stderr.write(traceback.format_exc())
+                        sys.stderr.flush()
+                finally:
+                    os._exit(0)
+            else:
+                os.execvpe(self.command[0], self.command, env)
+
+        if self.main_loop is None:
+            fcntl.fcntl(self.master, fcntl.F_SETFL, os.O_NONBLOCK)
+
+        atexit.register(self.terminate)
+
+    def terminate(self):
+        """Signals and reaps the child, then closes the pty master.
+
+        Only the first call does anything.
+        """
+        if self.terminated:
+            return
+
+        self.terminated = True
+        self.remove_watch()
+        self.change_focus(False)
+
+        if self.pid > 0:
+            self.set_termsize(0, 0)
+            for sig in (signal.SIGHUP, signal.SIGCONT, signal.SIGINT,
+                        signal.SIGTERM, signal.SIGKILL):
+                try:
+                    os.kill(self.pid, sig)
+                    pid, status = os.waitpid(self.pid, os.WNOHANG)
+                except OSError:
+                    break
+
+                if pid == 0:
+                    break
+                time.sleep(0.1)
+            try:
+                os.waitpid(self.pid, 0)
+            except OSError:
+                pass
+
+            os.close(self.master)
+
+    def beep(self):
+        """Emits the 'beep' signal."""
+        self._emit('beep')
+
+    def leds(self, which):
+        """Emits the 'leds' signal with the given argument."""
+        self._emit('leds', which)
+
+    def respond(self, string):
+        """
+        Respond to the underlying application with 'string'.
+        """
+        self.response_buffer.append(string)
+
+    def flush_responses(self):
+        """Writes all queued responses to the pty and empties the queue."""
+        for string in self.response_buffer:
+            os.write(self.master, string.encode('ascii'))
+        self.response_buffer = []
+
+    def set_termsize(self, width, height):
+        """Sets the pty window size with the TIOCSWINSZ ioctl."""
+        winsize = struct.pack("HHHH", height, width, 0, 0)
+        fcntl.ioctl(self.master, termios.TIOCSWINSZ, winsize)
+
+    def touch_term(self, width, height):
+        """Spawns the child if needed and sizes the terminal canvas.
+
+        Returns early when the size is unchanged.
+        """
+        process_opened = False
+
+        if self.pid is None:
+            self.spawn()
+            process_opened = True
+
+        if self.width == width and self.height == height:
+            return
+
+        self.set_termsize(width, height)
+
+        if not self.term:
+            self.term = TermCanvas(width, height, self)
+        else:
+            self.term.resize(width, height)
+
+        self.width = width
+        self.height = height
+
+        if process_opened:
+            self.add_watch()
+
+    def set_title(self, title):
+        """Emits the 'title' signal with the new title."""
+        self._emit('title', title)
+
+    def change_focus(self, has_focus):
+        """
+        Ignore SIGINT if this widget has focus.
+        """
+        if self.terminated or self.has_focus == has_focus:
+            return
+
+        self.has_focus = has_focus
+
+        if has_focus:
+            self.old_tios = RealTerminal().tty_signal_keys()
+            RealTerminal().tty_signal_keys(*(['undefined'] * 5))
+        else:
+            RealTerminal().tty_signal_keys(*self.old_tios)
+
+    def render(self, size, focus=False):
+        """Returns the terminal canvas for the given (width, height)."""
+        if not self.terminated:
+            self.change_focus(focus)
+
+            width, height = size
+            self.touch_term(width, height)
+
+            if self.main_loop is None:
+                self.feed()
+
+        return self.term
+
+    def add_watch(self):
+        """Has the main loop, if any, call feed() for the pty master."""
+        if self.main_loop is None:
+            return
+
+        self.main_loop.watch_file(self.master, self.feed)
+
+    def remove_watch(self):
+        """Stops the main loop, if any, from watching the pty master."""
+        if self.main_loop is None:
+            return
+
+        self.main_loop.remove_watch_file(self.master)
+
+    def selectable(self):
+        return True
+
+    def wait_and_feed(self, timeout=1.0):
+        """Waits up to timeout seconds for pty output, then calls feed()."""
+        while True:
+            try:
+                select.select([self.master], [], [], timeout)
+                break
+            except select.error as e:
+                if e.args[0] != 4:
+                    raise
+        self.feed()
+
+    def feed(self):
+        """Reads pending pty output and adds it to the terminal canvas.
+
+        At end of file the widget terminates and emits 'closed'.
+        """
+        data = b''
+
+        try:
+            data = os.read(self.master, 4096)
+        except OSError as e:
+            if e.errno == 5: # End Of File
+                data = b''
+            elif e.errno == errno.EWOULDBLOCK: # empty buffer
+                return
+            else:
+                raise
+
+        # os.read() returns bytes, which never equal '' on Python 3, so
+        # test for an empty read instead.
+        if not data: # EOF on BSD
+            self.terminate()
+            self._emit('closed')
+            return
+
+        self.term.addstr(data)
+
+        self.flush_responses()
+
+    def keypress(self, size, key):
+        """Forwards a key to the child, or returns it when unhandled.
+
+        The escape sequence toggles whether keys are grabbed.
+        """
+        if self.terminated:
+            return key
+
+        if key == "window resize":
+            width, height = size
+            self.touch_term(width, height)
+            return
+
+        if (self.last_key == self.escape_sequence
+            and key == self.escape_sequence):
+            # escape sequence pressed twice...
+            self.last_key = key
+            self.keygrab = True
+            # ... so pass it to the terminal
+        elif self.keygrab:
+            if self.escape_sequence == key:
+                # stop grabbing the terminal
+                self.keygrab = False
+                self.last_key = key
+                return
+        else:
+            if key == 'page up':
+                self.term.scroll_buffer()
+                self.last_key = key
+                self._invalidate()
+                return
+            elif key == 'page down':
+                self.term.scroll_buffer(up=False)
+                self.last_key = key
+                self._invalidate()
+                return
+            elif (self.last_key == self.escape_sequence
+                  and key != self.escape_sequence):
+                # hand down keypress directly after ungrab.
+                self.last_key = key
+                return key
+            elif self.escape_sequence == key:
+                # start grabbing the terminal
+                self.keygrab = True
+                self.last_key = key
+                return
+            elif self._command_map[key] is None or key == 'enter':
+                # printable character or escape sequence means:
+                # lock in terminal...
+                self.keygrab = True
+                # ... and do key processing
+            else:
+                # hand down keypress
+                self.last_key = key
+                return key
+
+        self.last_key = key
+
+        self.term.scroll_buffer(reset=True)
+
+        if key.startswith("ctrl "):
+            if key[-1].islower():
+                key = chr(ord(key[-1]) - ord('a') + 1)
+            else:
+                key = chr(ord(key[-1]) - ord('A') + 1)
+        else:
+            if self.term_modes.keys_decckm and key in KEY_TRANSLATIONS_DECCKM:
+                key = KEY_TRANSLATIONS_DECCKM.get(key)
+            else:
+                key = KEY_TRANSLATIONS.get(key, key)
+
+        # ENTER transmits both a carriage return and linefeed in LF/NL mode.
+        if self.term_modes.lfnl and key == "\x0d":
+            key += "\x0a"
+
+        if PYTHON3:
+            key = key.encode('ascii')
+
+        os.write(self.master, key)
+