everything is a mess

This commit is contained in:
Anthony Scopatz 2015-01-21 17:04:13 -05:00
commit 5757edb15e
7 changed files with 822 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
*.pyc

3
scripts/xonsh Executable file
View file

@ -0,0 +1,3 @@
#!/usr/bin/env python
# Thin launcher script: all real work happens in the package entry point.
from xonsh.main import main
main()

0
xonsh/__init__.py Normal file
View file

452
xonsh/lexer.py Normal file
View file

@ -0,0 +1,452 @@
from __future__ import print_function, unicode_literals
import re
import sys
from ply import lex
from ply.lex import TOKEN
class Lexer(object):
    """Implements a lexer for the xonsh language.

    NOTE(review): the keywords and token set below are C-language tokens
    (pycparser-style); presumably placeholders until real xonsh tokens are
    defined — confirm before relying on them.
    """

    def __init__(self, errfunc=lambda e, l, c: print(e)):
        """
        Parameters
        ----------
        errfunc : function, optional
            An error function callback. Accepts an error
            message, line and column as arguments.

        Attributes
        ----------
        lexer : a PLY lexer bound to self
        fname : str
            Filename
        last : token
            The last token seen.
        lineno : int
            The last line number seen.
        """
        self.errfunc = errfunc
        self.fname = ''
        self.last = None
        # State used while scanning a preprocessor '#line' directive.
        self.pp_line = self.pp_filename = None
        # BUGFIX: these patterns are used by t_PPHASH but were never defined.
        # '#line 7 "f.c"' or '# 7 "f.c"' enters 'ppline'; '#pragma' enters
        # 'pppragma'.
        self.line_pattern = re.compile(r'([ \t]*line\W)|([ \t]*\d+)')
        self.pragma_pattern = re.compile(r'[ \t]*pragma\W')
        # BUGFIX: hooks referenced by token rules below but never defined.
        # Default to no-ops so the lexer works stand-alone; a parser may
        # replace them.
        self.on_lbrace_func = lambda: None
        self.on_rbrace_func = lambda: None
        self.type_lookup_func = lambda name: False

    def build(self, **kwargs):
        """Part of the PLY lexer API: builds the lexer bound to self."""
        self.lexer = lex.lex(object=self, **kwargs)

    @property
    def lineno(self):
        return self.lexer.lineno

    @lineno.setter
    def lineno(self, value):
        # BUGFIX: the setter was missing its ':' (syntax error) and always
        # stored 1 instead of the assigned value.
        self.lexer.lineno = value

    def input(self, s):
        """Calls the lexer on the string s."""
        self.lexer.input(s)

    def token(self):
        """Retrieves the next token, remembering it in self.last."""
        self.last = self.lexer.token()
        return self.last

    def token_col(self, token):
        """Discovers the (1-based) token column number."""
        offset = self.lexer.lexdata.rfind('\n', 0, token.lexpos)
        return token.lexpos - offset

    ######################-- PRIVATE --######################

    ##
    ## Internal auxiliary methods
    ##
    def _error(self, msg, token):
        """Report an error through the user callback and skip a character."""
        location = self._make_tok_location(token)
        # BUGFIX: was self.error_func, which does not exist; the attribute
        # set in __init__ is errfunc.
        self.errfunc(msg, location[0], location[1])
        self.lexer.skip(1)

    def _make_tok_location(self, token):
        # BUGFIX: was self.find_tok_column, which does not exist; the column
        # helper defined above is token_col.
        return (token.lineno, self.token_col(token))

    ##
    ## Reserved keywords
    ##
    keywords = (
        '_BOOL', '_COMPLEX', 'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST',
        'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE', 'ELSE', 'ENUM', 'EXTERN',
        'FLOAT', 'FOR', 'GOTO', 'IF', 'INLINE', 'INT', 'LONG', 'REGISTER',
        'RESTRICT', 'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT',
        'SWITCH', 'TYPEDEF', 'UNION', 'UNSIGNED', 'VOID',
        'VOLATILE', 'WHILE',
    )

    keyword_map = {}
    for keyword in keywords:
        if keyword == '_BOOL':
            keyword_map['_Bool'] = keyword
        elif keyword == '_COMPLEX':
            keyword_map['_Complex'] = keyword
        else:
            keyword_map[keyword.lower()] = keyword

    ##
    ## All the tokens recognized by the lexer
    ##
    tokens = keywords + (
        # Identifiers
        'ID',

        # Type identifiers (identifiers previously defined as
        # types with typedef)
        'TYPEID',

        # constants
        'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX',
        'FLOAT_CONST', 'HEX_FLOAT_CONST',
        'CHAR_CONST',
        'WCHAR_CONST',

        # String literals
        'STRING_LITERAL',
        'WSTRING_LITERAL',

        # Operators
        'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
        'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
        'LOR', 'LAND', 'LNOT',
        'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

        # Assignment
        'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL',
        'PLUSEQUAL', 'MINUSEQUAL',
        'LSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL',
        'OREQUAL',

        # Increment/decrement
        'PLUSPLUS', 'MINUSMINUS',

        # Structure dereference (->)
        'ARROW',

        # Conditional operator (?)
        'CONDOP',

        # Delimiters
        'LPAREN', 'RPAREN',         # ( )
        'LBRACKET', 'RBRACKET',     # [ ]
        'LBRACE', 'RBRACE',         # { }
        'COMMA', 'PERIOD',          # , .
        'SEMI', 'COLON',            # ; :

        # Ellipsis (...)
        'ELLIPSIS',

        # pre-processor
        'PPHASH',       # '#'
    )

    ##
    ## Regexes for use in tokens
    ##

    # valid C identifiers (K&R2: A.2.3), plus '$' (supported by some compilers)
    identifier = r'[a-zA-Z_$][0-9a-zA-Z_$]*'

    hex_prefix = '0[xX]'
    hex_digits = '[0-9a-fA-F]+'

    # integer constants (K&R2: A.2.5.1)
    integer_suffix_opt = r'(([uU]ll)|([uU]LL)|(ll[uU]?)|(LL[uU]?)|([uU][lL])|([lL][uU]?)|[uU])?'
    decimal_constant = '(0'+integer_suffix_opt+')|([1-9][0-9]*'+integer_suffix_opt+')'
    octal_constant = '0[0-7]*'+integer_suffix_opt
    hex_constant = hex_prefix+hex_digits+integer_suffix_opt
    bad_octal_constant = '0[0-7]*[89]'

    # character constants (K&R2: A.2.5.2)
    # Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support #line
    # directives with Windows paths as filenames (..\..\dir\file)
    # For the same reason, decimal_escape allows all digit sequences. We want to
    # parse all correct code, even if it means to sometimes parse incorrect
    # code.
    #
    simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
    decimal_escape = r"""(\d+)"""
    hex_escape = r"""(x[0-9a-fA-F]+)"""
    bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])"""

    escape_sequence = r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'
    cconst_char = r"""([^'\\\n]|"""+escape_sequence+')'
    char_const = "'"+cconst_char+"'"
    wchar_const = 'L'+char_const
    unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)"
    bad_char_const = r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+bad_escape+r"""[^'\n]*')"""

    # string literals (K&R2: A.2.6)
    string_char = r"""([^"\\\n]|"""+escape_sequence+')'
    string_literal = '"'+string_char+'*"'
    wstring_literal = 'L'+string_literal
    bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'

    # floating constants (K&R2: A.2.5.3)
    exponent_part = r"""([eE][-+]?[0-9]+)"""
    fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
    floating_constant = '(((('+fractional_constant+')'+exponent_part+'?)|([0-9]+'+exponent_part+'))[FfLl]?)'
    binary_exponent_part = r'''([pP][+-]?[0-9]+)'''
    hex_fractional_constant = '((('+hex_digits+r""")?\."""+hex_digits+')|('+hex_digits+r"""\.))"""
    hex_floating_constant = '('+hex_prefix+'('+hex_digits+'|'+hex_fractional_constant+')'+binary_exponent_part+'[FfLl]?)'

    ##
    ## Lexer states: used for preprocessor \n-terminated directives
    ##
    states = (
        # ppline: preprocessor line directives
        #
        ('ppline', 'exclusive'),

        # pppragma: pragma
        #
        ('pppragma', 'exclusive'),
    )

    def t_PPHASH(self, t):
        r'[ \t]*\#'
        # Decide which preprocessor state to enter based on what follows '#'.
        if self.line_pattern.match(t.lexer.lexdata, pos=t.lexer.lexpos):
            t.lexer.begin('ppline')
            self.pp_line = self.pp_filename = None
        elif self.pragma_pattern.match(t.lexer.lexdata, pos=t.lexer.lexpos):
            t.lexer.begin('pppragma')
        else:
            t.type = 'PPHASH'
            return t

    ##
    ## Rules for the ppline state
    ##
    @TOKEN(string_literal)
    def t_ppline_FILENAME(self, t):
        # The line number must come first in a #line directive.
        if self.pp_line is None:
            self._error('filename before line number in #line', t)
        else:
            self.pp_filename = t.value.lstrip('"').rstrip('"')

    @TOKEN(decimal_constant)
    def t_ppline_LINE_NUMBER(self, t):
        if self.pp_line is None:
            self.pp_line = t.value
        else:
            # Ignore: GCC's cpp sometimes inserts a numeric flag
            # after the file name
            pass

    def t_ppline_NEWLINE(self, t):
        r'\n'
        # End of the directive: commit the collected line number/filename.
        if self.pp_line is None:
            self._error('line number missing in #line', t)
        else:
            self.lexer.lineno = int(self.pp_line)
            if self.pp_filename is not None:
                # BUGFIX: was self.filename; the attribute defined in
                # __init__ is fname.
                self.fname = self.pp_filename
        t.lexer.begin('INITIAL')

    def t_ppline_PPLINE(self, t):
        r'line'
        pass

    t_ppline_ignore = ' \t'

    def t_ppline_error(self, t):
        self._error('invalid #line directive', t)

    ##
    ## Rules for the pppragma state
    ##
    def t_pppragma_NEWLINE(self, t):
        r'\n'
        t.lexer.lineno += 1
        t.lexer.begin('INITIAL')

    def t_pppragma_PPPRAGMA(self, t):
        r'pragma'
        pass

    t_pppragma_ignore = ' \t<>.-{}();+-*/$%@&^~!?:,0123456789'

    @TOKEN(string_literal)
    def t_pppragma_STR(self, t):
        pass

    @TOKEN(identifier)
    def t_pppragma_ID(self, t):
        pass

    def t_pppragma_error(self, t):
        self._error('invalid #pragma directive', t)

    ##
    ## Rules for the normal state
    ##
    t_ignore = ' \t'

    # Newlines: tracked for line numbers, never returned as tokens.
    def t_NEWLINE(self, t):
        r'\n+'
        t.lexer.lineno += t.value.count("\n")

    # Operators
    t_PLUS = r'\+'
    t_MINUS = r'-'
    t_TIMES = r'\*'
    t_DIVIDE = r'/'
    t_MOD = r'%'
    t_OR = r'\|'
    t_AND = r'&'
    t_NOT = r'~'
    t_XOR = r'\^'
    t_LSHIFT = r'<<'
    t_RSHIFT = r'>>'
    t_LOR = r'\|\|'
    t_LAND = r'&&'
    t_LNOT = r'!'
    t_LT = r'<'
    t_GT = r'>'
    t_LE = r'<='
    t_GE = r'>='
    t_EQ = r'=='
    t_NE = r'!='

    # Assignment operators
    t_EQUALS = r'='
    t_TIMESEQUAL = r'\*='
    t_DIVEQUAL = r'/='
    t_MODEQUAL = r'%='
    t_PLUSEQUAL = r'\+='
    t_MINUSEQUAL = r'-='
    t_LSHIFTEQUAL = r'<<='
    t_RSHIFTEQUAL = r'>>='
    t_ANDEQUAL = r'&='
    t_OREQUAL = r'\|='
    t_XOREQUAL = r'\^='

    # Increment/decrement
    t_PLUSPLUS = r'\+\+'
    t_MINUSMINUS = r'--'

    # ->
    t_ARROW = r'->'

    # ?
    t_CONDOP = r'\?'

    # Delimiters
    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_LBRACKET = r'\['
    t_RBRACKET = r'\]'
    t_COMMA = r','
    t_PERIOD = r'\.'
    t_SEMI = r';'
    t_COLON = r':'
    t_ELLIPSIS = r'\.\.\.'

    # Scope delimiters
    # To see why on_lbrace_func is needed, consider:
    #   typedef char TT;
    #   void foo(int TT) { TT = 10; }
    #   TT x = 5;
    # Outside the function, TT is a typedef, but inside (starting and ending
    # with the braces) it's a parameter. The trouble begins with yacc's
    # lookahead token. If we open a new scope in brace_open, then TT has
    # already been read and incorrectly interpreted as TYPEID. So, we need
    # to open and close scopes from within the lexer.
    # Similar for the TT immediately outside the end of the function.
    #
    @TOKEN(r'\{')
    def t_LBRACE(self, t):
        self.on_lbrace_func()
        return t

    @TOKEN(r'\}')
    def t_RBRACE(self, t):
        self.on_rbrace_func()
        return t

    t_STRING_LITERAL = string_literal

    # The following floating and integer constants are defined as
    # functions to impose a strict order (otherwise, decimal
    # is placed before the others because its regex is longer,
    # and this is bad)
    #
    @TOKEN(floating_constant)
    def t_FLOAT_CONST(self, t):
        return t

    @TOKEN(hex_floating_constant)
    def t_HEX_FLOAT_CONST(self, t):
        return t

    @TOKEN(hex_constant)
    def t_INT_CONST_HEX(self, t):
        return t

    @TOKEN(bad_octal_constant)
    def t_BAD_CONST_OCT(self, t):
        msg = "Invalid octal constant"
        self._error(msg, t)

    @TOKEN(octal_constant)
    def t_INT_CONST_OCT(self, t):
        return t

    @TOKEN(decimal_constant)
    def t_INT_CONST_DEC(self, t):
        return t

    # Must come before bad_char_const, to prevent it from
    # catching valid char constants as invalid
    #
    @TOKEN(char_const)
    def t_CHAR_CONST(self, t):
        return t

    @TOKEN(wchar_const)
    def t_WCHAR_CONST(self, t):
        return t

    @TOKEN(unmatched_quote)
    def t_UNMATCHED_QUOTE(self, t):
        msg = "Unmatched '"
        self._error(msg, t)

    @TOKEN(bad_char_const)
    def t_BAD_CHAR_CONST(self, t):
        msg = "Invalid char constant %s" % t.value
        self._error(msg, t)

    @TOKEN(wstring_literal)
    def t_WSTRING_LITERAL(self, t):
        return t

    # unmatched string literals are caught by the preprocessor

    @TOKEN(bad_string_literal)
    def t_BAD_STRING_LITERAL(self, t):
        msg = "String contains invalid escape code"
        self._error(msg, t)

    @TOKEN(identifier)
    def t_ID(self, t):
        # Classify as keyword, typedef-name (via the parser hook), or plain ID.
        t.type = self.keyword_map.get(t.value, "ID")
        if t.type == 'ID' and self.type_lookup_func(t.value):
            t.type = "TYPEID"
        return t

    def t_error(self, t):
        msg = 'Illegal character %s' % repr(t.value[0])
        self._error(msg, t)

20
xonsh/main.py Normal file
View file

@ -0,0 +1,20 @@
"""The main xonsh script."""
import os
import sys
import shlex
import subprocess
from argparse import ArgumentParser, Namespace
import urwid
from xonsh.main_display import MainDisplay
def main(argv=None):
    """Entry point for the xonsh shell.

    Parameters
    ----------
    argv : list of str, optional
        Command line arguments; defaults to ``sys.argv[1:]``.
    """
    argv = sys.argv[1:] if argv is None else argv
    display = MainDisplay()
    display.main()
# Allow running this module directly as well as via the console script.
if __name__ == '__main__':
    main()

42
xonsh/main_display.py Normal file
View file

@ -0,0 +1,42 @@
"""The main xonsh displaye."""
import urwid
from xonsh.shell_view import ShellView
class MainDisplay(object):
    """Top-level urwid display: a shell view stacked over a test edit line."""

    def __init__(self):
        self.shell = shell = ShellView()
        self.view = urwid.LineBox(
            urwid.Pile([
                ('weight', 70, shell),
                ('fixed', 1, urwid.Filler(urwid.Edit('focus test edit: '))),
            ]),
        )
        # Let the shell retitle the box and shut the app down.
        urwid.connect_signal(shell, 'title', self.set_title)
        urwid.connect_signal(shell, 'closed', self.quit)

    def set_title(self, widget, title):
        """Signal handler: propagate the shell's title to the line box."""
        self.view.set_title(title)

    def quit(self, *args, **kwargs):
        """Signal handler: leave the urwid main loop."""
        raise urwid.ExitMainLoop()

    def handle_key(self, key):
        """Handle any key urwid did not route to a widget; q/Q quits."""
        if key in ('q', 'Q'):
            self.quit()

    def main(self, line=1, col=1):
        """Build the urwid main loop and run it until quit."""
        loop = urwid.MainLoop(self.view,
                              handle_mouse=False,
                              unhandled_input=self.handle_key)
        loop.screen.set_terminal_properties(256)
        # Share the loop with the shell so it can watch the PTY fd.
        self.loop = self.shell.main_loop = loop
        while True:
            try:
                self.loop.run()
            except KeyboardInterrupt:
                # BUGFIX: this branch called self.reset_status(status="YOLO! "),
                # a method that does not exist, so Ctrl-C raised AttributeError.
                # Now Ctrl-C simply restarts the event loop.
                continue
            else:
                break

304
xonsh/shell_view.py Normal file
View file

@ -0,0 +1,304 @@
"""The main shell for xonsh."""
import atexit
import errno
import fcntl
import os
import pty
import select
import signal
import struct
import sys
import termios
import time
import traceback

from urwid import RealTerminal, Widget
from urwid.vterm import (KEY_TRANSLATIONS, KEY_TRANSLATIONS_DECCKM,
                         TermCanvas, TermModes)
class ShellView(Widget):
    """A terminal-emulator widget that hosts the shell child process."""
    _selectable = True
    _sizing = frozenset(['box'])

    signals = ['closed', 'beep', 'leds', 'title']

    def __init__(self, env=None, main_loop=None, escape_sequence=None,
                 command=None):
        """
        A terminal emulator within a widget.

        'command' is the command to execute inside the terminal, provided as a
        list of the command followed by its arguments. If 'command' is None,
        the command is the current user's shell. You can also provide a callable
        instead of a command, which will be executed in the subprocess.

        'env' can be used to pass custom environment variables. If omitted,
        os.environ is used.

        'main_loop' should be provided, because the canvas state machine needs
        to act on input from the PTY master device. This object must have
        watch_file and remove_watch_file methods.

        'escape_sequence' is the urwid key symbol which should be used to break
        out of the terminal widget. If it's not specified, "ctrl a" is used.
        """
        self.__super.__init__()
        if escape_sequence is None:
            self.escape_sequence = "ctrl a"
        else:
            self.escape_sequence = escape_sequence
        if env is None:
            self.env = dict(os.environ)
        else:
            self.env = dict(env)
        # BUGFIX: 'command' was documented above and read by spawn(), but was
        # never accepted or stored here, so spawn() raised AttributeError.
        self.command = command
        self.keygrab = False
        self.last_key = None
        self.response_buffer = []
        self.term_modes = TermModes()
        self.main_loop = main_loop
        self.master = None        # PTY master fd, set by spawn()
        self.pid = None           # child pid, set by spawn()
        self.width = None
        self.height = None
        self.term = None          # TermCanvas, created by touch_term()
        self.has_focus = False
        self.terminated = False

    def spawn(self):
        """Fork the child process attached to a freshly created PTY."""
        env = self.env
        env['TERM'] = 'linux'
        self.pid, self.master = pty.fork()
        if self.pid == 0:
            # Child: run the callable/command and never return.
            if callable(self.command):
                try:
                    try:
                        self.command()
                    except BaseException:
                        # Best-effort: show the traceback on the terminal,
                        # then exit the child unconditionally.
                        sys.stderr.write(traceback.format_exc())
                        sys.stderr.flush()
                finally:
                    os._exit(0)
            else:
                # BUGFIX: None means "the user's shell" per the docstring,
                # but execvpe was called with None unguarded.
                cmd = self.command or [env.get('SHELL', '/bin/sh')]
                os.execvpe(cmd[0], cmd, env)
        if self.main_loop is None:
            # No event loop to watch the fd, so reads must not block.
            fcntl.fcntl(self.master, fcntl.F_SETFL, os.O_NONBLOCK)
        atexit.register(self.terminate)

    def terminate(self):
        """Shut down the child process and release the PTY."""
        if self.terminated:
            return
        self.terminated = True
        self.remove_watch()
        self.change_focus(False)
        if self.pid > 0:
            self.set_termsize(0, 0)
            # Escalate through progressively harsher signals.
            for sig in (signal.SIGHUP, signal.SIGCONT, signal.SIGINT,
                        signal.SIGTERM, signal.SIGKILL):
                try:
                    os.kill(self.pid, sig)
                    pid, status = os.waitpid(self.pid, os.WNOHANG)
                except OSError:
                    break
                if pid == 0:
                    # NOTE(review): waitpid() == 0 means the child is still
                    # running, yet this breaks out of the escalation loop.
                    # Looks suspicious, but kept as-is: the blocking waitpid
                    # below reaps the child either way. TODO confirm intent.
                    break
                time.sleep(0.1)
            try:
                os.waitpid(self.pid, 0)
            except OSError:
                pass
            os.close(self.master)

    def beep(self):
        """Emit the 'beep' signal (terminal bell)."""
        self._emit('beep')

    def leds(self, which):
        """Emit the 'leds' signal with the requested LED state."""
        self._emit('leds', which)

    def respond(self, string):
        """
        Respond to the underlying application with 'string'.
        """
        self.response_buffer.append(string)

    def flush_responses(self):
        """Write all queued responses to the PTY master."""
        for string in self.response_buffer:
            os.write(self.master, string.encode('ascii'))
        self.response_buffer = []

    def set_termsize(self, width, height):
        """Tell the kernel the new terminal window size."""
        winsize = struct.pack("HHHH", height, width, 0, 0)
        fcntl.ioctl(self.master, termios.TIOCSWINSZ, winsize)

    def touch_term(self, width, height):
        """Spawn the child on first use and (re)size the canvas to fit."""
        process_opened = False
        if self.pid is None:
            self.spawn()
            process_opened = True
        if self.width == width and self.height == height:
            return
        self.set_termsize(width, height)
        if not self.term:
            self.term = TermCanvas(width, height, self)
        else:
            self.term.resize(width, height)
        self.width = width
        self.height = height
        if process_opened:
            self.add_watch()

    def set_title(self, title):
        """Emit the 'title' signal so the container can retitle itself."""
        self._emit('title', title)

    def change_focus(self, has_focus):
        """
        Ignore SIGINT if this widget has focus.
        """
        if self.terminated or self.has_focus == has_focus:
            return
        self.has_focus = has_focus
        if has_focus:
            # Disable the terminal's signal keys while focused, remembering
            # the previous settings so they can be restored on focus loss.
            self.old_tios = RealTerminal().tty_signal_keys()
            RealTerminal().tty_signal_keys(*(['undefined'] * 5))
        else:
            RealTerminal().tty_signal_keys(*self.old_tios)

    def render(self, size, focus=False):
        """Render the terminal canvas, pumping the PTY if not loop-driven."""
        if not self.terminated:
            self.change_focus(focus)
            width, height = size
            self.touch_term(width, height)
            if self.main_loop is None:
                self.feed()
        return self.term

    def add_watch(self):
        """Ask the event loop (if any) to call feed() when the PTY has data."""
        if self.main_loop is None:
            return
        self.main_loop.watch_file(self.master, self.feed)

    def remove_watch(self):
        """Stop watching the PTY master fd."""
        if self.main_loop is None:
            return
        self.main_loop.remove_watch_file(self.master)

    def selectable(self):
        return True

    def wait_and_feed(self, timeout=1.0):
        """Block until the PTY has data (or timeout expires), then feed it."""
        while True:
            try:
                select.select([self.master], [], [], timeout)
                break
            except select.error as e:
                # BUGFIX: compare against errno.EINTR instead of the magic
                # number 4; retry when select() was interrupted by a signal.
                if e.args[0] != errno.EINTR:
                    raise
        self.feed()

    def feed(self):
        """Read pending child output and hand it to the terminal canvas."""
        data = b''
        try:
            data = os.read(self.master, 4096)
        except OSError as e:
            if e.errno == errno.EIO:            # End Of File (was magic 5)
                data = b''
            elif e.errno == errno.EWOULDBLOCK:  # empty buffer
                return
            else:
                raise
        # BUGFIX: os.read returns bytes, so the old `data == ''` EOF test
        # could never be true on Python 3; truthiness works on both 2 and 3.
        if not data:  # EOF on BSD
            self.terminate()
            self._emit('closed')
            return
        self.term.addstr(data)
        self.flush_responses()

    def keypress(self, size, key):
        """Translate urwid key symbols into bytes for the child process."""
        if self.terminated:
            return key
        if key == "window resize":
            width, height = size
            self.touch_term(width, height)
            return
        if (self.last_key == self.escape_sequence
            and key == self.escape_sequence):
            # escape sequence pressed twice...
            self.last_key = key
            self.keygrab = True
            # ... so pass it to the terminal
        elif self.keygrab:
            if self.escape_sequence == key:
                # stop grabbing the terminal
                self.keygrab = False
                self.last_key = key
                return
        else:
            if key == 'page up':
                self.term.scroll_buffer()
                self.last_key = key
                self._invalidate()
                return
            elif key == 'page down':
                self.term.scroll_buffer(up=False)
                self.last_key = key
                self._invalidate()
                return
            elif (self.last_key == self.escape_sequence
                  and key != self.escape_sequence):
                # hand down keypress directly after ungrab.
                self.last_key = key
                return key
            elif self.escape_sequence == key:
                # start grabbing the terminal
                self.keygrab = True
                self.last_key = key
                return
            elif self._command_map[key] is None or key == 'enter':
                # printable character or escape sequence means:
                # lock in terminal...
                self.keygrab = True
                # ... and do key processing
            else:
                # hand down keypress
                self.last_key = key
                return key
        self.last_key = key
        self.term.scroll_buffer(reset=True)
        if key.startswith("ctrl "):
            # Map ctrl-<letter> to the corresponding C0 control byte.
            if key[-1].islower():
                key = chr(ord(key[-1]) - ord('a') + 1)
            else:
                key = chr(ord(key[-1]) - ord('A') + 1)
        else:
            if self.term_modes.keys_decckm and key in KEY_TRANSLATIONS_DECCKM:
                key = KEY_TRANSLATIONS_DECCKM.get(key)
            else:
                key = KEY_TRANSLATIONS.get(key, key)
        # ENTER transmits both a carriage return and linefeed in LF/NL mode.
        if self.term_modes.lfnl and key == "\x0d":
            key += "\x0a"
        # BUGFIX: replaced the undefined PYTHON3 flag; os.write needs bytes.
        if not isinstance(key, bytes):
            key = key.encode('ascii')
        os.write(self.master, key)