2016-06-13 01:28:27 -04:00
|
|
|
#!/usr/bin/env python
|
2016-06-12 11:07:09 -04:00
|
|
|
"""A package-based, source code amalgamater."""
|
|
|
|
import os
|
2016-06-12 12:31:50 -04:00
|
|
|
import sys
|
2016-06-12 13:18:50 -04:00
|
|
|
import pprint
|
2016-06-13 00:46:35 -04:00
|
|
|
from itertools import repeat
|
2016-06-12 12:31:50 -04:00
|
|
|
from collections import namedtuple
|
|
|
|
from collections.abc import Mapping
|
|
|
|
from ast import parse, walk, literal_eval, Import, ImportFrom
|
|
|
|
|
|
|
|
# Record type for one module of the package being amalgamated; instances are
# built by make_node() and stored as the values of the dependency graph.
ModNode = namedtuple('ModNode', ['name', 'pkgdeps', 'extdeps'])
ModNode.__doc__ = """Module node for dependency graph.

Attributes
----------
name : str
    Module name.
pkgdeps : frozenset of str
    Module dependencies in the same package.
extdeps : frozenset of str
    External module dependencies from outside of the package.
"""
|
|
|
|
|
2016-06-16 20:20:10 -04:00
|
|
|
|
2016-06-12 12:31:50 -04:00
|
|
|
class SourceCache(Mapping):
    """Read-through cache of module source text.

    Keys are ``(package, module)`` pairs.  Looking up a pair that has not
    been cached yet reads ``<package as path>/<module>.py`` (relative to the
    current working directory), remembers the text and returns it.
    """

    def __init__(self, *args, **kwargs):
        # Takes the same arguments as ``dict`` so the cache can be pre-filled.
        self._d = dict(*args, **kwargs)

    def __getitem__(self, key):
        cache = self._d
        if key not in cache:
            pkg, name = key
            fname = pkg.replace('.', os.sep) + os.sep + name + '.py'
            with open(fname, encoding='utf-8', errors='surrogateescape') as f:
                cache[key] = f.read()
        return cache[key]

    def __iter__(self):
        # Only keys that have already been loaded (or pre-filled) are listed.
        for key in self._d:
            yield key

    def __len__(self):
        return len(self._d)
|
|
|
|
|
|
|
|
|
|
|
|
# Shared source cache; make_node() and rewrite_imports() both read module
# text through it, so each file is loaded from disk only once.
SOURCES = SourceCache()
|
|
|
|
|
2016-06-12 13:18:50 -04:00
|
|
|
def make_node(name, pkg, allowed):
    """Makes a node by parsing a file and traversing its AST.

    Only import statements that are direct children of the module body are
    inspected; imports nested inside functions, classes or conditionals are
    not seen.

    Parameters
    ----------
    name : str
        Module name (without ``.py``) inside the package.
    pkg : str
        Dotted name of the package containing the module.
    allowed : container of str
        Names of the modules taking part in the amalgamation.

    Returns
    -------
    ModNode
        Node whose ``pkgdeps`` holds the allowed sibling modules imported by
        this module.  ``extdeps`` holds the other names from ``import x``
        statements plus ``from pkg.<something> import ...`` modules that are
        not allowed siblings; ``from other import y`` is not recorded.
    """
    raw = SOURCES[pkg, name]
    tree = parse(raw, filename=name)
    # we only want to deal with global import statements
    pkgdot = pkg + '.'
    pkgdeps = set()
    extdeps = set()
    for a in tree.body:
        if isinstance(a, Import):
            for n in a.names:
                p, dot, m = n.name.rpartition('.')
                if p == pkg and m in allowed:
                    pkgdeps.add(m)
                else:
                    extdeps.add(n.name)
        elif isinstance(a, ImportFrom):
            if a.level or a.module is None:
                # Relative import (``from . import x`` / ``from .m import x``).
                # ``module`` is None for the first form, which used to crash
                # on ``None.startswith``; and a relative module name must not
                # be compared with the absolute package name.  Relative
                # imports are therefore not tracked as dependencies.
                continue
            if a.module == pkg:
                pkgdeps.update(n.name for n in a.names if n.name in allowed)
            elif a.module.startswith(pkgdot):
                p, dot, m = a.module.rpartition('.')
                if p == pkg and m in allowed:
                    pkgdeps.add(m)
                else:
                    extdeps.add(a.module)
    return ModNode(name, frozenset(pkgdeps), frozenset(extdeps))
|
|
|
|
|
|
|
|
|
2016-06-12 13:26:59 -04:00
|
|
|
def make_graph(pkg, exclude=None):
    """Build the module dependency graph of a package.

    Every ``*.py`` file in the package directory whose name does not start
    with ``__`` is a candidate module; names listed in `exclude` are dropped.
    The result maps each remaining module name to the node produced by
    ``make_node`` for it.
    """
    pkgdir = pkg.replace('.', os.sep)
    allowed = set()
    for entry in os.listdir(pkgdir):
        stem, suffix = os.path.splitext(entry)
        if suffix == '.py' and not stem.startswith('__'):
            allowed.add(stem)
    if exclude:
        allowed -= exclude
    return {mod: make_node(mod, pkg, allowed) for mod in allowed}
|
|
|
|
|
|
|
|
|
2016-06-12 13:18:50 -04:00
|
|
|
def depsort(graph):
    """Order module names so that every module follows its package deps.

    Modules are released in rounds: each round takes every module whose
    ``pkgdeps`` are all already placed and appends them in alphabetical
    order.  A ``RuntimeError`` is raised when a round releases nothing,
    which means the remaining modules depend on each other in a cycle.
    """
    pending = set(graph)
    ordered = []
    placed = set()
    while pending:
        ready = {mod for mod in pending if graph[mod].pkgdeps <= placed}
        if not ready:
            msg = ('\nsolved order = {0}\nremaining = {1}\nCycle detected in '
                   'module graph!').format(pprint.pformat(ordered),
                                           pprint.pformat(pending))
            raise RuntimeError(msg)
        placed.update(ready)
        pending.difference_update(ready)
        ordered.extend(sorted(ready))
    return ordered
|
|
|
|
|
2016-06-12 12:31:50 -04:00
|
|
|
|
2016-06-13 22:12:41 -04:00
|
|
|
# Source text that amalgamate() places at the top of the generated module.
# It defines ``_LazyModule``, the placeholder that format_lazy_import() binds
# in place of each plain ``import`` statement: the real import is performed
# on first attribute access and the placeholder's global name is then rebound
# to the real module.
#
# For ``import pkg.mod`` (no alias) the bound name must be the top-level
# package ``pkg``; for ``import pkg.mod as alias`` it must be ``pkg.mod``
# itself.  Both ``load`` and the already-loaded path of ``__getattribute__``
# pick the module by that rule.
LAZY_IMPORTS = """
from sys import modules as _modules
from types import ModuleType as _ModuleType
from importlib import import_module as _import_module


class _LazyModule(_ModuleType):

    def __init__(self, pkg, mod, asname=None):
        '''Lazy module 'pkg.mod' in package 'pkg'.'''
        self.__dct__ = {
            'loaded': False,
            'pkg': pkg,  # pkg
            'mod': mod,  # pkg.mod
            'asname': asname,  # alias
            }

    @classmethod
    def load(cls, pkg, mod, asname=None):
        if mod in _modules:
            key = pkg if asname is None else mod
            return _modules[key]
        else:
            return cls(pkg, mod, asname)

    def __getattribute__(self, name):
        if name == '__dct__':
            return super().__getattribute__(name)
        dct = self.__dct__
        mod = dct['mod']
        if dct['loaded']:
            key = dct['pkg'] if dct['asname'] is None else mod
            m = _modules[key]
        else:
            m = _import_module(mod)
            glbs = globals()
            pkg = dct['pkg']
            asname = dct['asname']
            if asname is None:
                glbs[pkg] = m = _modules[pkg]
            else:
                glbs[asname] = m
            dct['loaded'] = True
        return getattr(m, name)

"""
|
|
|
|
|
2016-06-13 00:46:35 -04:00
|
|
|
def get_lineno(node, default=0):
    """Return ``node.lineno``, or `default` when the node has no line number."""
    try:
        return node.lineno
    except AttributeError:
        return default
|
|
|
|
|
|
|
|
|
|
|
|
def min_line(node):
    """Return the smallest line number found on `node` or any descendant.

    Descendants that carry no line number count as being on the node's own
    line (0 when the node itself has none).
    """
    fallback = getattr(node, 'lineno', 0)
    return min(getattr(child, 'lineno', fallback) for child in walk(node))
|
|
|
|
|
|
|
|
|
|
|
|
def format_import(names):
    """Render ``(kind, module, asname)`` triples as one ``import`` line.

    The first element of each triple is ignored; an `asname` of ``None``
    means the module is imported without an alias.
    """
    clauses = [mod if alias is None else mod + ' as ' + alias
               for _, mod, alias in names]
    return 'import ' + ', '.join(clauses) + '\n'
|
|
|
|
|
|
|
|
|
2016-06-13 22:39:14 -04:00
|
|
|
def format_lazy_import(names):
    """Render ``(kind, module, asname)`` triples as lazy-import assignments.

    One ``_LazyModule.load(...)`` assignment line is produced per triple.
    Without an alias the assigned name is the top-level package of the
    module; with an alias it is the alias.
    """
    plain = '{pkg} = _LazyModule.load({pkg!r}, {mod!r})\n'
    aliased = '{asname} = _LazyModule.load({pkg!r}, {mod!r}, {asname!r})\n'
    chunks = []
    for _, mod, asname in names:
        pkg = mod.partition('.')[0]
        template = plain if asname is None else aliased
        chunks.append(template.format(pkg=pkg, mod=mod, asname=asname))
    return ''.join(chunks)
|
|
|
|
|
|
|
|
|
2016-06-13 01:23:11 -04:00
|
|
|
def format_from_import(names):
    """Render ``(kind, module, name, asname)`` tuples as one ``from`` import.

    The module written after ``from`` is the one of the last tuple; the
    first element of each tuple is ignored.
    """
    clauses = []
    for _kind, module, name, asname in names:
        clauses.append(name if asname is None else name + ' as ' + asname)
    return 'from ' + module + ' import ' + ', '.join(clauses) + '\n'
|
|
|
|
|
|
|
|
|
2016-06-13 00:46:35 -04:00
|
|
|
def rewrite_imports(name, pkg, order, imps):
    """Rewrite the global imports in the file given the amalgamation.

    Only import statements that are direct children of the module body are
    touched.  Each rewritten statement replaces every source line from the
    statement's first line up to (not including) the first line of the next
    top-level statement, or up to the end of the file for the last statement.

    Parameters
    ----------
    name : str
        Module name (without ``.py``) inside the package.
    pkg : str
        Dotted name of the package being amalgamated.
    order : container of str
        Names of the modules that are part of the amalgamation.
    imps : set
        Imports already emitted by previously processed modules.  It is
        updated in place with the imports first seen in this module.

    Returns
    -------
    str
        The module source with its global imports rewritten:

        * ``import x`` becomes lazy-import assignments for the names not
          seen before, or a ``# amalgamated`` comment if all were seen.
        * ``from pkg.mod import ...`` of an amalgamated module becomes a
          ``# amalgamated`` comment.
        * other absolute ``from`` imports are left alone when entirely new,
          and otherwise reduced to the names not seen before.
        * relative ``from`` imports are left untouched.

    Raises
    ------
    RuntimeError
        If an amalgamated module is imported as a module object, i.e. via
        ``import pkg.mod`` or ``from pkg import mod``.
    """
    pkgdot = pkg + '.'
    raw = SOURCES[pkg, name]
    tree = parse(raw, filename=name)
    lines = raw.splitlines(keepends=True)
    replacements = []  # list of (startline, stopline, str) tuples
    # collect replacements in forward direction
    for a, b in zip(tree.body, tree.body[1:] + [None]):
        if not isinstance(a, (Import, ImportFrom)):
            continue
        start = min_line(a) - 1
        # start/stop are indices into ``lines``; a trailing import therefore
        # extends to the number of lines, not the number of statements.
        stop = len(lines) if b is None else min_line(b) - 1
        if isinstance(a, Import):
            keep = []
            for n in a.names:
                p, dot, m = n.name.rpartition('.')
                if p == pkg and m in order:
                    # n.name is already the full dotted ``pkg.mod`` name
                    msg = ('Cannot amalgamate import of '
                           'amalgamated module:\n\n    import {1}\n'
                           '\nin {0}/{2}.py').format(pkg, n.name, name)
                    raise RuntimeError(msg)
                imp = (Import, n.name, n.asname)
                if imp not in imps:
                    imps.add(imp)
                    keep.append(imp)
            if len(keep) == 0:
                s = ', '.join(n.name for n in a.names)
                s = '# amalgamated ' + s + '\n'
            else:
                s = format_lazy_import(keep)
            replacements.append((start, stop, s))
        elif isinstance(a, ImportFrom):
            if a.level or a.module is None:
                # Relative import: ``module`` may be None and, even when set,
                # is not an absolute name.  Rewriting it with the absolute
                # logic below would drop the leading dots, so keep it as is.
                continue
            p, dot, m = a.module.rpartition('.')
            if a.module == pkg:
                for n in a.names:
                    if n.name in order:
                        msg = ('Cannot amalgamate import of '
                               'amalgamated module:\n\n    from {0} import {1}\n'
                               '\nin {0}/{2}.py').format(pkg, n.name, name)
                        raise RuntimeError(msg)
            elif a.module.startswith(pkgdot) and p == pkg and m in order:
                replacements.append((start, stop,
                                     '# amalgamated ' + a.module + '\n'))
            else:
                keep = []
                for n in a.names:
                    imp = (ImportFrom, a.module, n.name, n.asname)
                    if imp not in imps:
                        imps.add(imp)
                        keep.append(imp)
                if len(keep) == len(a.names):
                    continue  # all new imports
                elif len(keep) == 0:
                    s = ', '.join(n.name for n in a.names)
                    s = '# amalgamated from ' + a.module + ' import ' + s + '\n'
                else:
                    s = format_from_import(keep)
                replacements.append((start, stop, s))
    # apply replacements in reverse so earlier indices stay valid
    for start, stop, s in reversed(replacements):
        # always replace at least the statement's own first line, even when
        # the next statement starts on that same line
        lines[start:max(stop, start + 1)] = [s]
    return ''.join(lines)
|
|
|
|
|
|
|
|
|
|
|
|
def amalgamate(order, graph, pkg):
    """Assemble the source text of the amalgamated module.

    The result is a module docstring listing the modules in `order`, the
    lazy-import support code, and then each module's source (with its global
    imports rewritten) under a small comment banner.  `graph` is accepted
    but not used here.
    """
    header = ('\"\"\"Amalgamation of {0} package, made up of the following '
              'modules, in order:\n\n* ').format(pkg)
    pieces = [header, '\n* '.join(order), '\n\n\"\"\"\n', LAZY_IMPORTS]
    # shared across modules so an import emitted once is not repeated
    imps = set()
    for name in order:
        body = rewrite_imports(name, pkg, order, imps)
        pieces.append('#\n# ' + name + '\n#\n' + body + '\n')
    return ''.join(pieces)
|
|
|
|
|
|
|
|
|
|
|
|
def write_amalgam(src, pkg):
    """Save `src` as ``__amalgam__.py`` inside the package directory."""
    target = os.path.join(pkg.replace('.', os.sep), '__amalgam__.py')
    with open(target, 'w', encoding='utf-8', errors='surrogateescape') as out:
        out.write(src)
|
|
|
|
|
|
|
|
|
2016-06-13 02:37:54 -04:00
|
|
|
def _init_name_lines(pkg):
    """Return the path of the package's ``__init__.py`` and its lines.

    The lines are returned without their line endings.
    """
    fname = os.path.join(pkg.replace('.', os.sep), '__init__.py')
    with open(fname, encoding='utf-8', errors='surrogateescape') as f:
        lines = f.read().splitlines()
    return fname, lines
|
|
|
|
|
|
|
|
|
|
|
|
def read_exclude(pkg):
    """Collect the module names excluded in the package's ``__init__.py``.

    Every line starting with ``# amalgamate exclude`` contributes the
    whitespace-separated names that follow those three words.
    """
    _, lines = _init_name_lines(pkg)
    marked = [ln for ln in lines if ln.startswith('# amalgamate exclude')]
    return {mod for ln in marked for mod in ln.split()[3:]}
|
|
|
|
|
|
|
|
|
|
|
|
# Template of the stanza that rewrite_init() writes into ``__init__.py``.
# Placeholders: ``{debug}`` is the name of the environment variable that
# disables the amalgamated module when set to a non-empty value, ``{pkg}``
# is the package name and ``{load}`` receives the statements that alias
# every amalgamated module to ``__amalgam__``.  The text is executed as
# Python, so its block indentation is significant; ``{load}`` sits inside
# the ``try`` body and any continuation lines substituted for it must use
# the same indentation as the placeholder line.
FAKE_LOAD = """
import os as _os
if _os.getenv('{debug}', ''):
    pass
else:
    import sys as _sys
    try:
        from {pkg} import __amalgam__
        {load}
        del __amalgam__
    except ImportError:
        pass
    del _sys
del _os
""".strip()
|
|
|
|
|
|
|
|
|
2016-06-16 11:03:43 -04:00
|
|
|
def rewrite_init(pkg, order, debug='DEBUG'):
    """Rewrites the init file to insert modules.

    The package's ``__init__.py`` must contain a start marker (a line
    beginning with ``# amalgamate`` that is not the end marker; exclude
    lines qualify) followed by an end marker (a line beginning with
    ``# amalgamate end``).  When a marker occurs several times the last
    occurrence is used.  Everything between the two markers is replaced by
    the load stanza generated from ``FAKE_LOAD``.

    Parameters
    ----------
    pkg : str
        Dotted name of the package.
    order : iterable of str
        Names of the amalgamated modules.
    debug : str, optional
        Name of the environment variable substituted for ``{debug}`` in
        the stanza.

    Raises
    ------
    RuntimeError
        If a marker is missing or the end marker does not come after the
        start marker.
    """
    fname, lines = _init_name_lines(pkg)
    start = stop = None
    for i, line in enumerate(lines):
        if line.startswith('# amalgamate end'):
            stop = i
        elif line.startswith('# amalgamate'):
            start = i
    if start is None or stop is None or stop <= start:
        msg = ("Could not find a '# amalgamate' marker followed by an "
               "'# amalgamate end' marker in {0}").format(fname)
        raise RuntimeError(msg)
    # The first statement takes the indentation of the ``{load}`` line in
    # the template; every following line must repeat that indentation.
    indent = ''
    for tline in FAKE_LOAD.splitlines():
        if '{load}' in tline:
            indent = tline[:len(tline) - len(tline.lstrip())]
            break
    sep = '\n' + indent
    t = '{1} = __amalgam__' + sep + "_sys.modules['{0}.{1}'] = __amalgam__"
    load = sep.join(t.format(pkg, m) for m in order)
    s = FAKE_LOAD.format(pkg=pkg, load=load, debug=debug)
    # replace whatever currently sits between the markers (possibly nothing)
    lines[start + 1:stop] = [s]
    init = '\n'.join(lines) + '\n'
    with open(fname, 'w', encoding='utf-8', errors='surrogateescape') as f:
        f.write(init)
|
|
|
|
|
|
|
|
|
2016-06-12 12:31:50 -04:00
|
|
|
def main(args=None):
    """Amalgamate every package named on the command line.

    `args` defaults to ``sys.argv``; its first element is skipped.  An
    argument of the form ``--debug=NAME`` changes the debug environment
    variable name used for the packages that come after it.
    """
    argv = sys.argv if args is None else args
    debug = 'DEBUG'
    flag = '--debug='
    for arg in argv[1:]:
        if arg.startswith(flag):
            debug = arg[len(flag):]
            continue
        pkg = arg
        print('Amalgamating ' + pkg)
        exclude = read_exclude(pkg)
        print(' excluding {}'.format(pprint.pformat(exclude)))
        graph = make_graph(pkg, exclude=exclude)
        order = depsort(graph)
        src = amalgamate(order, graph, pkg)
        write_amalgam(src, pkg)
        rewrite_init(pkg, order, debug=debug)
        print(' collapsed {} modules'.format(len(order)))
|
2016-06-12 12:31:50 -04:00
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: amalgamate the packages named in sys.argv.
if __name__ == '__main__':
    main()
|