长话短说,我的Gist。
给定一个没有requirements.txt的Python项目,如果想知道需要安装哪些包才能满足这个项目的依赖需求,一个容易想到的方法就是对每一个.py文件,用模式匹配(如正则表达式)找import xxx,并记录xxx为需要的包。然而import语句有很多形式,如:import xxx、import xxx as aaa、import xxx as aaa, yyy as bbb、from xxx.yyy import fff as ccc、from .zzz import ggg。因此,更好的方法是利用抽象语法树ast模块来找出所有import语句。
Python的import语句对应ast的两种节点:ast.Import和ast.ImportFrom。要从ast.Import获取导入包的列表,可用:
[a.name for a in node.names] # 其中node是ast.Import类型的
要从ast.ImportFrom获取导入的包,可用:
node.module # 其中node是ast.ImportFrom类型的
值得注意的是如果当前import语句是from . import xxx,node.module将会是None,此时node.level > 0,意味着相对导入。因此,要想获得所有导入的包(除了相对导入外,因为相对导入的包绝不会是需要安装的依赖),可以这样:
import ast

# Assume ``source`` holds the text of the Python file to analyse.
root = ast.parse(source)

# Root (top-level) package name of every absolute import found in ``source``.
result = []
for node in ast.walk(root):
    if isinstance(node, ast.Import):
        # ``import a.b as c, d`` carries one alias per imported name.
        for a in node.names:
            result.append(a.name.split('.', maxsplit=1)[0])
    elif isinstance(node, ast.ImportFrom):
        # level > 0 marks a relative import, which can never be an
        # installable dependency, so only level == 0 is recorded.
        if node.level == 0:
            result.append(node.module.split('.', maxsplit=1)[0])
然而绝对导入的包也有可能是工作目录中已存在的模块或包啊,此时我们就可以根据导入路径判断它是不是指工作目录下的包:
def exists_local(path, rootpkg):
    """
    Tell whether the absolute import ``path`` resolves to a local package
    or module located under ``rootpkg``.

    :param path: dotted import path, e.g. ``"pkg.sub.mod"``
    :param rootpkg: directory that contains the root package
    :return: ``True`` if ``path`` maps to a directory holding an
             ``__init__.py`` or to a ``.py`` file below ``rootpkg``,
             else ``False``
    """
    # Translate the dotted path into a filesystem path below rootpkg.
    filepath = os.path.join(rootpkg, path.replace('.', os.path.sep))
    # see if path is a local package
    if os.path.isdir(filepath) and os.path.isfile(
            os.path.join(filepath, '__init__.py')):
        return True
    # see if path is a local module
    if os.path.isfile(filepath + '.py'):
        return True
    return False
其中path是导入路径,rootpkg是根包所在目录(定义见这里)。
把这个核心功能稍作包装,便可写出下面的完整可执行代码:
from __future__ import print_function

import argparse
import os
import ast
import sys
import pkgutil
import itertools
import logging


def make_parser():
    """
    Build the command-line parser.

    :return: an ``argparse.ArgumentParser`` defining the ``-g``, ``-i`` and
             ``-T`` options plus the ``rootpkg`` and ``filenames``
             positional arguments
    """
    parser = argparse.ArgumentParser(
        description=('List all root imports. The *root* import of '
                     '`import pkg1.mod1` is "pkg1".'))
    parser.add_argument(
        '-g',
        '--greedy',
        action='store_true',
        help=('find also import statements within try block, '
              'if block, while block, function definition, '
              'etc.'))
    parser.add_argument(
        '-i',
        '--include-installed',
        action='store_true',
        help='include installed/built-in modules/packages')
    parser.add_argument(
        '-T',
        '--files-from',
        metavar='LIST_FILE',
        help=('if specified, the files to process '
              'will be read one per line from '
              'LIST_FILE; if specified as `-\', '
              'stdin will be expected to contain '
              'the files to process. Note that '
              'SOURCE_FILEs, if exist, take '
              'precedence (see below)'))
    parser.add_argument(
        'rootpkg',
        metavar='ROOTPKG_DIR',
        type=dir_type,
        help=('the directory of the root package. See '
              'https://docs.python.org/3.7/distutils/setupscript.html#listing-whole-packages '
              'about *root package*. Local packages/modules will be '
              'excluded from the results. For example, if '
              'there are "mod1.py" and "mod2.py", and in '
              '"mod2.py" there is `import mod1`, then "mod1" '
              'won\'t be listed in the result.'))
    parser.add_argument(
        'filenames',
        metavar='SOURCE_FILE',
        nargs='*',
        help=('if specified one or more files, '
              'only these SOURCE_FILEs will get '
              'processed regardless of `-T\' '
              'option; if no SOURCE_FILE is '
              'specified, `-T\', if exists, is '
              'processed. In both cases, the '
              'final results will be joined'))
    return parser


def dir_type(string):
    """
    ``argparse`` type callback accepting only existing directories.

    :param string: the raw command-line value
    :return: ``string`` unchanged
    :raise argparse.ArgumentTypeError: if ``string`` is not a directory
    """
    if not os.path.isdir(string):
        raise argparse.ArgumentTypeError('must be a directory')
    return string


# Reference: https://stackoverflow.com/a/9049549/7881370
def yield_imports(root, greedy):
    """
    Yield the dotted module path of every absolute import below ``root``.

    :param root: an AST node, typically the result of ``ast.parse``
    :param greedy: if true, walk the whole tree so that imports nested in
                   functions, ``try``/``if`` blocks etc. are found too;
                   otherwise only direct children of ``root`` are inspected
    :return: generator of strings such as ``"pkg.mod"``
    """
    traverse = ast.walk if greedy else ast.iter_child_nodes
    for node in traverse(root):
        if isinstance(node, ast.Import):
            for a in node.names:
                yield a.name
        elif isinstance(node, ast.ImportFrom):
            # if node.level > 0, the import is relative
            if node.level == 0:
                yield node.module


def exists_local(path, rootpkg):
    """
    Returns ``True`` if the absolute import ``path`` refers to a package
    or a module residing under the working directory, else ``False``.

    :param path: dotted import path, e.g. ``"pkg.sub.mod"``
    :param rootpkg: directory that contains the root package
    """
    filepath = os.path.join(rootpkg, path.replace('.', os.path.sep))
    # see if path is a local package
    if os.path.isdir(filepath) and os.path.isfile(
            os.path.join(filepath, '__init__.py')):
        return True
    # see if path is a local module
    if os.path.isfile(filepath + '.py'):
        return True
    return False


def filter_local(imports_iterable, rootpkg):
    """
    Remove modules and packages in the working directory, and yield root
    imports.

    :param imports_iterable: iterable of dotted import paths
    :param rootpkg: directory that contains the root package
    :return: generator of the first dotted component of every path that
             ``exists_local`` does not find under ``rootpkg``
    """
    for path in imports_iterable:
        if not exists_local(path, rootpkg):
            yield path.split('.', 1)[0]


def filter_installed(imports_iterable):
    """
    Remove modules and packages already installed, which include built-in
    modules and packages and those already installed (e.g. via ``pip``).

    :param imports_iterable: iterable of root import names
    :return: generator of the names found neither in
             ``sys.builtin_module_names`` nor among the modules reported
             by ``pkgutil.iter_modules()``
    """
    installed = set(
        itertools.chain(sys.builtin_module_names,
                        (x[1] for x in pkgutil.iter_modules())))
    for name in imports_iterable:
        if name not in installed:
            yield name


def collect_sources(filenames, files_from):
    """
    Yield the paths of the source files to process.

    :param filenames: explicit list of files; when non-empty it takes
                      precedence and ``files_from`` is ignored
    :param files_from: ``'-'`` to read one path per line from stdin, a
                       path of a list file to read them from, or a false
                       value for no list file
    :return: generator of file paths with the trailing newline removed
    """
    if filenames:
        for filename in filenames:
            yield filename
    elif files_from == '-':
        try:
            for line in sys.stdin:
                yield line.rstrip('\n')
        except KeyboardInterrupt:
            # Ctrl-C simply ends the interactive list of files.
            pass
    elif files_from:
        try:
            with open(files_from) as infile:
                for line in infile:
                    yield line.rstrip('\n')
        except OSError:
            logging.exception('failed to read from "{}"'.format(files_from))


def main():
    """
    Command-line entry point: print, one per line and sorted, the root
    imports of the requested source files that are neither local to the
    root package nor (unless ``-i`` is given) already installed.
    """
    logging.basicConfig(format='%(levelname)s: %(message)s')
    args = make_parser().parse_args()
    all_imports = []
    for filename in collect_sources(args.filenames, args.files_from):
        try:
            # Read bytes so that ast.parse() itself honours any PEP 263
            # encoding declaration instead of the locale's default codec.
            with open(filename, 'rb') as infile:
                root = ast.parse(infile.read(), filename)
        except OSError:
            logging.exception('skipped')
        except (SyntaxError, ValueError):
            # ValueError: older Pythons raise it for sources with null bytes.
            logging.exception('failed to parse "{}"; skipped'.format(filename))
        else:
            all_imports.append(yield_imports(root, args.greedy))
    all_imports = itertools.chain.from_iterable(all_imports)
    all_imports = filter_local(all_imports, args.rootpkg)
    if not args.include_installed:
        all_imports = filter_installed(all_imports)
    all_imports = sorted(set(all_imports))
    if all_imports:
        print('\n'.join(all_imports))
    logging.shutdown()


if __name__ == '__main__':
    main()
需要注意的是,程序的输出并不一定是PyPI上包的名字(例如,import bs4然而pip install beautifulsoup4)。
类似项目:pipreqs。核心代码是几乎一样的,但包装得不同。