author    Eric Snow <ericsnowcurrently@gmail.com>  2020-12-24 11:04:19 -0700
committer GitHub <noreply@github.com>  2020-12-24 11:04:19 -0700
commit    7ec59d8861ef1104c3028678b2cacde4c5693e19
tree      b499f0504f79e1a218229e93ca1847fb61d80831 /Tools
parent    closes bpo-42726: gdb libpython: InstanceProxy support for py3 (GH-23912)
bpo-36876: [c-analyzer tool] Add a "capi" subcommand to the c-analyzer tool. (gh-23918)
This will help identify which C-API items will need to be updated for subinterpreter support. https://bugs.python.org/issue36876
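Hypothetical usage (assuming the tool's existing c-analyzer.py entry point; the flags are defined in cpython/__main__.py below): running "python3 Tools/c-analyzer/c-analyzer.py capi --public --summary" from a checkout would summarize the public (stable + cpython) C-API items by kind.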
Diffstat (limited to 'Tools')
 -rw-r--r--  Tools/c-analyzer/c_analyzer/__main__.py             |   9
 -rw-r--r--  Tools/c-analyzer/c_common/scriptutil.py             |  24
 -rw-r--r--  Tools/c-analyzer/c_common/tables.py                 | 176
 -rw-r--r--  Tools/c-analyzer/c_parser/__main__.py               |   3
 -rw-r--r--  Tools/c-analyzer/c_parser/preprocessor/__main__.py  |   5
 -rw-r--r--  Tools/c-analyzer/check-c-globals.py                 |   1
 -rw-r--r--  Tools/c-analyzer/cpython/__main__.py                | 104
 -rw-r--r--  Tools/c-analyzer/cpython/_capi.py                   | 479
 -rw-r--r--  Tools/c-analyzer/cpython/_files.py                  |  69
 -rw-r--r--  Tools/c-analyzer/cpython/_parser.py                 |  36
10 files changed, 849 insertions(+), 57 deletions(-)
diff --git a/Tools/c-analyzer/c_analyzer/__main__.py b/Tools/c-analyzer/c_analyzer/__main__.py
index 44325f2952e..24fc6cd1826 100644
--- a/Tools/c-analyzer/c_analyzer/__main__.py
+++ b/Tools/c-analyzer/c_analyzer/__main__.py
@@ -263,7 +263,7 @@ FORMATS = {
def add_output_cli(parser, *, default='summary'):
parser.add_argument('--format', dest='fmt', default=default, choices=tuple(FORMATS))
- def process_args(args):
+ def process_args(args, *, argv=None):
pass
return process_args
@@ -280,7 +280,7 @@ def _cli_check(parser, checks=None, **kwargs):
process_checks = add_checks_cli(parser)
elif len(checks) == 1 and type(checks) is not dict and re.match(r'^<.*>$', checks[0]):
check = checks[0][1:-1]
- def process_checks(args):
+ def process_checks(args, *, argv=None):
args.checks = [check]
else:
process_checks = add_checks_cli(parser, checks=checks)
@@ -428,9 +428,9 @@ def _cli_data(parser, filenames=None, known=None):
if known is None:
sub.add_argument('--known', required=True)
- def process_args(args):
+ def process_args(args, *, argv):
if args.datacmd == 'dump':
- process_progress(args)
+ process_progress(args, argv=argv)
return process_args
@@ -515,6 +515,7 @@ def parse_args(argv=sys.argv[1:], prog=sys.argv[0], *, subset=None):
verbosity, traceback_cm = process_args_by_key(
args,
+ argv,
processors[cmd],
['verbosity', 'traceback_cm'],
)
diff --git a/Tools/c-analyzer/c_common/scriptutil.py b/Tools/c-analyzer/c_common/scriptutil.py
index 50dd7548869..ce69af2b6bd 100644
--- a/Tools/c-analyzer/c_common/scriptutil.py
+++ b/Tools/c-analyzer/c_common/scriptutil.py
@@ -192,7 +192,7 @@ def add_verbosity_cli(parser):
parser.add_argument('-q', '--quiet', action='count', default=0)
parser.add_argument('-v', '--verbose', action='count', default=0)
- def process_args(args):
+ def process_args(args, *, argv=None):
ns = vars(args)
key = 'verbosity'
if key in ns:
@@ -208,7 +208,7 @@ def add_traceback_cli(parser):
parser.add_argument('--no-traceback', '--no-tb', dest='traceback',
action='store_const', const=False)
- def process_args(args):
+ def process_args(args, *, argv=None):
ns = vars(args)
key = 'traceback_cm'
if key in ns:
@@ -262,7 +262,7 @@ def add_sepval_cli(parser, opt, dest, choices, *, sep=',', **kwargs):
#kwargs.setdefault('metavar', opt.upper())
parser.add_argument(opt, dest=dest, action='append', **kwargs)
- def process_args(args):
+ def process_args(args, *, argv=None):
ns = vars(args)
# XXX Use normalize_selection()?
@@ -293,7 +293,7 @@ def add_file_filtering_cli(parser, *, excluded=None):
excluded = tuple(excluded or ())
- def process_args(args):
+ def process_args(args, *, argv=None):
ns = vars(args)
key = 'iter_filenames'
if key in ns:
@@ -323,7 +323,7 @@ def add_progress_cli(parser, *, threshold=VERBOSITY, **kwargs):
parser.add_argument('--no-progress', dest='track_progress', action='store_false')
parser.set_defaults(track_progress=True)
- def process_args(args):
+ def process_args(args, *, argv=None):
if args.track_progress:
ns = vars(args)
verbosity = ns.get('verbosity', VERBOSITY)
@@ -339,7 +339,7 @@ def add_failure_filtering_cli(parser, pool, *, default=False):
metavar=f'"{{all|{"|".join(sorted(pool))}}},..."')
parser.add_argument('--no-fail', dest='fail', action='store_const', const=())
- def process_args(args):
+ def process_args(args, *, argv=None):
ns = vars(args)
fail = ns.pop('fail')
@@ -371,7 +371,7 @@ def add_failure_filtering_cli(parser, pool, *, default=False):
def add_kind_filtering_cli(parser, *, default=None):
parser.add_argument('--kinds', action='append')
- def process_args(args):
+ def process_args(args, *, argv=None):
ns = vars(args)
kinds = []
@@ -486,18 +486,18 @@ def _flatten_processors(processors):
yield from _flatten_processors(proc)
-def process_args(args, processors, *, keys=None):
+def process_args(args, argv, processors, *, keys=None):
processors = _flatten_processors(processors)
ns = vars(args)
extracted = {}
if keys is None:
for process_args in processors:
- for key in process_args(args):
+ for key in process_args(args, argv=argv):
extracted[key] = ns.pop(key)
else:
remainder = set(keys)
for process_args in processors:
- hanging = process_args(args)
+ hanging = process_args(args, argv=argv)
if isinstance(hanging, str):
hanging = [hanging]
for key in hanging or ():
@@ -510,8 +510,8 @@ def process_args(args, processors, *, keys=None):
return extracted
-def process_args_by_key(args, processors, keys):
- extracted = process_args(args, processors, keys=keys)
+def process_args_by_key(args, argv, processors, keys):
+ extracted = process_args(args, argv, processors, keys=keys)
return [extracted[key] for key in keys]
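
The net effect of the scriptutil changes: every process_args hook now takes a keyword-only argv, and process_args_by_key() accepts the original argv and threads it through to each hook. A minimal standalone sketch of the revised contract (add_verbosity here is a simplified stand-in for the real add_verbosity_cli, which also handles -q; run from Tools/c-analyzer):

    import argparse
    from c_common.scriptutil import process_args_by_key

    def add_verbosity(parser):
        parser.add_argument('-v', '--verbose', action='count', default=0)

        def process_args(args, *, argv=None):
            # argv is now available to every hook (e.g. for error messages).
            args.verbosity = args.verbose
            return 'verbosity'  # the key this hook extracted into args
        return process_args

    parser = argparse.ArgumentParser()
    proc = add_verbosity(parser)
    argv = ['-v', '-v']
    args = parser.parse_args(argv)
    (verbosity,) = process_args_by_key(args, argv, [proc], ['verbosity'])
    assert verbosity == 2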
diff --git a/Tools/c-analyzer/c_common/tables.py b/Tools/c-analyzer/c_common/tables.py
index 411152e3f94..85b50192571 100644
--- a/Tools/c-analyzer/c_common/tables.py
+++ b/Tools/c-analyzer/c_common/tables.py
@@ -1,4 +1,6 @@
import csv
+import re
+import textwrap
from . import NOT_SET, strutil, fsutil
@@ -212,3 +214,177 @@ def _normalize_table_file_props(header, sep):
else:
sep = None
return header, sep
+
+
+##################################
+# stdout tables
+
+WIDTH = 20
+
+
+def resolve_columns(specs):
+ if isinstance(specs, str):
+ specs = specs.replace(',', ' ').strip().split()
+ return _resolve_colspecs(specs)
+
+
+def build_table(specs, *, sep=' ', defaultwidth=None):
+ columns = resolve_columns(specs)
+ return _build_table(columns, sep=sep, defaultwidth=defaultwidth)
+
+
+_COLSPEC_RE = re.compile(textwrap.dedent(r'''
+ ^
+ (?:
+ [[]
+ (
+ (?: [^\s\]] [^\]]* )?
+ [^\s\]]
+ ) # <label>
+ []]
+ )?
+ ( \w+ ) # <field>
+ (?:
+ (?:
+ :
+ ( [<^>] ) # <align>
+ ( \d+ ) # <width1>
+ )
+ |
+ (?:
+ (?:
+ :
+ ( \d+ ) # <width2>
+ )?
+ (?:
+ :
+ ( .*? ) # <fmt>
+ )?
+ )
+ )?
+ $
+'''), re.VERBOSE)
+
+
+def _parse_fmt(fmt):
+ if fmt.startswith(tuple('<^>')):
+ align = fmt[0]
+ width = fmt[1:]
+ if width.isdigit():
+ return int(width), align
+ return None, None
+
+
+def _parse_colspec(raw):
+ m = _COLSPEC_RE.match(raw)
+ if not m:
+ return None
+ label, field, align, width1, width2, fmt = m.groups()
+ if not label:
+ label = field
+ if width1:
+ width = None
+ fmt = f'{align}{width1}'
+ elif width2:
+ width = int(width2)
+ if fmt:
+ _width, _ = _parse_fmt(fmt)
+ if _width == width:
+ width = None
+ else:
+ width = None
+ return field, label, width, fmt
+
+
+def _normalize_colspec(spec):
+ if len(spec) == 1:
+ raw, = spec
+ return _resolve_colspec(raw)
+
+ if len(spec) == 4:
+ label, field, width, fmt = spec
+ if width:
+ fmt = f'{width}:{fmt}' if fmt else width
+ elif len(spec) == 3:
+ label, field, fmt = spec
+ if not field:
+ label, field = None, label
+ elif not isinstance(field, str) or not field.isidentifier():
+ fmt = f'{field}:{fmt}' if fmt else field
+ label, field = None, label
+ elif len(spec) == 2:
+ label = None
+ field, fmt = spec
+ if not field:
+ field, fmt = fmt, None
+ elif not field.isidentifier() or fmt.isidentifier():
+ label, field = field, fmt
+ else:
+ raise NotImplementedError
+
+ fmt = f':{fmt}' if fmt else ''
+ if label:
+ return _parse_colspec(f'[{label}]{field}{fmt}')
+ else:
+ return _parse_colspec(f'{field}{fmt}')
+
+
+def _resolve_colspec(raw):
+ if isinstance(raw, str):
+ spec = _parse_colspec(raw)
+ else:
+ spec = _normalize_colspec(raw)
+ if spec is None:
+ raise ValueError(f'unsupported column spec {raw!r}')
+ return spec
+
+
+def _resolve_colspecs(columns):
+ parsed = []
+ for raw in columns:
+ column = _resolve_colspec(raw)
+ parsed.append(column)
+ return parsed
+
+
+def _resolve_width(spec, defaultwidth):
+ _, label, width, fmt = spec
+ if width:
+ if not isinstance(width, int):
+ raise NotImplementedError
+ return width
+ elif fmt:
+ width, _ = _parse_fmt(fmt)
+ if width:
+ return width
+
+ if not defaultwidth:
+ return WIDTH
+ elif not hasattr(defaultwidth, 'get'):
+ return defaultwidth or WIDTH
+
+ defaultwidths = defaultwidth
+ defaultwidth = defaultwidths.get(None) or WIDTH
+ return defaultwidths.get(label) or defaultwidth
+
+
+def _build_table(columns, *, sep=' ', defaultwidth=None):
+ header = []
+ div = []
+ rowfmt = []
+ for spec in columns:
+ field, label, _, colfmt = spec
+ width = _resolve_width(spec, defaultwidth)
+ if colfmt:
+ colfmt = f':{colfmt}'
+ else:
+ colfmt = f':{width}'
+
+ header.append(f' {{:^{width}}} '.format(label))
+ div.append('-' * (width + 2))
+ rowfmt.append(f' {{{field}{colfmt}}} ')
+ return (
+ sep.join(header),
+ sep.join(div),
+ sep.join(rowfmt),
+ )
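
As an illustration of the new stdout-table helpers (made-up column names; run from Tools/c-analyzer, and assuming the width/label handling as fixed above): build_table() turns a compact spec string into a header line, a divider, and a str.format() template for data rows.

    from c_common.tables import build_table

    # 'name' gets an explicit width; 'total' gets a label plus right-alignment.
    header, div, rowfmt = build_table('name:12 [Count]total:>6')
    print(header)   # centered column labels: name / Count
    print(div)      # dashes sized to each column
    print(rowfmt.format(name='Py_Version', total=3))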
diff --git a/Tools/c-analyzer/c_parser/__main__.py b/Tools/c-analyzer/c_parser/__main__.py
index 539cec509ce..78f47a1808f 100644
--- a/Tools/c-analyzer/c_parser/__main__.py
+++ b/Tools/c-analyzer/c_parser/__main__.py
@@ -149,7 +149,7 @@ def add_output_cli(parser):
parser.add_argument('--showfwd', action='store_true', default=None)
parser.add_argument('--no-showfwd', dest='showfwd', action='store_false', default=None)
- def process_args(args):
+ def process_args(args, *, argv=None):
pass
return process_args
@@ -243,6 +243,7 @@ def parse_args(argv=sys.argv[1:], prog=sys.argv[0], *, subset='parse'):
verbosity, traceback_cm = process_args_by_key(
args,
+ argv,
processors[cmd],
['verbosity', 'traceback_cm'],
)
diff --git a/Tools/c-analyzer/c_parser/preprocessor/__main__.py b/Tools/c-analyzer/c_parser/preprocessor/__main__.py
index a6054307c25..bfc61949a76 100644
--- a/Tools/c-analyzer/c_parser/preprocessor/__main__.py
+++ b/Tools/c-analyzer/c_parser/preprocessor/__main__.py
@@ -40,10 +40,10 @@ def add_common_cli(parser, *, get_preprocessor=_get_preprocessor):
parser.add_argument('--same', action='append')
process_fail_arg = add_failure_filtering_cli(parser, FAIL)
- def process_args(args):
+ def process_args(args, *, argv):
ns = vars(args)
- process_fail_arg(args)
+ process_fail_arg(args, argv=argv)
ignore_exc = ns.pop('ignore_exc')
# We later pass ignore_exc to _get_preprocessor().
@@ -174,6 +174,7 @@ def parse_args(argv=sys.argv[1:], prog=sys.argv[0], *,
verbosity, traceback_cm = process_args_by_key(
args,
+ argv,
processors[cmd],
['verbosity', 'traceback_cm'],
)
diff --git a/Tools/c-analyzer/check-c-globals.py b/Tools/c-analyzer/check-c-globals.py
index 3fe2bdcae14..b1364a612bb 100644
--- a/Tools/c-analyzer/check-c-globals.py
+++ b/Tools/c-analyzer/check-c-globals.py
@@ -22,6 +22,7 @@ def parse_args():
cmd = 'check'
verbosity, traceback_cm = process_args_by_key(
args,
+ argv,
processors,
['verbosity', 'traceback_cm'],
)
diff --git a/Tools/c-analyzer/cpython/__main__.py b/Tools/c-analyzer/cpython/__main__.py
index 6d78af299bb..9d29b13ed8f 100644
--- a/Tools/c-analyzer/cpython/__main__.py
+++ b/Tools/c-analyzer/cpython/__main__.py
@@ -3,11 +3,14 @@ import sys
from c_common.fsutil import expand_filenames, iter_files_by_suffix
from c_common.scriptutil import (
+ VERBOSITY,
add_verbosity_cli,
add_traceback_cli,
add_commands_cli,
add_kind_filtering_cli,
add_files_cli,
+ add_progress_cli,
+ main_for_filenames,
process_args_by_key,
configure_logger,
get_prog,
@@ -17,7 +20,7 @@ import c_parser.__main__ as c_parser
import c_analyzer.__main__ as c_analyzer
import c_analyzer as _c_analyzer
from c_analyzer.info import UNKNOWN
-from . import _analyzer, _parser, REPO_ROOT
+from . import _analyzer, _capi, _files, _parser, REPO_ROOT
logger = logging.getLogger(__name__)
@@ -25,9 +28,9 @@ logger = logging.getLogger(__name__)
def _resolve_filenames(filenames):
if filenames:
- resolved = (_parser.resolve_filename(f) for f in filenames)
+ resolved = (_files.resolve_filename(f) for f in filenames)
else:
- resolved = _parser.iter_filenames()
+ resolved = _files.iter_filenames()
return resolved
@@ -204,6 +207,95 @@ def cmd_data(datacmd, **kwargs):
)
+def _cli_capi(parser):
+ parser.add_argument('--levels', action='append', metavar='LEVEL[,...]')
+ parser.add_argument('--public', dest='levels',
+ action='append_const', const='public')
+ parser.add_argument('--no-public', dest='levels',
+ action='append_const', const='no-public')
+ for level in _capi.LEVELS:
+ parser.add_argument(f'--{level}', dest='levels',
+ action='append_const', const=level)
+ def process_levels(args, *, argv=None):
+ levels = []
+ for raw in args.levels or ():
+ for level in raw.replace(',', ' ').strip().split():
+ if level == 'public':
+ levels.append('stable')
+ levels.append('cpython')
+ elif level == 'no-public':
+ levels.append('private')
+ levels.append('internal')
+ elif level in _capi.LEVELS:
+ levels.append(level)
+ else:
+ parser.error(f'expected LEVEL to be one of {sorted(_capi.LEVELS)}, got {level!r}')
+ args.levels = set(levels)
+
+ parser.add_argument('--kinds', action='append', metavar='KIND[,...]')
+ for kind in _capi.KINDS:
+ parser.add_argument(f'--{kind}', dest='kinds',
+ action='append_const', const=kind)
+ def process_kinds(args, *, argv=None):
+ kinds = []
+ for raw in args.kinds or ():
+ for kind in raw.replace(',', ' ').strip().split():
+ if kind in _capi.KINDS:
+ kinds.append(kind)
+ else:
+ parser.error(f'expected KIND to be one of {sorted(_capi.KINDS)}, got {kind!r}')
+ args.kinds = set(kinds)
+
+ parser.add_argument('--group-by', dest='groupby',
+ choices=['level', 'kind'])
+
+ parser.add_argument('--format', default='brief')
+ parser.add_argument('--summary', dest='format',
+ action='store_const', const='summary')
+ def process_format(args, *, argv=None):
+ orig = args.format
+ args.format = _capi.resolve_format(args.format)
+ if isinstance(args.format, str):
+ if args.format not in _capi._FORMATS:
+ parser.error(f'unsupported format {orig!r}')
+
+ parser.add_argument('filenames', nargs='*', metavar='FILENAME')
+ process_progress = add_progress_cli(parser)
+
+ return [
+ process_levels,
+ process_kinds,
+ process_format,
+ process_progress,
+ ]
+
+
+def cmd_capi(filenames=None, *,
+ levels=None,
+ kinds=None,
+ groupby='kind',
+ format='brief',
+ track_progress=None,
+ verbosity=VERBOSITY,
+ **kwargs
+ ):
+ render = _capi.get_renderer(format)
+
+ filenames = _files.iter_header_files(filenames, levels=levels)
+ #filenames = (file for file, _ in main_for_filenames(filenames))
+ if track_progress is not None:
+ filenames = track_progress(filenames)
+ items = _capi.iter_capi(filenames)
+ if levels:
+ items = (item for item in items if item.level in levels)
+ if kinds:
+ items = (item for item in items if item.kind in kinds)
+
+ lines = render(items, groupby=groupby, verbose=verbosity > VERBOSITY)
+ print()
+ for line in lines:
+ print(line)
+
+
# We do not define any other cmd_*() handlers here,
# favoring those defined elsewhere.
@@ -228,6 +320,11 @@ COMMANDS = {
[_cli_data],
cmd_data,
),
+ 'capi': (
+ 'inspect the C-API',
+ [_cli_capi],
+ cmd_capi,
+ ),
}
@@ -263,6 +360,7 @@ def parse_args(argv=sys.argv[1:], prog=None, *, subset=None):
verbosity, traceback_cm = process_args_by_key(
args,
+ argv,
processors[cmd],
['verbosity', 'traceback_cm'],
)
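
The --public and --no-public flags are shorthands: per process_levels() above, --public is equivalent to --levels stable,cpython and --no-public to --levels private,internal. A standalone sketch of that expansion (no imports from the tool):

    LEVELS = {'stable', 'cpython', 'private', 'internal'}

    def expand_levels(raw_values):
        levels = []
        for raw in raw_values or ():
            for level in raw.replace(',', ' ').split():
                if level == 'public':
                    levels += ['stable', 'cpython']
                elif level == 'no-public':
                    levels += ['private', 'internal']
                elif level in LEVELS:
                    levels.append(level)
                else:
                    raise ValueError(f'unsupported level {level!r}')
        return set(levels)

    assert expand_levels(['public']) == {'stable', 'cpython'}
    assert expand_levels(['no-public', 'stable']) == {'internal', 'private', 'stable'}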
diff --git a/Tools/c-analyzer/cpython/_capi.py b/Tools/c-analyzer/cpython/_capi.py
new file mode 100644
index 00000000000..38d7cd3c514
--- /dev/null
+++ b/Tools/c-analyzer/cpython/_capi.py
@@ -0,0 +1,479 @@
+from collections import namedtuple
+import os
+import os.path
+import re
+import textwrap
+
+from c_common.tables import build_table, resolve_columns
+from c_parser.parser._regexes import _ind
+from ._files import iter_header_files, resolve_filename
+from . import REPO_ROOT
+
+
+INCLUDE_ROOT = os.path.join(REPO_ROOT, 'Include')
+INCLUDE_CPYTHON = os.path.join(INCLUDE_ROOT, 'cpython')
+INCLUDE_INTERNAL = os.path.join(INCLUDE_ROOT, 'internal')
+
+_MAYBE_NESTED_PARENS = textwrap.dedent(r'''
+ (?:
+ (?: [^(]* [(] [^()]* [)] )* [^(]*
+ )
+''')
+
+CAPI_FUNC = textwrap.dedent(rf'''
+ (?:
+ ^
+ \s*
+ PyAPI_FUNC \s*
+ [(]
+ {_ind(_MAYBE_NESTED_PARENS, 2)}
+ [)] \s*
+ (\w+) # <func>
+ \s* [(]
+ )
+''')
+CAPI_DATA = textwrap.dedent(rf'''
+ (?:
+ ^
+ \s*
+ PyAPI_DATA \s*
+ [(]
+ {_ind(_MAYBE_NESTED_PARENS, 2)}
+ [)] \s*
+ (\w+) # <data>
+ \b [^(]
+ )
+''')
+CAPI_INLINE = textwrap.dedent(r'''
+ (?:
+ ^
+ \s*
+ static \s+ inline \s+
+ .*?
+ \s+
+ ( \w+ ) # <inline>
+ \s* [(]
+ )
+''')
+CAPI_MACRO = textwrap.dedent(r'''
+ (?:
+ (\w+) # <macro>
+ [(]
+ )
+''')
+CAPI_CONSTANT = textwrap.dedent(r'''
+ (?:
+ (\w+) # <constant>
+ \s+ [^(]
+ )
+''')
+CAPI_DEFINE = textwrap.dedent(rf'''
+ (?:
+ ^
+ \s* [#] \s* define \s+
+ (?:
+ {_ind(CAPI_MACRO, 3)}
+ |
+ {_ind(CAPI_CONSTANT, 3)}
+ |
+ (?:
+ # ignored
+ \w+ # <defined_name>
+ \s*
+ $
+ )
+ )
+ )
+''')
+CAPI_RE = re.compile(textwrap.dedent(rf'''
+ (?:
+ {_ind(CAPI_FUNC, 2)}
+ |
+ {_ind(CAPI_DATA, 2)}
+ |
+ {_ind(CAPI_INLINE, 2)}
+ |
+ {_ind(CAPI_DEFINE, 2)}
+ )
+'''), re.VERBOSE)
+
+KINDS = [
+ 'func',
+ 'data',
+ 'inline',
+ 'macro',
+ 'constant',
+]
+
+
+def _parse_line(line, prev=None):
+ last = line
+ if prev:
+ if not prev.endswith(os.linesep):
+ prev += os.linesep
+ line = prev + line
+ m = CAPI_RE.match(line)
+ if not m:
+ if not prev and line.startswith('static inline '):
+ return line # the new "prev"
+ #if 'PyAPI_' in line or '#define ' in line or ' define ' in line:
+ # print(line)
+ return None
+ results = zip(KINDS, m.groups())
+ for kind, name in results:
+ if name:
+ clean = last.split('//')[0].strip()
+ if clean.endswith('*/'):
+ clean = clean.split('/*')[0].rstrip()
+ if kind == 'macro' or kind == 'constant':
+ if clean.endswith('\\'):
+ return line # the new "prev"
+ elif kind == 'inline':
+ if not prev:
+ if not clean.endswith('}'):
+ return line # the new "prev"
+ elif clean != '}':
+ return line # the new "prev"
+ elif not clean.endswith(';'):
+ return line # the new "prev"
+ return name, kind
+ # It was a plain #define.
+ return None
+
+
+LEVELS = {
+ 'stable',
+ 'cpython',
+ 'private',
+ 'internal',
+}
+
+def _get_level(filename, name, *,
+ _cpython=INCLUDE_CPYTHON + os.path.sep,
+ _internal=INCLUDE_INTERNAL + os.path.sep,
+ ):
+ if filename.startswith(_internal):
+ return 'internal'
+ elif name.startswith('_'):
+ return 'private'
+ elif os.path.dirname(filename) == INCLUDE_ROOT:
+ return 'stable'
+ elif filename.startswith(_cpython):
+ return 'cpython'
+ else:
+ raise NotImplementedError
+ #return '???'
+
+
+class CAPIItem(namedtuple('CAPIItem', 'file lno name kind level')):
+
+ @classmethod
+ def from_line(cls, line, filename, lno, prev=None):
+ parsed = _parse_line(line, prev)
+ if not parsed:
+ return None, None
+ if isinstance(parsed, str):
+ # incomplete
+ return None, parsed
+ name, kind = parsed
+ level = _get_level(filename, name)
+ self = cls(filename, lno, name, kind, level)
+ if prev:
+ self._text = (prev + line).rstrip().splitlines()
+ else:
+ self._text = [line.rstrip()]
+ return self, None
+
+ @property
+ def relfile(self):
+ return self.file[len(REPO_ROOT) + 1:]
+
+ @property
+ def text(self):
+ try:
+ return self._text
+ except AttributeError:
+ # XXX Actually read the text from disk?
+ self._text = []
+ if self.kind == 'data':
+ self._text = [
+ f'PyAPI_DATA(...) {self.name}',
+ ]
+ elif self.kind == 'func':
+ self._text = [
+ f'PyAPI_FUNC(...) {self.name}(...);',
+ ]
+ elif self.kind == 'inline':
+ self._text = [
+ f'static inline {self.name}(...);',
+ ]
+ elif self.kind == 'macro':
+ self._text = [
+ f'#define {self.name}(...) \\',
+ f' ...',
+ ]
+ elif self.kind == 'constant':
+ self._text = [
+ f'#define {self.name} ...',
+ ]
+ else:
+ raise NotImplementedError
+
+ return self._text
+
+
+def _parse_groupby(raw):
+ if not raw:
+ raw = 'kind'
+
+ if isinstance(raw, str):
+ groupby = raw.replace(',', ' ').strip().split()
+ else:
+ raise NotImplementedError
+
+ if not all(v in ('kind', 'level') for v in groupby):
+ raise ValueError(f'invalid groupby value {raw!r}')
+ return groupby
+
+
+def summarize(items, *, groupby='kind'):
+ summary = {}
+
+ groupby = _parse_groupby(groupby)[0]
+ if groupby == 'kind':
+ outers = KINDS
+ inners = LEVELS
+ def increment(item):
+ summary[item.kind][item.level] += 1
+ elif groupby == 'level':
+ outers = LEVELS
+ inners = KINDS
+ def increment(item):
+ summary[item.level][item.kind] += 1
+ else:
+ raise NotImplementedError
+
+ for outer in outers:
+ summary[outer] = _outer = {}
+ for inner in inners:
+ _outer[inner] = 0
+ for item in items:
+ increment(item)
+
+ return summary
+
+
+def _parse_capi(lines, filename):
+ if isinstance(lines, str):
+ lines = lines.splitlines()
+ prev = None
+ for lno, line in enumerate(lines, 1):
+ parsed, prev = CAPIItem.from_line(line, filename, lno, prev)
+ if parsed:
+ yield parsed
+ if prev:
+ parsed, prev = CAPIItem.from_line('', filename, lno, prev)
+ if parsed:
+ yield parsed
+ if prev:
+ print('incomplete match:')
+ print(filename)
+ print(prev)
+ raise Exception
+
+
+def iter_capi(filenames=None):
+ for filename in iter_header_files(filenames):
+ with open(filename) as infile:
+ for item in _parse_capi(infile, filename):
+ yield item
+
+
+def _collate(items, groupby):
+ groupby = _parse_groupby(groupby)[0]
+ maxfilename = maxname = maxkind = maxlevel = 0
+ collated = {}
+ for item in items:
+ key = getattr(item, groupby)
+ if key in collated:
+ collated[key].append(item)
+ else:
+ collated[key] = [item]
+ maxfilename = max(len(item.relfile), maxfilename)
+ maxname = max(len(item.name), maxname)
+ maxkind = max(len(item.kind), maxkind)
+ maxlevel = max(len(item.level), maxlevel)
+ maxextra = {
+ 'kind': maxkind,
+ 'level': maxlevel,
+ }
+ return collated, groupby, maxfilename, maxname, maxextra
+
+
+##################################
+# CLI rendering
+
+_LEVEL_MARKERS = {
+ 'S': 'stable',
+ 'C': 'cpython',
+ 'P': 'private',
+ 'I': 'internal',
+}
+_KIND_MARKERS = {
+ 'F': 'func',
+ 'D': 'data',
+ 'I': 'inline',
+ 'M': 'macro',
+ 'C': 'constant',
+}
+
+
+def resolve_format(format):
+ if not format:
+ return 'brief'
+ elif isinstance(format, str) and format in _FORMATS:
+ return format
+ else:
+ return resolve_columns(format)
+
+
+def get_renderer(format):
+ format = resolve_format(format)
+ if isinstance(format, str):
+ try:
+ return _FORMATS[format]
+ except KeyError:
+ raise ValueError(f'unsupported format {format!r}')
+ else:
+ def render(items, **kwargs):
+ return render_table(items, columns=format, **kwargs)
+ return render
+
+
+def render_table(items, *, columns=None, groupby='kind', verbose=False):
+ if groupby:
+ collated, groupby, maxfilename, maxname, maxextra = _collate(items, groupby)
+ if groupby == 'kind':
+ groups = KINDS
+ extras = ['level']
+ markers = {'level': _LEVEL_MARKERS}
+ elif groupby == 'level':
+ groups = LEVELS
+ extras = ['kind']
+ markers = {'kind': _KIND_MARKERS}
+ else:
+ raise NotImplementedError
+ else:
+ # XXX Support no grouping?
+ raise NotImplementedError
+
+ if columns:
+ def get_extra(item):
+ return {extra: getattr(item, extra)
+ for extra in ('kind', 'level')}
+ else:
+ if verbose:
+ maxextra['kind'] = max(len(kind) for kind in KINDS)
+ maxextra['level'] = max(len(level) for level in LEVELS)
+ extracols = [f'{extra}:{maxextra[extra]}'
+ for extra in extras]
+ def get_extra(item):
+ return {extra: getattr(item, extra)
+ for extra in extras}
+ elif len(extras) == 1:
+ extra, = extras
+ extracols = [f'{m}:1' for m in markers[extra]]
+ def get_extra(item):
+ return {m: m if getattr(item, extra) == markers[extra][m] else ''
+ for m in markers[extra]}
+ else:
+ raise NotImplementedError
+ #extracols = [[f'{m}:1' for m in markers[extra]]
+ # for extra in extras]
+ #def get_extra(item):
+ # values = {}
+ # for extra in extras:
+ # cur = markers[extra]
+ # for m in cur:
+ # values[m] = m if getattr(item, m) == cur[m] else ''
+ # return values
+ columns = [
+ f'filename:{maxfilename}',
+ f'name:{maxname}',
+ *extracols,
+ ]
+ header, div, fmt = build_table(columns)
+
+ total = 0
+ for group in groups:
+ if group not in collated:
+ continue
+ yield ''
+ yield f' === {group} ==='
+ yield ''
+ yield header
+ yield div
+ for item in collated[group]:
+ yield fmt.format(
+ filename=item.relfile,
+ name=item.name,
+ **get_extra(item),
+ )
+ yield div
+ subtotal = len(collated[group])
+ yield f' sub-total: {subtotal}'
+ total += subtotal
+ yield ''
+ yield f'total: {total}'
+
+
+def render_full(items, *, groupby=None, verbose=False):
+ if groupby:
+ collated, groupby, _, _, _ = _collate(items, groupby)
+ for group, grouped in collated.items():
+ yield '#' * 25
+ yield f'# {group} ({len(grouped)})'
+ yield '#' * 25
+ yield ''
+ if not grouped:
+ continue
+ for item in grouped:
+ yield from _render_item_full(item, groupby, verbose)
+ yield ''
+ else:
+ for item in items:
+ yield from _render_item_full(item, None, verbose)
+ yield ''
+
+
+def _render_item_full(item, groupby, verbose):
+ yield item.name
+ yield f' {"filename:":10} {item.relfile}'
+ for extra in ('kind', 'level'):
+ #if groupby != extra:
+ yield f' {extra+":":10} {getattr(item, extra)}'
+ if verbose:
+ yield ' ---------------------------------------'
+ for lno, line in enumerate(item.text, item.lno):
+ yield f' | {lno:3} {line}'
+ yield ' ---------------------------------------'
+
+
+def render_summary(items, *, groupby='kind', verbose=False):
+ total = 0
+ summary = summarize(items, groupby=groupby)
+ # XXX Stabilize the sorting to match KINDS/LEVELS.
+ for outer, counts in summary.items():
+ subtotal = sum(c for _, c in counts.items())
+ yield f'{outer + ":":20} ({subtotal})'
+ for inner, count in counts.items():
+ yield f' {inner + ":":9} {count}'
+ total += subtotal
+ yield f'{"total:":20} ({total})'
+
+
+_FORMATS = {
+ 'brief': render_table,
+ 'full': render_full,
+ 'summary': render_summary,
+}
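
A hypothetical driver for the new module, sketched under the assumption that it runs from Tools/c-analyzer inside a CPython checkout (output depends on the headers present):

    from cpython import _capi

    items = list(_capi.iter_capi())          # scan the Include/ headers
    render = _capi.get_renderer('summary')   # or 'brief' (table) / 'full'
    for line in render(items, groupby='kind'):
        print(line)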
diff --git a/Tools/c-analyzer/cpython/_files.py b/Tools/c-analyzer/cpython/_files.py
new file mode 100644
index 00000000000..3e397880977
--- /dev/null
+++ b/Tools/c-analyzer/cpython/_files.py
@@ -0,0 +1,69 @@
+import os.path
+
+from c_common.fsutil import expand_filenames, iter_files_by_suffix
+from . import REPO_ROOT, INCLUDE_DIRS, SOURCE_DIRS
+
+
+GLOBS = [
+ 'Include/*.h',
+ 'Include/internal/*.h',
+ 'Modules/**/*.h',
+ 'Modules/**/*.c',
+ 'Objects/**/*.h',
+ 'Objects/**/*.c',
+ 'Python/**/*.h',
+ 'Python/**/*.c',
+ 'Parser/**/*.h',
+ 'Parser/**/*.c',
+]
+LEVEL_GLOBS = {
+ 'stable': 'Include/*.h',
+ 'cpython': 'Include/cpython/*.h',
+ 'internal': 'Include/internal/*.h',
+}
+
+
+def resolve_filename(filename):
+ orig = filename
+ filename = os.path.normcase(os.path.normpath(filename))
+ if os.path.isabs(filename):
+ if os.path.relpath(filename, REPO_ROOT).startswith('.'):
+ raise Exception(f'{orig!r} is outside the repo ({REPO_ROOT})')
+ return filename
+ else:
+ return os.path.join(REPO_ROOT, filename)
+
+
+def iter_filenames(*, search=False):
+ if search:
+ yield from iter_files_by_suffix(INCLUDE_DIRS, ('.h',))
+ yield from iter_files_by_suffix(SOURCE_DIRS, ('.c',))
+ else:
+ globs = (os.path.join(REPO_ROOT, file) for file in GLOBS)
+ yield from expand_filenames(globs)
+
+
+def iter_header_files(filenames=None, *, levels=None):
+ if not filenames:
+ if levels:
+ levels = set(levels)
+ if 'private' in levels:
+ levels.add('stable')
+ levels.add('cpython')
+ for level, glob in LEVEL_GLOBS.items():
+ if level in levels:
+ yield from expand_filenames([glob])
+ else:
+ yield from iter_files_by_suffix(INCLUDE_DIRS, ('.h',))
+ return
+
+ for filename in filenames:
+ orig = filename
+ filename = resolve_filename(filename)
+ if filename.endswith(os.path.sep):
+ yield from iter_files_by_suffix(INCLUDE_DIRS, ('.h',))
+ elif filename.endswith('.h'):
+ yield filename
+ else:
+ # XXX Log it and continue instead?
+ raise ValueError(f'expected .h file, got {orig!r}')
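
Illustrative use of the new helpers (a sketch, assuming the current directory is the repo root, since LEVEL_GLOBS holds repo-relative patterns):

    import sys
    sys.path.insert(0, 'Tools/c-analyzer')

    from cpython._files import iter_header_files, resolve_filename

    # Only the limited/stable API headers (Include/*.h):
    stable = list(iter_header_files(levels={'stable'}))

    # 'private' names live in the stable and cpython headers, so requesting
    # that level pulls in those globs too (see iter_header_files above):
    private = list(iter_header_files(levels={'private'}))

    print(resolve_filename('Include/object.h'))  # absolute path under REPO_ROOT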
diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py
index eef75849538..ef06a9fcb69 100644
--- a/Tools/c-analyzer/cpython/_parser.py
+++ b/Tools/c-analyzer/cpython/_parser.py
@@ -1,7 +1,6 @@
import os.path
import re
-from c_common.fsutil import expand_filenames, iter_files_by_suffix
from c_parser.preprocessor import (
get_preprocessor as _get_preprocessor,
)
@@ -9,7 +8,7 @@ from c_parser import (
parse_file as _parse_file,
parse_files as _parse_files,
)
-from . import REPO_ROOT, INCLUDE_DIRS, SOURCE_DIRS
+from . import REPO_ROOT
GLOB_ALL = '**/*'
@@ -43,19 +42,6 @@ def clean_lines(text):
@end=sh@
'''
-GLOBS = [
- 'Include/*.h',
- 'Include/internal/*.h',
- 'Modules/**/*.h',
- 'Modules/**/*.c',
- 'Objects/**/*.h',
- 'Objects/**/*.c',
- 'Python/**/*.h',
- 'Parser/**/*.c',
- 'Python/**/*.h',
- 'Parser/**/*.c',
-]
-
EXCLUDED = clean_lines('''
# @begin=conf@
@@ -280,26 +266,6 @@ SAME = [
]
-def resolve_filename(filename):
- orig = filename
- filename = os.path.normcase(os.path.normpath(filename))
- if os.path.isabs(filename):
- if os.path.relpath(filename, REPO_ROOT).startswith('.'):
- raise Exception(f'{orig!r} is outside the repo ({REPO_ROOT})')
- return filename
- else:
- return os.path.join(REPO_ROOT, filename)
-
-
-def iter_filenames(*, search=False):
- if search:
- yield from iter_files_by_suffix(INCLUDE_DIRS, ('.h',))
- yield from iter_files_by_suffix(SOURCE_DIRS, ('.c',))
- else:
- globs = (os.path.join(REPO_ROOT, file) for file in GLOBS)
- yield from expand_filenames(globs)
-
-
def get_preprocessor(*,
file_macros=None,
file_incldirs=None,