bpo-36876: Fix the C analyzer tool. by ericsnowcurrently · Pull Request #22841 · python/cpython · GitHub

Merged
Changes from 1 commit
Move the new analyzer into place.
ericsnowcurrently committed Oct 20, 2020
commit 04e2cda86e91a079db8df0a3ba101ff495237cbb
45 changes: 45 additions & 0 deletions Tools/c-analyzer/README
@@ -0,0 +1,45 @@
#######################################
# C Globals and CPython Runtime State.

CPython's C code makes extensive use of global variables. Each global
falls into one of several categories:

* (effectively) constants (incl. static types)
* globals used exclusively in main or in the REPL
* freelists, caches, and counters
* process-global state
* module state
* Python runtime state

The ignored-globals.txt file is organized similarly. Of the different
categories, the last two are problematic and generally should not exist
in the codebase.

Globals that hold module state (i.e. in Modules/*.c) cause problems
when multiple interpreters are in use. For more info, see PEP 3121,
which addresses the situation for extension modules in general.

Globals in the last category should be avoided as well. The problem
isn't with the Python runtime having state. Rather, the problem is
that this state is spread throughout the codebase in dozens of
individual globals. Unlike the other globals, the runtime state
represents a set of values that are constantly shifting in a complex
way. When they are spread out, it is harder to get a clear picture of
what the runtime involves. Furthermore, spreading them out complicates
efforts to change the runtime.

Consequently, the globals for Python's runtime state have been
consolidated under a single top-level _PyRuntime global. No new globals
should be added for runtime state. Instead, they should be added to
_PyRuntimeState or one of its sub-structs. The check-c-globals script
should be run to ensure that no new globals have been added:

python3 Tools/c-analyzer/check-c-globals.py

You can also use the more generic tool:

python3 Tools/c-analyzer/c-analyzer.py

If it reports any globals then they should be resolved. If the globals
are runtime state then they should be folded into _PyRuntimeState.
Otherwise they should be added to ignored-globals.txt.
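The workflow described above (find file-scope variables, drop the
ignored ones, report the rest) can be sketched in miniature. This is a
deliberately naive illustration and not the real tool: the actual
c-analyzer uses a full C parser, while this sketch only matches a few
common declaration shapes with a regular expression.

```python
import re

# Matches a few simple file-scope C variable declarations, e.g.
#   static int counter = 0;
#   PyObject *cache = NULL;
# This is a toy heuristic; the real c-analyzer parses C properly.
_GLOBAL_RE = re.compile(
    r'^(?:static\s+)?'
    r'(?:int|long|char|double|PyObject\s*\*)\s*'
    r'(?P<name>\w+)\s*(?:=[^;]*)?;'
)

def check_globals(source, ignored=frozenset()):
    """Return names of suspicious file-scope variables in C source text."""
    found = []
    for line in source.splitlines():
        # Treat const file-scope variables as constants and skip them,
        # mirroring the "(effectively) constants" category above.
        if 'const' in line.split('=')[0]:
            continue
        m = _GLOBAL_RE.match(line)
        if m and m.group('name') not in ignored:
            found.append(m.group('name'))
    return found
```

Passing an `ignored` set plays the role of ignored-globals.txt: entries
listed there are accepted rather than reported.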
1,069 changes: 1,069 additions & 0 deletions Tools/c-analyzer/TODO
Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions Tools/c-analyzer/c-analyzer.py
@@ -0,0 +1,7 @@
from cpython.__main__ import parse_args, main, configure_logger


cmd, cmd_kwargs, verbosity, traceback_cm = parse_args()
configure_logger(verbosity)
with traceback_cm:
    main(cmd, cmd_kwargs)
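The helpers imported from cpython/__main__.py are not shown in this
diff. As a hypothetical reconstruction of the pattern only (the real
helpers differ in detail), `parse_args()` returns the chosen command,
its keyword arguments, a verbosity level, and a context manager that
controls whether tracebacks are shown:

```python
import argparse
import contextlib
import logging

def parse_args(argv=None):
    # Sketch of the (cmd, cmd_kwargs, verbosity, traceback_cm) shape
    # used by c-analyzer.py; the command set here is invented.
    parser = argparse.ArgumentParser(prog='c-analyzer')
    parser.add_argument('-v', '--verbose', action='count', default=0)
    parser.add_argument('cmd', choices=['check', 'analyze'])
    args = parser.parse_args(argv)
    # Hide tracebacks unless the user asked for verbose output.
    traceback_cm = (contextlib.nullcontext() if args.verbose
                    else contextlib.suppress(Exception))
    return args.cmd, {}, args.verbose, traceback_cm

def configure_logger(verbosity):
    logging.basicConfig(
        level=logging.DEBUG if verbosity else logging.WARNING)

def main(cmd, cmd_kwargs):
    # Stand-in dispatcher for illustration.
    return f'ran {cmd}'
```

The entry-point script then simply threads these four values together,
which keeps c-analyzer.py itself down to a few lines.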
84 changes: 84 additions & 0 deletions Tools/c-analyzer/c_analyzer/__init__.py
@@ -0,0 +1,84 @@
from c_parser import (
    parse_files as _parse_files,
)
from c_parser.info import (
    KIND,
    TypeDeclaration,
    filter_by_kind,
    collate_by_kind_group,
    resolve_parsed,
)
from . import (
    analyze as _analyze,
    datafiles as _datafiles,
)
from .info import Analysis


def analyze(filenames, **kwargs):
    results = iter_analysis_results(filenames, **kwargs)
    return Analysis.from_results(results)


def iter_analysis_results(filenames, *,
                          known=None,
                          **kwargs
                          ):
    decls = iter_decls(filenames, **kwargs)
    yield from analyze_decls(decls, known)


def iter_decls(filenames, *,
               kinds=None,
               parse_files=_parse_files,
               **kwargs
               ):
    kinds = KIND.DECLS if kinds is None else (KIND.DECLS & set(kinds))
    parse_files = parse_files or _parse_files

    parsed = parse_files(filenames, **kwargs)
    parsed = filter_by_kind(parsed, kinds)
    for item in parsed:
        yield resolve_parsed(item)


def analyze_decls(decls, known, *,
                  analyze_resolved=None,
                  handle_unresolved=True,
                  relroot=None,
                  ):
    knowntypes, knowntypespecs = _datafiles.get_known(
        known,
        handle_unresolved=handle_unresolved,
        analyze_resolved=analyze_resolved,
        relroot=relroot,
    )

    decls = list(decls)
    collated = collate_by_kind_group(decls)

    types = {decl: None for decl in collated['type']}
    typespecs = _analyze.get_typespecs(types)

    def analyze_decl(decl):
        return _analyze.analyze_decl(
            decl,
            typespecs,
            knowntypespecs,
            types,
            knowntypes,
            analyze_resolved=analyze_resolved,
        )
    _analyze.analyze_type_decls(types, analyze_decl, handle_unresolved)
    for decl in decls:
        if decl in types:
            resolved = types[decl]
        else:
            resolved = analyze_decl(decl)
            if resolved and handle_unresolved:
                typedeps, _ = resolved
                if not isinstance(typedeps, TypeDeclaration):
                    if not typedeps or None in typedeps:
                        raise NotImplementedError((decl, resolved))

        yield decl, resolved
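The key idea in `analyze_decls()` is a two-pass resolution: all type
declarations are analyzed and cached first, so that other declarations
analyzed afterwards can be resolved against the full set of types even
when a variable appears before its type in the input. A toy model of
that pattern, using invented dict-based declarations rather than the
real `c_parser.info` classes:

```python
# Toy sketch of two-pass resolution: types first, then everything else.
# The decl dicts and result tuples here are invented for illustration.
def analyze_decls_sketch(decls, knowntypes):
    # Collect type declarations up front (initially unresolved).
    types = {d['name']: None for d in decls if d['kind'] == 'type'}

    def analyze_decl(decl):
        if decl['kind'] == 'type':
            return ('type', decl['name'])
        # Resolve a variable against locally declared types first,
        # then against the known-types data (cf. ignored-globals.txt).
        typename = decl['type']
        if typename in types or typename in knowntypes:
            return ('var', typename)
        raise NotImplementedError(decl)

    # Pass 1: analyze all type declarations.
    for d in decls:
        if d['kind'] == 'type':
            types[d['name']] = analyze_decl(d)
    # Pass 2: yield every decl, reusing the cached type results.
    for d in decls:
        if d['kind'] == 'type':
            yield d['name'], types[d['name']]
        else:
            yield d['name'], analyze_decl(d)
```

Note that a variable can reference a type declared later in the input;
the first pass is what makes that resolvable in the second.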