Flush error messages incrementally after processing a file by msullivan · Pull Request #4396 · python/mypy · GitHub

Flush error messages incrementally after processing a file #4396

Merged · 15 commits · Jan 9, 2018
Changes from 1 commit
Flush error messages incrementally after processing a file
In order to avoid duplicate error messages for errors produced in both
load_graph() and process_graph() and to prevent misordered error
messages in a number of places, lists of error messages are now
tracked per-file.

These lists are collected and printed out when a file is complete.  To
maintain consistency with clients that use .messages() (namely,
tests), messages are generated file-at-a-time even when not printing
them out incrementally.

Fixes #1294
msullivan committed Jan 4, 2018
commit 4b380299b40cf22b424593100c2d6b86d74f224d
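
A rough sketch of how a caller can stream diagnostics with the new hook (not part of this PR; the build() call is abbreviated, and the Options()/BuildSource arguments are illustrative):

    from typing import List

    from mypy import build
    from mypy.build import BuildSource
    from mypy.options import Options


    def flush_errors(new_messages: List[str]) -> None:
        # Called with freshly formatted messages each time a file finishes
        # processing, and once more at the end of dispatch().
        for message in new_messages:
            print(message)


    result = build.build(sources=[BuildSource('example.py', 'example', None)],
                         options=Options(),
                         flush_errors=flush_errors)
    # result.errors still holds the complete, accumulated message list.

If flush_errors is omitted, behaviour is unchanged: messages only become available from the BuildResult (or the CompileError) once the whole build finishes.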
26 changes: 21 additions & 5 deletions mypy/build.py
@@ -133,6 +133,8 @@ def build(sources: List[BuildSource],
alt_lib_path: Optional[str] = None,
bin_dir: Optional[str] = None,
saved_cache: Optional[SavedCache] = None,
flush_errors: Optional[Callable[[List[str]], None]] = None,
plugin: Optional[Plugin] = None,
) -> BuildResult:
"""Analyze a program.

@@ -150,6 +152,8 @@ def build(sources: List[BuildSource],
bin_dir: directory containing the mypy script, used for finding data
directories; if omitted, use '.' as the data directory
saved_cache: optional dict with saved cache state for dmypy (read-write!)
flush_errors: optional function to flush errors after a file is processed
plugin: optional plugin that overrides the configured one
"""
# This seems the most reasonable place to tune garbage collection.
gc.set_threshold(50000)
@@ -199,7 +203,7 @@ def build(sources: List[BuildSource],
reports = Reports(data_dir, options.report_dirs)
source_set = BuildSourceSet(sources)
errors = Errors(options.show_error_context, options.show_column_numbers)
plugin = load_plugins(options, errors)
plugin = plugin or load_plugins(options, errors)

# Construct a build manager object to hold state during the build.
#
@@ -212,10 +216,12 @@ def build(sources: List[BuildSource],
version_id=__version__,
plugin=plugin,
errors=errors,
saved_cache=saved_cache)
saved_cache=saved_cache,
flush_errors=flush_errors)

try:
graph = dispatch(sources, manager)
manager.error_flush(manager.errors.new_messages())
return BuildResult(manager, graph)
finally:
manager.log("Build finished in %.3f seconds with %d modules, and %d errors" %
@@ -518,6 +524,7 @@ class BuildManager:
version_id: The current mypy version (based on commit id when possible)
plugin: Active mypy plugin(s)
errors: Used for reporting all errors
flush_errors: A function for optionally processing errors after each SCC
saved_cache: Dict with saved cache state for dmypy and fine-grained incremental mode
(read-write!)
stats: Dict with various instrumentation numbers
@@ -532,6 +539,7 @@ def __init__(self, data_dir: str,
version_id: str,
plugin: Plugin,
errors: Errors,
flush_errors: Optional[Callable[[List[str]], None]] = None,
saved_cache: Optional[SavedCache] = None,
) -> None:
self.start_time = time.time()
@@ -555,6 +563,7 @@ def __init__(self, data_dir: str,
self.stale_modules = set() # type: Set[str]
self.rechecked_modules = set() # type: Set[str]
self.plugin = plugin
self.flush_errors = flush_errors
self.saved_cache = saved_cache if saved_cache is not None else {} # type: SavedCache
self.stats = {} # type: Dict[str, Any] # Values are ints or floats

@@ -703,6 +712,10 @@ def add_stats(self, **kwds: Any) -> None:
def stats_summary(self) -> Mapping[str, object]:
return self.stats

def error_flush(self, msgs: List[str]) -> None:
if self.flush_errors:
self.flush_errors(msgs)


def remove_cwd_prefix_from_path(p: str) -> str:
"""Remove current working directory prefix from p, if present.
@@ -1973,6 +1986,10 @@ def write_cache(self) -> None:
def dependency_priorities(self) -> List[int]:
return [self.priorities.get(dep, PRI_HIGH) for dep in self.dependencies]

def generate_unused_ignore_notes(self) -> None:
if self.options.warn_unused_ignores:
Review comment (Member):
Since you've made this effectively into a per-module option, please add it to the list of such in options.py.

self.manager.errors.generate_unused_ignore_notes(self.xpath)


def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph:
set_orig = set(manager.saved_cache)
@@ -1999,9 +2016,6 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph:
dump_graph(graph)
return graph
process_graph(graph, manager)
if manager.options.warn_unused_ignores:
# TODO: This could also be a per-module option.
manager.errors.generate_unused_ignore_notes()
updated = preserve_cache(graph)
set_updated = set(updated)
manager.saved_cache.clear()
@@ -2490,6 +2504,8 @@ def process_stale_scc(graph: Graph, scc: List[str], manager: BuildManager) -> None:
graph[id].transitive_error = True
for id in stale:
graph[id].finish_passes()
graph[id].generate_unused_ignore_notes()
manager.error_flush(manager.errors.new_file_messages(graph[id].xpath))
graph[id].write_cache()
graph[id].mark_as_rechecked()

108 changes: 84 additions & 24 deletions mypy/errors.py
@@ -90,8 +90,16 @@ class Errors:
current error context (nested imports).
"""

# List of generated error messages.
error_info = None # type: List[ErrorInfo]
# Map from files to generated error messages. Is an OrderedDict so
Review comment (Member):
I wonder if it would be safer to use the module ID rather than the file as the key? Because add_error_info() doesn't call remove_path_prefix(). And IIRC sometimes different passes have different ideas about the filename (normalized or not). However the old code makes the same assumption about error_files I suppose.

Reply (Collaborator, author):
Not all error infos have a module, unfortunately.

# that it can be used to order messages based on the order the
# files were processed.
error_info_map = None # type: Dict[str, List[ErrorInfo]]

# The size of error_info the last time that error messages were flushed
Review comment (Member):
There is no error_info any more. :-) More seriously, this design feels a bit brittle, but I'm not sure how to remove that feeling -- I was thinking of having two maps, one with flushed errors and one with "new" errors, where the new_* method transfers from the latter to the former, but that would require a whole bunch of updates to e.g. num_errors etc.

But perhaps the flushed errors are not used any more (other than being counted, and that's just used as a Boolean)? That might simplify things a bit? (It seems messages() just returns formatted_messages so it doesn't need the flushed errors either.)

new_errors_start_map = None # type: Dict[str, int]

# A cache of the formatted messages
formatted_messages = None # type: List[str]

# Current error context: nested import context/stack, as a list of (path, line) pairs.
import_ctx = None # type: List[Tuple[str, int]]
@@ -141,8 +149,10 @@ def __init__(self, show_error_context: bool = False,
self.initialize()

def initialize(self) -> None:
self.error_info = []
self.error_info_map = OrderedDict()
self.new_errors_start_map = defaultdict(int)
self.import_ctx = []
self.formatted_messages = []
self.error_files = set()
self.type_name = [None]
self.function_or_member = [None]
@@ -289,6 +299,11 @@ def report(self,
target=self.current_target())
self.add_error_info(info)

def _add_error_info(self, info: ErrorInfo) -> None:
if info.file not in self.error_info_map:
self.error_info_map[info.file] = []
self.error_info_map[info.file].append(info)

def add_error_info(self, info: ErrorInfo) -> None:
(file, line) = cast(Tuple[str, int], info.origin) # see issue 1855
if not info.blocker: # Blockers cannot be ignored
@@ -302,40 +317,41 @@ def add_error_info(self, info: ErrorInfo) -> None:
if info.message in self.only_once_messages:
return
self.only_once_messages.add(info.message)
self.error_info.append(info)
self._add_error_info(info)
self.error_files.add(file)
Review comment (Member):
The whole variable error_files is redundant, it should always be equal to set(self.error_info_map.keys()). (But see my comment about the design of the latter.)


def generate_unused_ignore_notes(self) -> None:
for file, ignored_lines in self.ignored_lines.items():
if not self.is_typeshed_file(file):
for line in ignored_lines - self.used_ignored_lines[file]:
# Don't use report since add_error_info will ignore the error!
info = ErrorInfo(self.import_context(), file, self.current_module(), None,
None, line, -1, 'note', "unused 'type: ignore' comment",
False, False)
self.error_info.append(info)
def generate_unused_ignore_notes(self, file: str) -> None:
ignored_lines = self.ignored_lines[file]
if not self.is_typeshed_file(file):
for line in ignored_lines - self.used_ignored_lines[file]:
# Don't use report since add_error_info will ignore the error!
info = ErrorInfo(self.import_context(), file, self.current_module(), None,
None, line, -1, 'note', "unused 'type: ignore' comment",
False, False)
self._add_error_info(info)

def is_typeshed_file(self, file: str) -> bool:
# gross, but no other clear way to tell
return 'typeshed' in os.path.normpath(file).split(os.sep)

def num_messages(self) -> int:
"""Return the number of generated messages."""
return len(self.error_info)
return sum(len(x) for x in self.error_info_map.values())

def is_errors(self) -> bool:
"""Are there any generated errors?"""
return bool(self.error_info)
return bool(self.error_info_map)

def is_blockers(self) -> bool:
"""Are the any errors that are blockers?"""
return any(err for err in self.error_info if err.blocker)
return any(err for errs in self.error_info_map.values() for err in errs if err.blocker)

def blocker_module(self) -> Optional[str]:
"""Return the module with a blocking error, or None if not possible."""
for err in self.error_info:
if err.blocker:
return err.module
for errs in self.error_info_map.values():
for err in errs:
if err.blocker:
return err.module
return None

def is_errors_for_file(self, file: str) -> bool:
@@ -347,17 +363,22 @@ def raise_error(self) -> None:

Render the messages suitable for displaying.
"""
# self.new_messages() will format all messages that haven't already
# been returned from a new_file_messages() call. Count how many
# we've seen before that.
already_seen = len(self.formatted_messages)
raise CompileError(self.messages(),
use_stdout=True,
module_with_blocker=self.blocker_module())
module_with_blocker=self.blocker_module(),
num_already_seen=already_seen)

def messages(self) -> List[str]:
def format_messages(self, error_info: List[ErrorInfo]) -> List[str]:
"""Return a string list that represents the error messages.

Use a form suitable for displaying to the user.
"""
a = [] # type: List[str]
errors = self.render_messages(self.sort_messages(self.error_info))
errors = self.render_messages(self.sort_messages(error_info))
errors = self.remove_duplicates(errors)
for file, line, column, severity, message in errors:
s = ''
@@ -375,12 +396,48 @@
a.append(s)
return a

def new_file_messages(self, path: str) -> List[str]:
"""Return a string list of new error messages from a given file.

Use a form suitable for displaying to the user.
Formatted messages are cached in the order they are generated
by new_file_messages() in order to have consistency in output
between incrementally generated messages and .messages() calls.
"""
if path not in self.error_info_map:
return []
msgs = self.format_messages(self.error_info_map[path][self.new_errors_start_map[path]:])
Review comment (Member):
The RHS expression is a bit complicated. I'd split off self.new_errors_start_map[path] and stick it into a local variable, and maybe I'd also split off self.error_info_map[path][var:].

Another concern I have here is that it's fairly subtle that format_messages() doesn't always return the same number of messages as the length of its argument, because of remove_duplicates(). I think there are no places where the number of formatted messages is compared to the number of ErrorInfo instances, but this gives another reason why the design feels brittle to me. (Basically I don't like having state that contains an array and an index somewhere into the middle of that array.)

self.new_errors_start_map[path] = len(self.error_info_map[path])
self.formatted_messages += msgs
return msgs

def new_messages(self) -> List[str]:
"""Return a string list of new error messages.

Use a form suitable for displaying to the user.
Errors from different files are ordered based on the order in which
they first generated an error.
"""
msgs = []
for key in self.error_info_map.keys():
msgs.extend(self.new_file_messages(key))
return msgs

def messages(self) -> List[str]:
"""Return a string list that represents the error messages.

Use a form suitable for displaying to the user.
"""
self.new_messages()
Review comment (Collaborator):
Add comment about new_messages() storing new messages as a side effect (or potentially rename the method to something that makes this more explicit).

return self.formatted_messages

def targets(self) -> Set[str]:
"""Return a set of all targets that contain errors."""
# TODO: Make sure that either target is always defined or that not being defined
# is okay for fine-grained incremental checking.
return set(info.target
for info in self.error_info
for errs in self.error_info_map.values()
for info in errs
if info.target)

def render_messages(self, errors: List[ErrorInfo]) -> List[Tuple[Optional[str], int, int,
Review comment (Member):
GitHub won't let me place this comment on the line to which it applies -- the docstring for sort_messages() below: IIUC there's no longer any chance that messages in the same array have different files, so you can change "same file context" into "same context" in that docstring.

@@ -517,15 +574,18 @@ class CompileError(Exception):
use_stdout = False
# Can be set in case there was a module with a blocking error
module_with_blocker = None # type: Optional[str]
num_already_seen = 0

def __init__(self,
messages: List[str],
use_stdout: bool = False,
module_with_blocker: Optional[str] = None) -> None:
module_with_blocker: Optional[str] = None,
num_already_seen: int = 0) -> None:
super().__init__('\n'.join(messages))
self.messages = messages
self.use_stdout = use_stdout
self.module_with_blocker = module_with_blocker
self.num_already_seen = num_already_seen
Review comment (Member):
This adds another ugly wart to the API (see the code in build.py that uses this variable). It might be prettier to have another List[str] attribute unflushed_messages that's set here. (Yet another place where I don't like indices pointing into arrays. :-)



class DecodeError(Exception):
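To make the review discussion above easier to follow (the concerns about new_errors_start_map being an index into the middle of a list, and about format_messages() dropping duplicates), here is a stripped-down, hypothetical sketch of the bookkeeping that new_file_messages() performs. Plain strings stand in for ErrorInfo, and rendering and duplicate removal are omitted; this is not code from the PR:

    from collections import OrderedDict
    from typing import Dict, List


    class IncrementalErrorsSketch:
        def __init__(self) -> None:
            # Per-file error lists, ordered by which file first produced an error.
            self.error_info_map = OrderedDict()  # type: Dict[str, List[str]]
            # Index of the first not-yet-flushed entry for each file.
            self.new_errors_start_map = {}  # type: Dict[str, int]
            # Everything flushed so far, in flush order.
            self.formatted_messages = []  # type: List[str]

        def add(self, path: str, message: str) -> None:
            self.error_info_map.setdefault(path, []).append(message)

        def new_file_messages(self, path: str) -> List[str]:
            # Flush only the entries added since the last call for this path,
            # then advance the per-file start index past them.
            infos = self.error_info_map.get(path, [])
            start = self.new_errors_start_map.get(path, 0)
            new = infos[start:]
            self.new_errors_start_map[path] = len(infos)
            self.formatted_messages += new
            return new

Calling new_file_messages('a.py') twice in a row returns the new messages the first time and an empty list the second. The alternative floated in the comments -- moving flushed entries into a separate "flushed" map, or attaching an unflushed_messages list to CompileError -- would avoid keeping an index that points into the middle of a list, at the cost of touching the places that count errors.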
32 changes: 21 additions & 11 deletions mypy/main.py
@@ -8,7 +8,7 @@
import sys
import time

from typing import Any, Dict, List, Mapping, Optional, Sequence, Set, Tuple
from typing import Any, Dict, List, Mapping, Optional, Sequence, Set, Tuple, Callable

from mypy import build
from mypy import defaults
@@ -62,13 +62,27 @@ def main(script_path: Optional[str], args: Optional[List[str]] = None) -> None:
args = sys.argv[1:]
sources, options = process_options(args)
serious = False
errors = False

def flush_errors(a: List[str]) -> None:
nonlocal errors
if a:
errors = True
f = sys.stderr if serious else sys.stdout
try:
for m in a:
f.write(m + '\n')
except BrokenPipeError:
pass
Review comment (Member):
I'd sys.exit(1) here.


try:
res = type_check_only(sources, bin_dir, options)
res = type_check_only(sources, bin_dir, options, flush_errors)
Review comment (Collaborator):
I'd prefer if the main body of test cases (mypy/test/testscheck.py) would use streaming errors (without testing that flushing happens as expected, i.e. we'd still only test that the sequence of errors in the output is as expected). Not sure if this is feasible without many changes to test cases. If not, at least it would be good to do a one-off manual check that streaming produces the same errors as not streaming in test cases.

a = res.errors
except CompileError as e:
a = e.messages
if not e.use_stdout:
serious = True
flush_errors(a[e.num_already_seen:])
if options.warn_unused_configs and options.unused_configs:
print("Warning: unused section(s) in %s: %s" %
(options.config_file,
@@ -77,13 +91,7 @@ def main(script_path: Optional[str], args: Optional[List[str]] = None) -> None:
if options.junit_xml:
t1 = time.time()
util.write_junit_xml(t1 - t0, serious, a, options.junit_xml)
if a:
f = sys.stderr if serious else sys.stdout
try:
for m in a:
f.write(m + '\n')
except BrokenPipeError:
pass
if errors:
sys.exit(1)


@@ -112,11 +120,13 @@ def readlinkabs(link: str) -> str:


def type_check_only(sources: List[BuildSource], bin_dir: Optional[str],
options: Options) -> BuildResult:
options: Options,
flush_errors: Optional[Callable[[List[str]], None]]) -> BuildResult:
# Type-check the program and dependencies.
return build.build(sources=sources,
bin_dir=bin_dir,
options=options)
options=options,
flush_errors=flush_errors)


FOOTER = """environment variables:
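Following up on the review note about the BrokenPipeError handler, a standalone sketch of the suggested variant that exits instead of silently passing (the signature is simplified from the nested flush_errors closure in the diff, and the exit code is illustrative, not taken from this PR):

    import sys
    from typing import List


    def flush_errors(new_messages: List[str], serious: bool = False) -> None:
        # Stream one batch of messages as soon as a file finishes; if the
        # reader has gone away (e.g. `mypy | head`), stop rather than keep
        # type checking only to throw the output away.
        stream = sys.stderr if serious else sys.stdout
        try:
            for message in new_messages:
                stream.write(message + '\n')
            stream.flush()
        except BrokenPipeError:
            sys.exit(1)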
5 changes: 3 additions & 2 deletions mypy/messages.py
@@ -153,8 +153,9 @@ def copy(self) -> 'MessageBuilder':
def add_errors(self, messages: 'MessageBuilder') -> None:
"""Add errors in messages to this builder."""
if self.disable_count <= 0:
for info in messages.errors.error_info:
self.errors.add_error_info(info)
for errs in messages.errors.error_info_map.values():
for info in errs:
self.errors.add_error_info(info)

def disable_errors(self) -> None:
self.disable_count += 1