Flush error messages incrementally after processing a file by msullivan · Pull Request #4396 · python/mypy · GitHub

Flush error messages incrementally after processing a file #4396

Merged · 15 commits · Jan 9, 2018
Changes from 1 commit
Flush error messages incrementally after processing a file
In order to avoid duplicate error messages for errors produced in both
load_graph() and process_graph() and to prevent misordered error
messages in a number of places, lists of error messages are now
tracked per-file.

These lists are collected and printed out when a file is complete.  To
maintain consistency with clients that use .messages() (namely,
tests), messages are generated file-at-a-time even when not printing
them out incrementally.

Fixes #1294
msullivan committed Jan 4, 2018
commit 4b380299b40cf22b424593100c2d6b86d74f224d
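
A rough sketch of how a caller can stream diagnostics with the new hook (not part of this PR; the build() call is abbreviated, and the Options()/BuildSource arguments are illustrative):

    from typing import List

    from mypy import build
    from mypy.build import BuildSource
    from mypy.options import Options


    def flush_errors(new_messages: List[str]) -> None:
        # Called with freshly formatted messages each time a file finishes
        # processing, and once more at the end of dispatch().
        for message in new_messages:
            print(message)


    result = build.build(sources=[BuildSource('example.py', 'example', None)],
                         options=Options(),
                         flush_errors=flush_errors)
    # result.errors still holds the complete, accumulated message list.

If flush_errors is omitted, behaviour is unchanged: messages only become available from the BuildResult (or the CompileError) once the whole build finishes.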
26 changes: 21 additions & 5 deletions mypy/build.py
@@ -133,6 +133,8 @@ def build(sources: List[BuildSource],
alt_lib_path: Optional[str] = None,
bin_dir: Optional[str] = None,
saved_cache: Optional[SavedCache] = None,
flush_errors: Optional[Callable[[List[str]], None]] = None,
plugin: Optional[Plugin] = None,
) -> BuildResult:
"""Analyze a program.

@@ -150,6 +152,8 @@ def build(sources: List[BuildSource],
bin_dir: directory containing the mypy script, used for finding data
directories; if omitted, use '.' as the data directory
saved_cache: optional dict with saved cache state for dmypy (read-write!)
flush_errors: optional function to flush errors after a file is processed
plugin: optional plugin that overrides the configured one
"""
# This seems the most reasonable place to tune garbage collection.
gc.set_threshold(50000)
@@ -199,7 +203,7 @@ def build(sources: List[BuildSource],
reports = Reports(data_dir, options.report_dirs)
source_set = BuildSourceSet(sources)
errors = Errors(options.show_error_context, options.show_column_numbers)
plugin = load_plugins(options, errors)
plugin = plugin or load_plugins(options, errors)

# Construct a build manager object to hold state during the build.
#
@@ -212,10 +216,12 @@ def build(sources: List[BuildSource],
version_id=__version__,
plugin=plugin,
errors=errors,
saved_cache=saved_cache)
saved_cache=saved_cache,
flush_errors=flush_errors)

try:
graph = dispatch(sources, manager)
manager.error_flush(manager.errors.new_messages())
return BuildResult(manager, graph)
finally:
manager.log("Build finished in %.3f seconds with %d modules, and %d errors" %
@@ -518,6 +524,7 @@ class BuildManager:
version_id: The current mypy version (based on commit id when possible)
plugin: Active mypy plugin(s)
errors: Used for reporting all errors
flush_errors: A function for optionally processing errors after each SCC
saved_cache: Dict with saved cache state for dmypy and fine-grained incremental mode
(read-write!)
stats: Dict with various instrumentation numbers
@@ -532,6 +539,7 @@ def __init__(self, data_dir: str,
version_id: str,
plugin: Plugin,
errors: Errors,
flush_errors: Optional[Callable[[List[str]], None]] = None,
saved_cache: Optional[SavedCache] = None,
) -> None:
self.start_time = time.time()
@@ -555,6 +563,7 @@ def __init__(self, data_dir: str,
self.stale_modules = set() # type: Set[str]
self.rechecked_modules = set() # type: Set[str]
self.plugin = plugin
self.flush_errors = flush_errors
self.saved_cache = saved_cache if saved_cache is not None else {} # type: SavedCache
self.stats = {} # type: Dict[str, Any] # Values are ints or floats

@@ -703,6 +712,10 @@ def add_stats(self, **kwds: Any) -> None:
def stats_summary(self) -> Mapping[str, object]:
return self.stats

def error_flush(self, msgs: List[str]) -> None:
if self.flush_errors:
self.flush_errors(msgs)


def remove_cwd_prefix_from_path(p: str) -> str:
"""Remove current working directory prefix from p, if present.
@@ -1973,6 +1986,10 @@ def write_cache(self) -> None:
def dependency_priorities(self) -> List[int]:
return [self.priorities.get(dep, PRI_HIGH) for dep in self.dependencies]

def generate_unused_ignore_notes(self) -> None:
if self.options.warn_unused_ignores:
Review comment (Member):
Since you've made this effectively into a per-module option, please add it to the list of such in options.py.

self.manager.errors.generate_unused_ignore_notes(self.xpath)


def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph:
set_orig = set(manager.saved_cache)
@@ -1999,9 +2016,6 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph:
dump_graph(graph)
return graph
process_graph(graph, manager)
if manager.options.warn_unused_ignores:
# TODO: This could also be a per-module option.
manager.errors.generate_unused_ignore_notes()
updated = preserve_cache(graph)
set_updated = set(updated)
manager.saved_cache.clear()
@@ -2490,6 +2504,8 @@ def process_stale_scc(graph: Graph, scc: List[str], manager: BuildManager) -> None:
graph[id].transitive_error = True
for id in stale:
graph[id].finish_passes()
graph[id].generate_unused_ignore_notes()
manager.error_flush(manager.errors.new_file_messages(graph[id].xpath))
graph[id].write_cache()
graph[id].mark_as_rechecked()

108 changes: 84 additions & 24 deletions mypy/errors.py
@@ -90,8 +90,16 @@ class Errors:
current error context (nested imports).
"""

# List of generated error messages.
error_info = None # type: List[ErrorInfo]
# Map from files to generated error messages. Is an OrderedDict so
Review comment (Member):
I wonder if it would be safer to use the module ID rather than the file as the key? Because add_error_info() doesn't call remove_path_prefix(). And IIRC sometimes different passes have different ideas about the filename (normalized or not). However the old code makes the same assumption about error_files I suppose.

Reply (Collaborator, author):
Not all error infos have a module, unfortunately.

# that it can be used to order messages based on the order the
# files were processed.
error_info_map = None # type: Dict[str, List[ErrorInfo]]

# The size of error_info the last time that error messages were flushed
Review comment (Member):
There is no error_info any more. :-) More seriously, this design feels a bit brittle, but I'm not sure how to remove that feeling -- I was thinking of having two maps, one with flushed errors and one with "new" errors, where the new_* method transfers from the latter to the former, but that would require a whole bunch of updates to e.g. num_errors etc.

But perhaps the flushed errors are not used any more (other than being counted, and that's just used as a Boolean)? That might simplify things a bit? (It seems messages() just returns formatted_messages so it doesn't need the flushed errors either.)

new_errors_start_map = None # type: Dict[str, int]

# A cache of the formatted messages
formatted_messages = None # type: List[str]

# Current error context: nested import context/stack, as a list of (path, line) pairs.
import_ctx = None # type: List[Tuple[str, int]]
@@ -141,8 +149,10 @@ def __init__(self, show_error_context: bool = False,
self.initialize()

def initialize(self) -> None:
self.error_info = []
self.error_info_map = OrderedDict()
self.new_errors_start_map = defaultdict(int)
self.import_ctx = []
self.formatted_messages = []
self.error_files = set()
self.type_name = [None]
self.function_or_member = [None]
@@ -289,6 +299,11 @@ def report(self,
target=self.current_target())
self.add_error_info(info)

def _add_error_info(self, info: ErrorInfo) -> None:
if info.file not in self.error_info_map:
self.error_info_map[info.file] = []
self.error_info_map[info.file].append(info)

def add_error_info(self, info: ErrorInfo) -> None:
(file, line) = cast(Tuple[str, int], info.origin) # see issue 1855
if not info.blocker: # Blockers cannot be ignored
@@ -302,40 +317,41 @@ def add_error_info(self, info: ErrorInfo) -> None:
if info.message in self.only_once_messages:
return
self.only_once_messages.add(info.message)
self.error_info.append(info)
self._add_error_info(info)
self.error_files.add(file)
Review comment (Member):
The whole variable error_files is redundant, it should always be equal to set(self.error_info_map.keys()). (But see my comment about the design of the latter.)


def generate_unused_ignore_notes(self) -> None:
for file, ignored_lines in self.ignored_lines.items():
if not self.is_typeshed_file(file):
for line in ignored_lines - self.used_ignored_lines[file]:
# Don't use report since add_error_info will ignore the error!
info = ErrorInfo(self.import_context(), file, self.current_module(), None,
None, line, -1, 'note', "unused 'type: ignore' comment",
False, False)
self.error_info.append(info)
def generate_unused_ignore_notes(self, file: str) -> None:
ignored_lines = self.ignored_lines[file]
if not self.is_typeshed_file(file):
for line in ignored_lines - self.used_ignored_lines[file]:
# Don't use report since add_error_info will ignore the error!
info = ErrorInfo(self.import_context(), file, self.current_module(), None,
None, line, -1, 'note', "unused 'type: ignore' comment",
False, False)
self._add_error_info(info)

def is_typeshed_file(self, file: str) -> bool:
# gross, but no other clear way to tell
return 'typeshed' in os.path.normpath(file).split(os.sep)

def num_messages(self) -> int:
"""Return the number of generated messages."""
return len(self.error_info)
return sum(len(x) for x in self.error_info_map.values())

def is_errors(self) -> bool:
"""Are there any generated errors?"""
return bool(self.error_info)
return bool(self.error_info_map)

def is_blockers(self) -> bool:
"""Are the any errors that are blockers?"""
return any(err for err in self.error_info if err.blocker)
return any(err for errs in self.error_info_map.values() for err in errs if err.blocker)

def blocker_module(self) -> Optional[str]:
"""Return the module with a blocking error, or None if not possible."""
for err in self.error_info:
if err.blocker:
return err.module
for errs in self.error_info_map.values():
for err in errs:
if err.blocker:
return err.module
return None

def is_errors_for_file(self, file: str) -> bool:
@@ -347,17 +363,22 @@ def raise_error(self) -> None:

Render the messages suitable for displaying.
"""
# self.new_messages() will format all messages that haven't already
# been returned from a new_file_messages() call. Count how many
# we've seen before that.
already_seen = len(self.formatted_messages)
raise CompileError(self.messages(),
use_stdout=True,
module_with_blocker=self.blocker_module())
module_with_blocker=self.blocker_module(),
num_already_seen=already_seen)

def messages(self) -> List[str]:
def format_messages(self, error_info: List[ErrorInfo]) -> List[str]:
"""Return a string list that represents the error messages.

Use a form suitable for displaying to the user.
"""
a = [] # type: List[str]
errors = self.render_messages(self.sort_messages(self.error_info))
errors = self.render_messages(self.sort_messages(error_info))
errors = self.remove_duplicates(errors)
for file, line, column, severity, message in errors:
s = ''
@@ -375,12 +396,48 @@
a.append(s)
return a

def new_file_messages(self, path: str) -> List[str]:
"""Return a string list of new error messages from a given file.

Use a form suitable for displaying to the user.
Formatted messages are cached in the order they are generated
by new_file_messages() in order to have consistency in output
between incrementally generated messages and .messages() calls.
"""
if path not in self.error_info_map:
return []
msgs = self.format_messages(self.error_info_map[path][self.new_errors_start_map[path]:])
Review comment (Member):
The RHS expression is a bit complicated. I'd split off self.new_errors_start_map[path] and stick it into a local variable, and maybe I'd also split off self.error_info_map[path][var:].

Another concern I have here is that it's fairly subtle that format_messages() doesn't always return the same number of messages as the length of its argument, because of remove_duplicates(). I think there are no places where the number of formatted messages is compared to the number of ErrorInfo instances, but this gives another reason why the design feels brittle to me. (Basically I don't like having state that contains an array and an index somewhere into the middle of that array.)

self.new_errors_start_map[path] = len(self.error_info_map[path])
self.formatted_messages += msgs
return msgs

def new_messages(self) -> List[str]:
"""Return a string list of new error messages.

Use a form suitable for displaying to the user.
Errors from different files are ordered based on the order in which
they first generated an error.
"""
msgs = []
for key in self.error_info_map.keys():
msgs.extend(self.new_file_messages(key))
return msgs

def messages(self) -> List[str]:
"""Return a string list that represents the error messages.

Use a form suitable for displaying to the user.
"""
self.new_messages()
Review comment (Collaborator):
Add comment about new_messages() storing new messages as a side effect (or potentially rename the method to something that makes this more explicit).

return self.formatted_messages

def targets(self) -> Set[str]:
"""Return a set of all targets that contain errors."""
# TODO: Make sure that either target is always defined or that not being defined
# is okay for fine-grained incremental checking.
return set(info.target
for info in self.error_info
for errs in self.error_info_map.values()
for info in errs
if info.target)

def render_messages(self, errors: List[ErrorInfo]) -> List[Tuple[Optional[str], int, int,
Review comment (Member):
GitHub won't let me place this comment on the line to which it applies -- the docstring for sort_messages() below: IIUC there's no longer any chance that messages in the same array have different files, so you can change "same file context" into "same context" in that docstring.

@@ -517,15 +574,18 @@ class CompileError(Exception):
use_stdout = False
# Can be set in case there was a module with a blocking error
module_with_blocker = None # type: Optional[str]
num_already_seen = 0

def __init__(self,
messages: List[str],
use_stdout: bool = False,
module_with_blocker: Optional[str] = None) -> None:
module_with_blocker: Optional[str] = None,
num_already_seen: int = 0) -> None:
super().__init__('\n'.join(messages))
self.messages = messages
self.use_stdout = use_stdout
self.module_with_blocker = module_with_blocker
self.num_already_seen = num_already_seen
Review comment (Member):
This adds another ugly wart to the API (see the code in build.py that uses this variable). It might be prettier to have another List[str] attribute unflushed_messages that's set here. (Yet another place where I don't like indices pointing into arrays. :-)



class DecodeError(Exception):
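To make the review discussion above easier to follow (the concerns about new_errors_start_map being an index into the middle of a list, and about format_messages() dropping duplicates), here is a stripped-down, hypothetical sketch of the bookkeeping that new_file_messages() performs. Plain strings stand in for ErrorInfo, and rendering and duplicate removal are omitted; this is not code from the PR:

    from collections import OrderedDict
    from typing import Dict, List


    class IncrementalErrorsSketch:
        def __init__(self) -> None:
            # Per-file error lists, ordered by which file first produced an error.
            self.error_info_map = OrderedDict()  # type: Dict[str, List[str]]
            # Index of the first not-yet-flushed entry for each file.
            self.new_errors_start_map = {}  # type: Dict[str, int]
            # Everything flushed so far, in flush order.
            self.formatted_messages = []  # type: List[str]

        def add(self, path: str, message: str) -> None:
            self.error_info_map.setdefault(path, []).append(message)

        def new_file_messages(self, path: str) -> List[str]:
            # Flush only the entries added since the last call for this path,
            # then advance the per-file start index past them.
            infos = self.error_info_map.get(path, [])
            start = self.new_errors_start_map.get(path, 0)
            new = infos[start:]
            self.new_errors_start_map[path] = len(infos)
            self.formatted_messages += new
            return new

Calling new_file_messages('a.py') twice in a row returns the new messages the first time and an empty list the second. The alternative floated in the comments -- moving flushed entries into a separate "flushed" map, or attaching an unflushed_messages list to CompileError -- would avoid keeping an index that points into the middle of a list, at the cost of touching the places that count errors.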
32 changes: 21 additions & 11 deletions mypy/main.py
@@ -8,7 +8,7 @@
import sys
import time

from typing import Any, Dict, List, Mapping, Optional, Sequence, Set, Tuple
from typing import Any, Dict, List, Mapping, Optional, Sequence, Set, Tuple, Callable

from mypy import build
from mypy import defaults
@@ -62,13 +62,27 @@ def main(script_path: Optional[str], args: Optional[List[str]] = None) -> None:
args = sys.argv[1:]
sources, options = process_options(args)
serious = False
errors = False

def flush_errors(a: List[str]) -> None:
nonlocal errors
if a:
errors = True
f = sys.stderr if serious else sys.stdout
try:
for m in a:
f.write(m + '\n')
except BrokenPipeError:
pass
Review comment (Member):
I'd sys.exit(1) here.


try:
res = type_check_only(sources, bin_dir, options)
res = type_check_only(sources, bin_dir, options, flush_errors)
Review comment (Collaborator):
I'd prefer if the main body of test cases (mypy/test/testscheck.py) would use streaming errors (without testing that flushing happens as expected, i.e. we'd still only test that the sequence of errors in the output is as expected). Not sure if this is feasible without many changes to test cases. If not, at least it would be good to do a one-off manual check that streaming produces the same errors as not streaming in test cases.

a = res.errors
except CompileError as e:
a = e.messages
if not e.use_stdout:
serious = True
flush_errors(a[e.num_already_seen:])
if options.warn_unused_configs and options.unused_configs:
print("Warning: unused section(s) in %s: %s" %
(options.config_file,
@@ -77,13 +91,7 @@ def main(script_path: Optional[str], args: Optional[List[str]] = None) -> None:
if options.junit_xml:
t1 = time.time()
util.write_junit_xml(t1 - t0, serious, a, options.junit_xml)
if a:
f = sys.stderr if serious else sys.stdout
try:
for m in a:
f.write(m + '\n')
except BrokenPipeError:
pass
if errors:
sys.exit(1)


@@ -112,11 +120,13 @@ def readlinkabs(link: str) -> str:


def type_check_only(sources: List[BuildSource], bin_dir: Optional[str],
options: Options) -> BuildResult:
options: Options,
flush_errors: Optional[Callable[[List[str]], None]]) -> BuildResult:
# Type-check the program and dependencies.
return build.build(sources=sources,
bin_dir=bin_dir,
options=options)
options=options,
flush_errors=flush_errors)


FOOTER = """environment variables:
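Following up on the review note about the BrokenPipeError handler, a standalone sketch of the suggested variant that exits instead of silently passing (the signature is simplified from the nested flush_errors closure in the diff, and the exit code is illustrative, not taken from this PR):

    import sys
    from typing import List


    def flush_errors(new_messages: List[str], serious: bool = False) -> None:
        # Stream one batch of messages as soon as a file finishes; if the
        # reader has gone away (e.g. `mypy | head`), stop rather than keep
        # type checking only to throw the output away.
        stream = sys.stderr if serious else sys.stdout
        try:
            for message in new_messages:
                stream.write(message + '\n')
            stream.flush()
        except BrokenPipeError:
            sys.exit(1)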
5 changes: 3 additions & 2 deletions mypy/messages.py
@@ -153,8 +153,9 @@ def copy(self) -> 'MessageBuilder':
def add_errors(self, messages: 'MessageBuilder') -> None:
"""Add errors in messages to this builder."""
if self.disable_count <= 0:
for info in messages.errors.error_info:
self.errors.add_error_info(info)
for errs in messages.errors.error_info_map.values():
for info in errs:
self.errors.add_error_info(info)

def disable_errors(self) -> None:
self.disable_count += 1