Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Per directory configs - preliminary changes #9550

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions .pyenchant_pylint_custom_dict.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ codecs
col's
conf
config
configs
const
Const
contextlib
Expand Down Expand Up @@ -310,11 +311,13 @@ str
stringified
subclasses
subcommands
subconfigs
subdicts
subgraphs
sublists
submodule
submodules
subpackage
subparsers
subparts
subprocess
Expand Down
5 changes: 5 additions & 0 deletions pylint/config/arguments_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ def __init__(
self._directory_namespaces: DirectoryNamespaceDict = {}
"""Mapping of directories and their respective namespace objects."""

self._cli_args: list[str] = []
"""Options that were passed as command line arguments and have highest priority."""

@property
def config(self) -> argparse.Namespace:
"""Namespace for all options."""
Expand Down Expand Up @@ -226,6 +229,8 @@ def _parse_command_line_configuration(
) -> list[str]:
"""Parse the arguments found on the command line into the namespace."""
arguments = sys.argv[1:] if arguments is None else arguments
if not self._cli_args:
self._cli_args = list(arguments)

self.config, parsed_args = self._arg_parser.parse_known_args(
arguments, self.config
Expand Down
2 changes: 1 addition & 1 deletion pylint/config/config_file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def parse_config_file(
raise OSError(f"The config file {file_path} doesn't exist!")

if verbose:
print(f"Using config file {file_path}", file=sys.stderr)
print(f"Loading config file {file_path}", file=sys.stderr)

if file_path.suffix == ".toml":
return _RawConfParser.parse_toml_file(file_path)
Expand Down
7 changes: 6 additions & 1 deletion pylint/config/config_initialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from pylint.lint import PyLinter


# pylint: disable = too-many-statements
def _config_initialization(
linter: PyLinter,
args_list: list[str],
Expand Down Expand Up @@ -82,6 +83,9 @@ def _config_initialization(
args_list = _order_all_first(args_list, joined=True)
parsed_args_list = linter._parse_command_line_configuration(args_list)

# save Runner.verbose to make this preprocessed option visible from other modules
linter.config.verbose = verbose_mode
DanielNoord marked this conversation as resolved.
Show resolved Hide resolved

# Remove the positional arguments separator from the list of arguments if it exists
try:
parsed_args_list.remove("--")
Expand Down Expand Up @@ -141,7 +145,8 @@ def _config_initialization(
linter._parse_error_mode()

# Link the base Namespace object on the current directory
linter._directory_namespaces[Path(".").resolve()] = (linter.config, {})
if len(linter._directory_namespaces) == 0:
linter._directory_namespaces[Path(".").resolve()] = (linter.config, {})
Comment on lines +148 to +149
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now I'd prefer to revert the changes in these two lines.

They are really rightly coupled to the final implementation of the per directory configs and the performance impact is hard to judge on its own. As far as I can see, all other changes in this PR are somewhat sensible on their own. This one isn't.


# parsed_args_list should now only be a list of inputs to lint.
# All other options have been removed from the list.
Expand Down
12 changes: 7 additions & 5 deletions pylint/config/find_default_config_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,19 @@ def _cfg_has_config(path: Path | str) -> bool:
return any(section.startswith("pylint.") for section in parser.sections())


def _yield_default_files() -> Iterator[Path]:
def _yield_default_files(basedir: Path = Path(".")) -> Iterator[Path]:
"""Iterate over the default config file names and see if they exist."""
basedir = Path(basedir)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
basedir = Path(basedir)

That should not be needed.

for config_name in CONFIG_NAMES:
config_file = basedir / config_name
try:
if config_name.is_file():
if config_name.suffix == ".toml" and not _toml_has_config(config_name):
if config_file.is_file():
if config_file.suffix == ".toml" and not _toml_has_config(config_file):
continue
if config_name.suffix == ".cfg" and not _cfg_has_config(config_name):
if config_file.suffix == ".cfg" and not _cfg_has_config(config_file):
continue

yield config_name.resolve()
yield config_file.resolve()
except OSError:
pass

Expand Down
77 changes: 52 additions & 25 deletions pylint/lint/pylinter.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
ModuleDescriptionDict,
Options,
)
from pylint.utils import ASTWalker, FileState, LinterStats, utils
from pylint.utils import ASTWalker, FileState, LinterStats, merge_stats, utils
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you explain why we need the merging of stats for the multi-dir config option?

Copy link
Author

@0nf 0nf Apr 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Current state is that some stat counters are reset during linter.open(), some are reset in linter.set_current_module -> init_single_module(), and some are not reset at all (error, warning etc in 1st group, all counters in stats.by_module in 2nd group, statement in 3rd). It leads to incorrect score calculation when linter (i.e. main checker) is opened per file, or when it is opened after getting asts.
So I decided to reset all possible counters for each new module by creating new LinterStats object in set_current_module.

If stats reset is omitted entirely, then another problem arises:
When jobs>1, the same linter object can be used for checking several modules, stats after each module are copied and then merged.
It leads to a situation when some stats are accounted several times in final result (it's checked in test_subconfigs_score in my 1st PR).

Explicit stats reset and merge in single process can be avoided, but it will require additional changes in code for parallel checks. I'd suggest to leave it as a possible optimization in another PR.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, you probably already explain it but I don't fully understand. This explanation seems to point to a general issue with stats merging, not something that has to do with multi-directory configs. Or am I misunderstanding you? If it is just a general issue we should tackle it in a separate PR.


MANAGER = astroid.MANAGER

Expand Down Expand Up @@ -317,6 +317,7 @@ def __init__(

# Attributes related to stats
self.stats = LinterStats()
self.all_stats: list[LinterStats] = []

# Attributes related to (command-line) options and their parsing
self.options: Options = options + _make_linter_options(self)
Expand Down Expand Up @@ -665,12 +666,12 @@ def check(self, files_or_modules: Sequence[str]) -> None:
"Missing filename required for --from-stdin"
)

extra_packages_paths = list(
{
extra_packages_paths_set = set()
for file_or_module in files_or_modules:
extra_packages_paths_set.add(
discover_package_path(file_or_module, self.config.source_roots)
for file_or_module in files_or_modules
}
)
)
extra_packages_paths = list(extra_packages_paths_set)

# TODO: Move the parallel invocation into step 3 of the checking process
if not self.config.from_stdin and self.config.jobs > 1:
Expand All @@ -693,13 +694,12 @@ def check(self, files_or_modules: Sequence[str]) -> None:
fileitems = self._iterate_file_descrs(files_or_modules)
data = None

# The contextmanager also opens all checkers and sets up the PyLinter class
with augmented_sys_path(extra_packages_paths):
# 2) Get the AST for each FileItem
ast_per_fileitem = self._get_asts(fileitems, data)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we have unintended effects from not getting these within the context manager..

# 3) Lint each ast
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line should be down below

# The contextmanager also opens all checkers and sets up the PyLinter class
with self._astroid_module_checker() as check_astroid_module:
# 2) Get the AST for each FileItem
ast_per_fileitem = self._get_asts(fileitems, data)

# 3) Lint each ast
self._lint_files(ast_per_fileitem, check_astroid_module)

def _get_asts(
Expand All @@ -710,6 +710,7 @@ def _get_asts(

for fileitem in fileitems:
self.set_current_module(fileitem.name, fileitem.filepath)
self._set_astroid_options()

try:
ast_per_fileitem[fileitem] = self.get_ast(
Expand All @@ -735,13 +736,14 @@ def check_single_file_item(self, file: FileItem) -> None:

initialize() should be called before calling this method
"""
self.set_current_module(file.name, file.filepath)
with self._astroid_module_checker() as check_astroid_module:
self._check_file(self.get_ast, check_astroid_module, file)

def _lint_files(
self,
ast_mapping: dict[FileItem, nodes.Module | None],
check_astroid_module: Callable[[nodes.Module], bool | None],
check_astroid_module: Callable[[nodes.Module], bool | None] | None,
) -> None:
"""Lint all AST modules from a mapping.."""
for fileitem, module in ast_mapping.items():
Expand All @@ -760,12 +762,17 @@ def _lint_files(
)
else:
self.add_message("fatal", args=msg, confidence=HIGH)
# current self.stats is needed in merge - it contains stats from last module
finished_run_stats = merge_stats([*self.all_stats, self.stats])
# after _lint_files linter.stats is aggregate stats from all modules, like after check_parallel
self.all_stats = []
self.stats = finished_run_stats

def _lint_file(
self,
file: FileItem,
module: nodes.Module,
check_astroid_module: Callable[[nodes.Module], bool | None],
check_astroid_module: Callable[[nodes.Module], bool | None] | None,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR is really touching some of the core behaviour of this behemoth of a class so it is a bit hard to review. Sorry in advance.

Why is this now optional? I don't really like that design as it further complicates this function body. Could you explain why this is needed? And could that perhaps be a separate PR?

) -> None:
"""Lint a file using the passed utility function check_astroid_module).

Expand All @@ -784,7 +791,13 @@ def _lint_file(
self.current_file = module.file

try:
check_astroid_module(module)
# call _astroid_module_checker after set_current_module, when
# self.config is the right config for current module
if check_astroid_module is None:
with self._astroid_module_checker() as local_check_astroid_module:
local_check_astroid_module(module)
else:
check_astroid_module(module)
except Exception as e:
raise astroid.AstroidError from e

Expand Down Expand Up @@ -898,33 +911,43 @@ def _expand_files(
def set_current_module(self, modname: str, filepath: str | None = None) -> None:
"""Set the name of the currently analyzed module and
init statistics for it.

Save current stats before init to make sure no counters for
error, statement, etc are missed.
"""
if not modname and filepath is None:
return
self.reporter.on_set_current_module(modname or "", filepath)
self.current_name = modname
self.current_file = filepath or modname
self.all_stats.append(self.stats)
self.stats = LinterStats()
self.stats.init_single_module(modname or "")

# If there is an actual filepath we might need to update the config attribute
if filepath:
namespace = self._get_namespace_for_file(
Path(filepath), self._directory_namespaces
config_path, namespace = self._get_namespace_for_file(
Path(filepath).resolve(), self._directory_namespaces
Comment on lines +929 to +930
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The linked report traces the regression to the call to resolve().

I wonder if we can guard it under not path.is_absolute():

>>> from timeit import timeit
>>> timeit('p.is_absolute()', setup='from pathlib import Path; p=Path(".")')
0.1643185840221122
>>> timeit('p.resolve()', setup='from pathlib import Path; p=Path(".")')
10.929103292000946

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Edit: Seems like is_absolute() will always be false as things stand, so we probably need to look higher up the stack for a place to do some sort of conversion.

)
if namespace:
self.config = namespace or self._base_config
self.config = namespace
if self.config.verbose:
print(
f"Using config file from {config_path} for {filepath}",
file=sys.stderr,
)

def _get_namespace_for_file(
self, filepath: Path, namespaces: DirectoryNamespaceDict
) -> argparse.Namespace | None:
) -> tuple[Path | None, argparse.Namespace | None]:
for directory in namespaces:
if _is_relative_to(filepath, directory):
namespace = self._get_namespace_for_file(
_, namespace = self._get_namespace_for_file(
filepath, namespaces[directory][1]
)
if namespace is None:
return namespaces[directory][0]
return None
return directory, namespaces[directory][0]
return None, None

@contextlib.contextmanager
def _astroid_module_checker(
Expand Down Expand Up @@ -953,7 +976,7 @@ def _astroid_module_checker(
rawcheckers=rawcheckers,
)

# notify global end
# notify end of module if jobs>1, global end otherwise
self.stats.statement = walker.nbstatements
for checker in reversed(_checkers):
checker.close()
Expand Down Expand Up @@ -1068,22 +1091,26 @@ def _check_astroid_module(
walker.walk(node)
return True

def open(self) -> None:
"""Initialize counters."""
def _set_astroid_options(self) -> None:
"""Pass some config values to astroid.MANAGER object."""
MANAGER.always_load_extensions = self.config.unsafe_load_any_extension
MANAGER.max_inferable_values = self.config.limit_inference_results
MANAGER.extension_package_whitelist.update(self.config.extension_pkg_allow_list)
if self.config.extension_pkg_whitelist:
MANAGER.extension_package_whitelist.update(
self.config.extension_pkg_whitelist
)
self.stats.reset_message_count()

def open(self) -> None:
"""Initialize self as main checker for one or more modules."""
self._set_astroid_options()

def generate_reports(self, verbose: bool = False) -> int | None:
"""Close the whole package /module, it's time to make reports !

if persistent run, pickle results for later comparison
"""
self.config = self._base_config
# Display whatever messages are left on the reporter.
self.reporter.display_messages(report_nodes.Section())
if not self.file_state._is_base_filestate:
Expand Down
20 changes: 20 additions & 0 deletions tests/lint/test_pylinter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@
from unittest.mock import patch

from pytest import CaptureFixture
from test_regr import REGR_DATA

from pylint.lint.pylinter import PyLinter
from pylint.typing import FileItem
from pylint.utils import FileState


Expand Down Expand Up @@ -48,3 +50,21 @@ def test_crash_during_linting(
assert len(files) == 1
assert "pylint-crash-20" in str(files[0])
assert any(m.symbol == "astroid-error" for m in linter.reporter.messages)


def test_global_vs_local_astroid_module() -> None:
fileitems = iter(
[
FileItem("filename", __file__, "modname"),
FileItem("name2", str(Path(REGR_DATA, "decimal_inference.py")), "mod2"),
]
)
linter1 = PyLinter()
ast_mapping = linter1._get_asts(fileitems, None)
with linter1._astroid_module_checker() as check_astroid_module:
linter1._lint_files(ast_mapping, check_astroid_module)
stats1 = linter1.stats
linter2 = PyLinter()
linter2._lint_files(ast_mapping, None)
stats2 = linter2.stats
assert str(stats1) == str(stats2)
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ commands =
basepython = python3
deps =
-r {toxinidir}/requirements_test.txt
pre-commit~=2.20
commands =
pre-commit run --all-files

Expand Down