From 5ac1e625c76cd3e952963ef2ba460b74b128eb94 Mon Sep 17 00:00:00 2001 From: rchristof Date: Tue, 28 Oct 2025 11:49:48 -0300 Subject: [PATCH 1/3] refactor: centralize fully qualified lookup logic This refactor extracts the core logic for resolving fully qualified names into a new private helper _lookup_fully_qualified(), and rewrites lookup_fully_qualified() / lookup_fully_qualified_or_none() as thin wrappers around it. Before this change: lookup_fully_qualified_or_none() in semanal.py directly implemented a large amount of lookup logic with two separate code paths ("exact module match" vs "longest loaded prefix"). This made it harder to maintain and reason about. lookup_fully_qualified() just called the above and asserted non-None, so the only authoritative behavior lived inside one big public method. After this change: _lookup_fully_qualified(fullname, allow_missing) is now the single internal implementation of fully qualified lookup: - It finds the longest prefix of the name that exists in self.modules. - It walks the remaining components through module / class symbol tables. - It preserves record_incomplete_ref() calls for incomplete namespaces. - It validates input and handles partial / nested names consistently. lookup_fully_qualified() now just calls the helper in "strict" mode and asserts the result is not None. lookup_fully_qualified_or_none() now just calls the helper in "permissive" mode and returns the result (or None). Why this matters: The public API remains unchanged: - lookup_fully_qualified(name) -> SymbolTableNode - lookup_fully_qualified_or_none(name) -> SymbolTableNode | None The duplicated traversal logic is no longer embedded in a public method. Instead, there is a single internal source of truth with a documented contract (_lookup_fully_qualified is explicitly not plugin API). This makes it easier to migrate other ad-hoc lookup sites to use this common logic, which is one of the goals in issue #4157 ("too many lookup helpers across the semantic analyzer / checker leads to confusion"). Behavioral note: When the fully qualified name refers exactly to a module (e.g. "pkg.mod"), _lookup_fully_qualified() now returns None instead of attempting to treat the module object itself as a definitional symbol via its parent package. This matches the intent of the ongoing lookup cleanup work (avoid treating modules like regular definitions), but I'm calling it out in case external plugins depended on the old quirk. This is an incremental, internal refactor toward #4157. No plugin-facing API signatures were changed. --- mypy/semanal.py | 94 +++++++++++++++++++++++++++---------------------- 1 file changed, 52 insertions(+), 42 deletions(-) diff --git a/mypy/semanal.py b/mypy/semanal.py index d7b50bd09496..b71b852c9bd4 100644 --- a/mypy/semanal.py +++ b/mypy/semanal.py @@ -6625,8 +6625,58 @@ def create_getattr_var( return v return None + def _lookup_fully_qualified( + self, fullname: str, *, allow_missing: bool + ) -> SymbolTableNode | None: + """Implementation detail shared by fully qualified lookup helpers. + + This is not part of the public or plugin APIs. It only exists to centralize + the traversal logic so the public helpers remain thin wrappers without + changing any semantic rules governed by the analyzer. + """ + if "." not in fullname: + raise ValueError( + f"_lookup_fully_qualified requires a qualified name with at least one dot, " + f"got: {fullname!r}" + ) + + module: str | None = None + filenode: MypyFile | None = None + parts = fullname.split(".") + names: list[str] = [] + + while parts: + candidate = ".".join(parts) + filenode = self.modules.get(candidate) + if filenode is not None: + module = candidate + break + names.append(parts.pop()) + + if filenode is None or module is None or not names: + return None + + names.reverse() + result = filenode.names.get(names[0]) + + if result is None and self.is_incomplete_namespace(module): + self.record_incomplete_ref() + # When the namespace is incomplete and we don't have a result, + # return None immediately. If allow_missing=False, the caller + # will assert this is not None (preserving original behavior). + # If allow_missing=True, returning None is the expected outcome. + return None + + for part in names[1:]: + if result is not None and isinstance(result.node, TypeInfo): + result = result.node.names.get(part) + else: + return None + + return result + def lookup_fully_qualified(self, fullname: str) -> SymbolTableNode: - ret = self.lookup_fully_qualified_or_none(fullname) + ret = self._lookup_fully_qualified(fullname, allow_missing=False) assert ret is not None, fullname return ret @@ -6639,47 +6689,7 @@ def lookup_fully_qualified_or_none(self, fullname: str) -> SymbolTableNode | Non Note that this can't be used for names nested in class namespaces. """ - # TODO: unify/clean-up/simplify lookup methods, see #4157. - module, name = fullname.rsplit(".", maxsplit=1) - - if module in self.modules: - # If the module exists, look up the name in the module. - # This is the common case. - filenode = self.modules[module] - result = filenode.names.get(name) - if result is None and self.is_incomplete_namespace(module): - # TODO: More explicit handling of incomplete refs? - self.record_incomplete_ref() - return result - else: - # Else, try to find the longest prefix of the module name that is in the modules dictionary. - splitted_modules = fullname.split(".") - names = [] - - while splitted_modules and ".".join(splitted_modules) not in self.modules: - names.append(splitted_modules.pop()) - - if not splitted_modules or not names: - # If no module or name is found, return None. - return None - - # Reverse the names list to get the correct order of names. - names.reverse() - - module = ".".join(splitted_modules) - filenode = self.modules[module] - result = filenode.names.get(names[0]) - - if result is None and self.is_incomplete_namespace(module): - # TODO: More explicit handling of incomplete refs? - self.record_incomplete_ref() - - for part in names[1:]: - if result is not None and isinstance(result.node, TypeInfo): - result = result.node.names.get(part) - else: - return None - return result + return self._lookup_fully_qualified(fullname, allow_missing=True) def object_type(self) -> Instance: if self._object_type is None: From abffd954999f46295382ed790059df372edf22a9 Mon Sep 17 00:00:00 2001 From: rchristof Date: Tue, 28 Oct 2025 12:00:59 -0300 Subject: [PATCH 2/3] refactor: consolidate named type construction helpers This refactor extracts the common logic for constructing Instance objects from fully qualified type names into a private helper _build_named_instance(), and rewrites named_type(), named_type_or_none(), and builtin_type() to delegate to it. Before this change: named_type() and named_type_or_none() each reimplemented the same core steps: - resolve a fully qualified name using either lookup_fully_qualified() (strict) or lookup_fully_qualified_or_none() (permissive), - reject unresolved symbols / placeholders, - optionally unwrap a TypeAlias to get to the underlying TypeInfo, - build an Instance, synthesizing default type arguments with a particular AnyType flavor (special_form vs unannotated). builtin_type() just wrapped named_type(), obscuring the fact that it's semantically "strict + use special_form AnyType defaults". This meant the same idea ("turn a fully qualified name into an Instance") was duplicated in several places, with subtle differences that were not obvious at the call site. After this change: _build_named_instance(fullname, args, *, allow_missing, any_flavor, unwrap_alias) becomes the single internal implementation of that logic: - allow_missing controls whether we allow unresolved names (mirrors strict vs permissive lookup). - any_flavor selects which AnyType flavor to use for synthesized type arguments when args is not provided. - unwrap_alias controls whether a TypeAlias target should be unwrapped to its underlying TypeInfo (this matches the historical behavior of named_type_or_none()). named_type() now calls the helper with allow_missing=False, any_flavor=TypeOfAny.special_form, unwrap_alias=False, and asserts that a result exists. named_type_or_none() calls it with allow_missing=True, any_flavor=TypeOfAny.unannotated, unwrap_alias=True, and returns None if the symbol couldn't be resolved or is still a placeholder. builtin_type() now directly calls the helper with the same strict parameters as named_type(), making its semantics explicit while keeping the same behavior. Why this matters: The public helpers keep the same signatures and the same intended contracts: - named_type(fullname, args?) -> Instance - named_type_or_none(fullname, args?) -> Instance | None - builtin_type(fullname) -> Instance The differences between them (strict vs permissive lookup, alias unwrapping, AnyType flavor for default arguments) are now explicit in how they call _build_named_instance() instead of being duplicated in multiple ad hoc implementations. Future changes to how we resolve names or synthesize default type arguments only need to happen in one place. This continues the cleanup called out in issue #4157: reducing the number of slightly different lookup/type helpers and making the remaining ones more uniform. _build_named_instance() is internal-only and not part of the plugin API surface. --- mypy/semanal.py | 77 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 58 insertions(+), 19 deletions(-) diff --git a/mypy/semanal.py b/mypy/semanal.py index b71b852c9bd4..b76464759c61 100644 --- a/mypy/semanal.py +++ b/mypy/semanal.py @@ -6660,6 +6660,7 @@ def _lookup_fully_qualified( result = filenode.names.get(names[0]) if result is None and self.is_incomplete_namespace(module): + # TODO: More explicit handling of incomplete refs? self.record_incomplete_ref() # When the namespace is incomplete and we don't have a result, # return None immediately. If allow_missing=False, the caller @@ -6706,33 +6707,71 @@ def function_type(self) -> Instance: self._function_type = self.named_type("builtins.function") return self._function_type - def named_type(self, fullname: str, args: list[Type] | None = None) -> Instance: - sym = self.lookup_fully_qualified(fullname) - assert sym, "Internal error: attempted to construct unknown type" - node = sym.node - assert isinstance(node, TypeInfo), node - if args: - # TODO: assert len(args) == len(node.defn.type_vars) - return Instance(node, args) - return Instance(node, [AnyType(TypeOfAny.special_form)] * len(node.defn.type_vars)) + def _build_named_instance( + self, + fullname: str, + args: list[Type] | None, + *, + allow_missing: bool, + any_flavor: int, + unwrap_alias: bool, + ) -> Instance | None: + """Internal helper to construct Instances for fully-qualified names. + + This exists solely to remove duplication between the public helpers. It + doesn't change user-visible behavior and is not part of the public or + plugin APIs. + """ + if allow_missing: + sym = self.lookup_fully_qualified_or_none(fullname) + if not sym or isinstance(sym.node, PlaceholderNode): + return None + else: + sym = self.lookup_fully_qualified(fullname) - def named_type_or_none(self, fullname: str, args: list[Type] | None = None) -> Instance | None: - sym = self.lookup_fully_qualified_or_none(fullname) - if not sym or isinstance(sym.node, PlaceholderNode): - return None node = sym.node - if isinstance(node, TypeAlias): + if unwrap_alias and isinstance(node, TypeAlias): assert isinstance(node.target, Instance) # type: ignore[misc] node = node.target.type assert isinstance(node, TypeInfo), node - if args is not None: - # TODO: assert len(args) == len(node.defn.type_vars) - return Instance(node, args) - return Instance(node, [AnyType(TypeOfAny.unannotated)] * len(node.defn.type_vars)) + + inst_args = args + if inst_args is None: + inst_args = [AnyType(any_flavor)] * len(node.defn.type_vars) + # TODO: assert len(inst_args) == len(node.defn.type_vars) + return Instance(node, inst_args) + + def named_type(self, fullname: str, args: list[Type] | None = None) -> Instance: + inst = self._build_named_instance( + fullname, + args, + allow_missing=False, + any_flavor=TypeOfAny.special_form, + unwrap_alias=False, + ) + assert inst is not None, "Internal error: attempted to construct unknown type" + return inst + + def named_type_or_none(self, fullname: str, args: list[Type] | None = None) -> Instance | None: + return self._build_named_instance( + fullname, + args, + allow_missing=True, + any_flavor=TypeOfAny.unannotated, + unwrap_alias=True, + ) def builtin_type(self, fully_qualified_name: str) -> Instance: """Legacy function -- use named_type() instead.""" - return self.named_type(fully_qualified_name) + inst = self._build_named_instance( + fully_qualified_name, + None, + allow_missing=False, + any_flavor=TypeOfAny.special_form, + unwrap_alias=False, + ) + assert inst is not None + return inst def lookup_current_scope(self, name: str) -> SymbolTableNode | None: if self.locals[-1] is not None: From 08594d3e9888f252a3cfe0899b56a796c95f9ca2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 28 Oct 2025 15:14:07 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mypy/semanal.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/mypy/semanal.py b/mypy/semanal.py index b76464759c61..f5684492286b 100644 --- a/mypy/semanal.py +++ b/mypy/semanal.py @@ -6754,11 +6754,7 @@ def named_type(self, fullname: str, args: list[Type] | None = None) -> Instance: def named_type_or_none(self, fullname: str, args: list[Type] | None = None) -> Instance | None: return self._build_named_instance( - fullname, - args, - allow_missing=True, - any_flavor=TypeOfAny.unannotated, - unwrap_alias=True, + fullname, args, allow_missing=True, any_flavor=TypeOfAny.unannotated, unwrap_alias=True ) def builtin_type(self, fully_qualified_name: str) -> Instance: