diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ce28a008..6851c610e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,11 @@ Status of the `main` branch. Changes prior to the next official version change w at the Yarn-generated SDK. - `SvelteLanguageServer`: Fix diagnostics requests for TypeScript/JavaScript files incorrectly being processed by the Svelte LS instead of the TypeScript LS. + - `gopls`: Fix `replace_symbol_body` corrupting single `type`/`var`/`const` declarations by + duplicating the leading keyword (e.g. `type Foo` becoming `type type Foo`). gopls reports the + symbol range of such declarations starting at the identifier rather than the keyword (unlike + `func` declarations); the range is now extended to include the keyword so the body and the + replacement range stay consistent. * Dashboard: - Make list of trusted hosts configurable, fixing host validation introduced in v1.5.2 allowing only diff --git a/src/solidlsp/language_servers/gopls.py b/src/solidlsp/language_servers/gopls.py index 3c0981786..bf13fbcc4 100644 --- a/src/solidlsp/language_servers/gopls.py +++ b/src/solidlsp/language_servers/gopls.py @@ -3,13 +3,15 @@ import logging import os import pathlib +import re import subprocess from collections.abc import Hashable from typing import Any, cast from overrides import override -from solidlsp.ls import RawDocumentSymbol, SolidLanguageServer +from solidlsp import ls_types +from solidlsp.ls import DocumentSymbols, LSPFileBuffer, RawDocumentSymbol, SolidLanguageServer, SymbolBodyFactory from solidlsp.ls_config import LanguageServerConfig from solidlsp.lsp_protocol_handler.lsp_types import InitializeParams from solidlsp.lsp_protocol_handler.server import ProcessLaunchInfo @@ -23,6 +25,12 @@ class Gopls(SolidLanguageServer): Provides Go specific instantiation of the LanguageServer class using gopls. """ + # matches a line prefix that consists solely of a leading `type`/`var`/`const` declaration + # keyword (with optional indentation and the whitespace before the declared identifier). + # gopls reports the symbol range of such single declarations starting at the identifier, + # i.e. after the keyword, whereas `func` declarations include the `func` keyword. + _LEADING_DECL_KEYWORD_RE = re.compile(r"(?P\s*)(?:type|var|const)\s+") + @classmethod def supports_implementation_request(cls) -> bool: return True @@ -214,6 +222,92 @@ def _document_symbols_cache_fingerprint(self) -> Hashable: def _normalize_symbol_name(self, symbol: RawDocumentSymbol, relative_file_path: str) -> str: return symbol["name"].rsplit(".", 1)[-1] + @override + def request_document_symbols(self, relative_file_path: str, file_buffer: LSPFileBuffer | None = None) -> DocumentSymbols: + # Override to extend single `type`/`var`/`const` declaration ranges to include the leading + # keyword. gopls excludes the keyword from such ranges (unlike `func` declarations), which + # causes replace_symbol_body to drop the keyword from the symbol body and replacement range; + # a natural keyword-inclusive round-trip edit would then corrupt the file (e.g. `type Foo` + # becomes `type type Foo`). See _extend_go_symbol_range_to_include_leading_keyword. + document_symbols = super().request_document_symbols(relative_file_path, file_buffer=file_buffer) + if not document_symbols.root_symbols: + return document_symbols + + # obtain the file lines and a body factory to recompute the bodies of extended symbols + with self._open_file_context(relative_file_path, file_buffer, open_in_ls=False) as file_data: + file_lines = file_data.split_lines() + body_factory = SymbolBodyFactory(file_data) + + # extend ranges recursively, operating on copies so the cached symbols are not mutated. + # Children keep their `parent` back-pointer aimed at the original (un-extended) node; + # this is intentional and safe, because ancestor traversal only reads name/kind/overload + # index (see Symbol.iter_ancestors), none of which the extension changes. Rebinding the + # back-pointers would force a deep copy of every extended subtree for no observable gain. + def extend_symbol_and_children(symbol: ls_types.UnifiedSymbolInformation) -> ls_types.UnifiedSymbolInformation: + extended = self._extend_go_symbol_range_to_include_leading_keyword(symbol, file_lines, body_factory) + children = symbol.get("children") + if children: + if extended is symbol: + extended = symbol.copy() + extended["children"] = [extend_symbol_and_children(child) for child in children] + return extended + + extended_root_symbols = [extend_symbol_and_children(sym) for sym in document_symbols.root_symbols] + + return DocumentSymbols(extended_root_symbols) + + def _extend_go_symbol_range_to_include_leading_keyword( + self, + symbol: ls_types.UnifiedSymbolInformation, + file_lines: list[str], + body_factory: SymbolBodyFactory, + ) -> ls_types.UnifiedSymbolInformation: + """ + Extend a Go symbol's body range to include a leading `type`/`var`/`const` keyword. + + gopls reports the range of a single `type`/`var`/`const` declaration starting at the + declared identifier (after the keyword), whereas the range of a `func` declaration includes + the `func` keyword. This asymmetry makes :meth:`replace_symbol_body` omit the keyword from + both the displayed body and the replacement range, so re-supplying the keyword in an edit + corrupts the file (e.g. ``type Foo`` becomes ``type type Foo``). + + :param symbol: the symbol whose range may be extended. + :param file_lines: the lines of the file in which the symbol is defined. + :param body_factory: the factory used to recompute the symbol body from the extended range. + :return: a copy of the symbol with an extended range, or the original symbol if no leading + keyword precedes the identifier on the start line (e.g. for funcs or for grouped + declarations such as ``var ( ... )`` whose keyword sits on a separate line). + """ + # determine whether only a declaration keyword precedes the identifier on the start line + range_info = symbol["range"] + start_line = range_info["start"]["line"] + start_char = range_info["start"]["character"] + if start_line >= len(file_lines): + return symbol + prefix = file_lines[start_line][:start_char] + match = self._LEADING_DECL_KEYWORD_RE.fullmatch(prefix) + if match is None: + return symbol + + # extend the range start back to the keyword (excluding indentation), updating both the + # symbol range and its location range so the replacement range covers the keyword + new_start = ls_types.Position(line=start_line, character=len(match.group("indent"))) + extended = symbol.copy() + extended["range"] = ls_types.Range(start=new_start, end=range_info["end"]) + location = extended.get("location") + if location: + location = location.copy() + if "range" in location: + location["range"] = ls_types.Range(start=new_start, end=location["range"]["end"]) + extended["location"] = location + + # recompute the body from the now-extended location range so the displayed body stays + # consistent with the replacement range; the stale body must be removed first, since the + # factory returns an existing SymbolBody as-is and otherwise reads the updated location range + extended.pop("body", None) + extended["body"] = body_factory.create_symbol_body(extended) + return extended + def _start_server(self) -> None: """Start gopls server process""" diff --git a/test/resources/repos/go/test_repo/symbol_body.go b/test/resources/repos/go/test_repo/symbol_body.go new file mode 100644 index 000000000..604b98b95 --- /dev/null +++ b/test/resources/repos/go/test_repo/symbol_body.go @@ -0,0 +1,26 @@ +package main + +// BodyStruct is a single type declaration used to verify that replace_symbol_body +// includes the leading `type` keyword in the symbol body and replacement range. +type BodyStruct struct { + Value int +} + +// NamedInt is a single defined-type declaration (non-struct). +type NamedInt int + +// AliasInt is a type alias declaration. +type AliasInt = int + +// GlobalCounter is a single package-level var declaration. +var GlobalCounter int = 0 + +// MaxItems is a single package-level const declaration. +const MaxItems = 100 + +// GroupedA and GroupedB are declared in a grouped var block, where the `var` +// keyword is on a separate line and must NOT be folded into either symbol body. +var ( + GroupedA int = 1 + GroupedB string = "two" +) diff --git a/test/serena/__snapshots__/test_symbol_editing.ambr b/test/serena/__snapshots__/test_symbol_editing.ambr index ec23cf336..6b1786856 100644 --- a/test/serena/__snapshots__/test_symbol_editing.ambr +++ b/test/serena/__snapshots__/test_symbol_editing.ambr @@ -310,6 +310,37 @@ ''' # --- +# name: test_go_symbol_replacement_no_double_keyword + ''' + package main + + // BodyStruct is a single type declaration used to verify that replace_symbol_body + // includes the leading `type` keyword in the symbol body and replacement range. + type BodyStruct struct { + Value int + } + + // NamedInt is a single defined-type declaration (non-struct). + type NamedInt int64 + + // AliasInt is a type alias declaration. + type AliasInt = int + + // GlobalCounter is a single package-level var declaration. + var GlobalCounter int = 0 + + // MaxItems is a single package-level const declaration. + const MaxItems = 100 + + // GroupedA and GroupedB are declared in a grouped var block, where the `var` + // keyword is on a separate line and must NOT be folded into either symbol body. + var ( + GroupedA int = 1 + GroupedB string = "two" + ) + + ''' +# --- # name: test_insert_in_rel_to_symbol[test_case0-after] ''' """ diff --git a/test/serena/test_symbol_editing.py b/test/serena/test_symbol_editing.py index b6f8abf29..b56d504a8 100644 --- a/test/serena/test_symbol_editing.py +++ b/test/serena/test_symbol_editing.py @@ -474,6 +474,62 @@ def test_nix_symbol_replacement_no_double_semicolon(snapshot: SnapshotAssertion) test_case.run_test(content_after_ground_truth=snapshot) +# A single `type` declaration body that re-supplies the leading `type` keyword, as a user would +# after copying the body returned by find_symbol. Replacing the body with this must not duplicate +# the keyword. +GO_DECL_REPLACEMENT = """type NamedInt int64""" + + +class GoDeclReplacementTest(EditingTest): + """Test for replacing a single Go ``type``/``var``/``const`` declaration that should NOT result + in a duplicated leading keyword (e.g. ``type type NamedInt``). + """ + + def __init__(self, language: Language, rel_path: str, symbol_name: str, new_body: str): + super().__init__(language, rel_path) + self.symbol_name = symbol_name + self.new_body = new_body + + def _apply_edit(self, code_editor: CodeEditor) -> None: + code_editor.replace_body(self.symbol_name, self.rel_path, self.new_body) + + @overrides + def _test_diff(self, code_diff: CodeDiff, snapshot: SnapshotAssertion) -> None: + # the leading declaration keyword must appear exactly once (no `type type` corruption) + for keyword in ("type", "var", "const"): + assert f"{keyword} {keyword} " not in code_diff.modified_content, ( + f"Replacement duplicated the '{keyword}' keyword: {code_diff.modified_content!r}" + ) + return super()._test_diff(code_diff, snapshot) + + +@pytest.mark.go +@pytest.mark.skipif(sys.platform == "win32", reason="gopls is not provisioned on the Windows CI runner") +def test_go_symbol_replacement_no_double_keyword(snapshot: SnapshotAssertion): + """ + Test that replacing a Go ``type``/``var``/``const`` declaration does not duplicate the leading + keyword. + + This exercises the bug where: + - Original: type NamedInt int + - Replacement body (as displayed by find_symbol): type NamedInt int64 + - Bug result would be: type type NamedInt int64 (the original `type ` prefix is kept because the + gopls symbol range starts at the identifier, and the supplied body adds another `type`) + - Correct result should be: type NamedInt int64 + + gopls reports the symbol range of such declarations starting at the identifier (after the + keyword), unlike ``func`` declarations whose range includes the ``func`` keyword. The gopls + range extension prevents the duplicated keyword. + """ + test_case = GoDeclReplacementTest( + Language.GO, + "symbol_body.go", + "NamedInt", + GO_DECL_REPLACEMENT, + ) + test_case.run_test(content_after_ground_truth=snapshot) + + class RenameSymbolTest(EditingTest): def __init__(self, language: Language, rel_path: str, symbol_name: str, new_name: str): super().__init__(language, rel_path) diff --git a/test/solidlsp/go/test_go_basic.py b/test/solidlsp/go/test_go_basic.py index 347f60221..2388b78c1 100644 --- a/test/solidlsp/go/test_go_basic.py +++ b/test/solidlsp/go/test_go_basic.py @@ -48,6 +48,46 @@ def test_find_referencing_symbols(self, language_server: SolidLanguageServer) -> refs = language_server.request_references(file_path, sel_start["line"], sel_start["character"]) assert any("main.go" in ref.get("uri", "") for ref in refs), "Expected at least one reference result to point at main.go" + @pytest.mark.parametrize("language_server", [Language.GO], indirect=True) + def test_type_var_const_body_includes_leading_keyword(self, language_server: SolidLanguageServer) -> None: + """ + Single ``type``/``var``/``const`` declarations must expose a body and replacement range that + include the leading keyword, just like ``func`` declarations do. + + Regression test for gopls reporting the symbol range of such declarations starting at the + declared identifier (after the keyword) rather than at the keyword. That asymmetry made + replace_symbol_body drop the keyword from the body and replacement range, so a natural + keyword-inclusive round-trip edit corrupted the file (e.g. ``type Foo`` -> ``type type Foo``). + """ + all_symbols, _ = language_server.request_document_symbols("symbol_body.go").get_all_symbols_and_roots() + symbols_by_name = {sym.get("name"): sym for sym in all_symbols} + + # single declarations: body starts with the keyword, the range start moves to the keyword + # (column 0 here), and the selection range still points at the identifier after the keyword + expected_keyword_by_name = { + "BodyStruct": "type ", + "NamedInt": "type ", + "AliasInt": "type ", + "GlobalCounter": "var ", + "MaxItems": "const ", + } + for name, keyword in expected_keyword_by_name.items(): + sym = symbols_by_name.get(name) + assert sym is not None, f"{name} not found in symbol_body.go" + body = sym["body"].get_text() + assert body.startswith(keyword), f"Expected body of {name} to start with {keyword!r}, got {body[:24]!r}" + assert sym["location"]["range"]["start"]["character"] == 0, f"Expected {name} body range to start at the keyword (col 0)" + assert sym["selectionRange"]["start"]["character"] > 0, f"Expected {name} selectionRange to point at the identifier" + + # grouped declarations keep the keyword on a separate line (e.g. ``var ( ... )``), so their + # bodies must NOT include it and their ranges must be left untouched + for name in ("GroupedA", "GroupedB"): + sym = symbols_by_name.get(name) + assert sym is not None, f"{name} not found in symbol_body.go" + body = sym["body"].get_text() + assert body.startswith(name), f"Expected grouped var {name} body to start with the identifier, got {body[:24]!r}" + assert not body.startswith("var"), f"Grouped var {name} body must not include the 'var' keyword" + if language_has_verified_implementation_support(Language.GO): @pytest.mark.parametrize("language_server", [Language.GO], indirect=True)