From ec8cfe17ee5ef3818729e70fc9f926ec7d749ef6 Mon Sep 17 00:00:00 2001 From: "Tomi P. Hakala" Date: Sat, 30 May 2026 13:54:17 +0300 Subject: [PATCH] Fix gopls replace_symbol_body corrupting type/var/const declarations gopls reports the document-symbol range of single type/var/const declarations starting at the identifier (after the keyword), unlike func declarations whose range includes the func keyword. Serena derives both the displayed body and the replacement range from that range, so the keyword was dropped from both for these declarations and a keyword-inclusive edit corrupted the file (type Foo became type type Foo). Override request_document_symbols in gopls to extend the range start back to the keyword when only the keyword and indentation precede the identifier, recomputing the body so it stays consistent with the replacement range. Funcs, grouped declarations, and selectionRange are left untouched. Adds a unit test for the symbol body and range plus an end-to-end replace_symbol_body round-trip regression test. --- CHANGELOG.md | 5 + src/solidlsp/language_servers/gopls.py | 96 ++++++++++++++++++- .../repos/go/test_repo/symbol_body.go | 26 +++++ .../__snapshots__/test_symbol_editing.ambr | 31 ++++++ test/serena/test_symbol_editing.py | 56 +++++++++++ test/solidlsp/go/test_go_basic.py | 40 ++++++++ 6 files changed, 253 insertions(+), 1 deletion(-) create mode 100644 test/resources/repos/go/test_repo/symbol_body.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ce28a008..6851c610e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,11 @@ Status of the `main` branch. Changes prior to the next official version change w at the Yarn-generated SDK. - `SvelteLanguageServer`: Fix diagnostics requests for TypeScript/JavaScript files incorrectly being processed by the Svelte LS instead of the TypeScript LS. + - `gopls`: Fix `replace_symbol_body` corrupting single `type`/`var`/`const` declarations by + duplicating the leading keyword (e.g. `type Foo` becoming `type type Foo`). gopls reports the + symbol range of such declarations starting at the identifier rather than the keyword (unlike + `func` declarations); the range is now extended to include the keyword so the body and the + replacement range stay consistent. * Dashboard: - Make list of trusted hosts configurable, fixing host validation introduced in v1.5.2 allowing only diff --git a/src/solidlsp/language_servers/gopls.py b/src/solidlsp/language_servers/gopls.py index 3c0981786..bf13fbcc4 100644 --- a/src/solidlsp/language_servers/gopls.py +++ b/src/solidlsp/language_servers/gopls.py @@ -3,13 +3,15 @@ import logging import os import pathlib +import re import subprocess from collections.abc import Hashable from typing import Any, cast from overrides import override -from solidlsp.ls import RawDocumentSymbol, SolidLanguageServer +from solidlsp import ls_types +from solidlsp.ls import DocumentSymbols, LSPFileBuffer, RawDocumentSymbol, SolidLanguageServer, SymbolBodyFactory from solidlsp.ls_config import LanguageServerConfig from solidlsp.lsp_protocol_handler.lsp_types import InitializeParams from solidlsp.lsp_protocol_handler.server import ProcessLaunchInfo @@ -23,6 +25,12 @@ class Gopls(SolidLanguageServer): Provides Go specific instantiation of the LanguageServer class using gopls. """ + # matches a line prefix that consists solely of a leading `type`/`var`/`const` declaration + # keyword (with optional indentation and the whitespace before the declared identifier). + # gopls reports the symbol range of such single declarations starting at the identifier, + # i.e. after the keyword, whereas `func` declarations include the `func` keyword. + _LEADING_DECL_KEYWORD_RE = re.compile(r"(?P\s*)(?:type|var|const)\s+") + @classmethod def supports_implementation_request(cls) -> bool: return True @@ -214,6 +222,92 @@ def _document_symbols_cache_fingerprint(self) -> Hashable: def _normalize_symbol_name(self, symbol: RawDocumentSymbol, relative_file_path: str) -> str: return symbol["name"].rsplit(".", 1)[-1] + @override + def request_document_symbols(self, relative_file_path: str, file_buffer: LSPFileBuffer | None = None) -> DocumentSymbols: + # Override to extend single `type`/`var`/`const` declaration ranges to include the leading + # keyword. gopls excludes the keyword from such ranges (unlike `func` declarations), which + # causes replace_symbol_body to drop the keyword from the symbol body and replacement range; + # a natural keyword-inclusive round-trip edit would then corrupt the file (e.g. `type Foo` + # becomes `type type Foo`). See _extend_go_symbol_range_to_include_leading_keyword. + document_symbols = super().request_document_symbols(relative_file_path, file_buffer=file_buffer) + if not document_symbols.root_symbols: + return document_symbols + + # obtain the file lines and a body factory to recompute the bodies of extended symbols + with self._open_file_context(relative_file_path, file_buffer, open_in_ls=False) as file_data: + file_lines = file_data.split_lines() + body_factory = SymbolBodyFactory(file_data) + + # extend ranges recursively, operating on copies so the cached symbols are not mutated. + # Children keep their `parent` back-pointer aimed at the original (un-extended) node; + # this is intentional and safe, because ancestor traversal only reads name/kind/overload + # index (see Symbol.iter_ancestors), none of which the extension changes. Rebinding the + # back-pointers would force a deep copy of every extended subtree for no observable gain. + def extend_symbol_and_children(symbol: ls_types.UnifiedSymbolInformation) -> ls_types.UnifiedSymbolInformation: + extended = self._extend_go_symbol_range_to_include_leading_keyword(symbol, file_lines, body_factory) + children = symbol.get("children") + if children: + if extended is symbol: + extended = symbol.copy() + extended["children"] = [extend_symbol_and_children(child) for child in children] + return extended + + extended_root_symbols = [extend_symbol_and_children(sym) for sym in document_symbols.root_symbols] + + return DocumentSymbols(extended_root_symbols) + + def _extend_go_symbol_range_to_include_leading_keyword( + self, + symbol: ls_types.UnifiedSymbolInformation, + file_lines: list[str], + body_factory: SymbolBodyFactory, + ) -> ls_types.UnifiedSymbolInformation: + """ + Extend a Go symbol's body range to include a leading `type`/`var`/`const` keyword. + + gopls reports the range of a single `type`/`var`/`const` declaration starting at the + declared identifier (after the keyword), whereas the range of a `func` declaration includes + the `func` keyword. This asymmetry makes :meth:`replace_symbol_body` omit the keyword from + both the displayed body and the replacement range, so re-supplying the keyword in an edit + corrupts the file (e.g. ``type Foo`` becomes ``type type Foo``). + + :param symbol: the symbol whose range may be extended. + :param file_lines: the lines of the file in which the symbol is defined. + :param body_factory: the factory used to recompute the symbol body from the extended range. + :return: a copy of the symbol with an extended range, or the original symbol if no leading + keyword precedes the identifier on the start line (e.g. for funcs or for grouped + declarations such as ``var ( ... )`` whose keyword sits on a separate line). + """ + # determine whether only a declaration keyword precedes the identifier on the start line + range_info = symbol["range"] + start_line = range_info["start"]["line"] + start_char = range_info["start"]["character"] + if start_line >= len(file_lines): + return symbol + prefix = file_lines[start_line][:start_char] + match = self._LEADING_DECL_KEYWORD_RE.fullmatch(prefix) + if match is None: + return symbol + + # extend the range start back to the keyword (excluding indentation), updating both the + # symbol range and its location range so the replacement range covers the keyword + new_start = ls_types.Position(line=start_line, character=len(match.group("indent"))) + extended = symbol.copy() + extended["range"] = ls_types.Range(start=new_start, end=range_info["end"]) + location = extended.get("location") + if location: + location = location.copy() + if "range" in location: + location["range"] = ls_types.Range(start=new_start, end=location["range"]["end"]) + extended["location"] = location + + # recompute the body from the now-extended location range so the displayed body stays + # consistent with the replacement range; the stale body must be removed first, since the + # factory returns an existing SymbolBody as-is and otherwise reads the updated location range + extended.pop("body", None) + extended["body"] = body_factory.create_symbol_body(extended) + return extended + def _start_server(self) -> None: """Start gopls server process""" diff --git a/test/resources/repos/go/test_repo/symbol_body.go b/test/resources/repos/go/test_repo/symbol_body.go new file mode 100644 index 000000000..604b98b95 --- /dev/null +++ b/test/resources/repos/go/test_repo/symbol_body.go @@ -0,0 +1,26 @@ +package main + +// BodyStruct is a single type declaration used to verify that replace_symbol_body +// includes the leading `type` keyword in the symbol body and replacement range. +type BodyStruct struct { + Value int +} + +// NamedInt is a single defined-type declaration (non-struct). +type NamedInt int + +// AliasInt is a type alias declaration. +type AliasInt = int + +// GlobalCounter is a single package-level var declaration. +var GlobalCounter int = 0 + +// MaxItems is a single package-level const declaration. +const MaxItems = 100 + +// GroupedA and GroupedB are declared in a grouped var block, where the `var` +// keyword is on a separate line and must NOT be folded into either symbol body. +var ( + GroupedA int = 1 + GroupedB string = "two" +) diff --git a/test/serena/__snapshots__/test_symbol_editing.ambr b/test/serena/__snapshots__/test_symbol_editing.ambr index ec23cf336..6b1786856 100644 --- a/test/serena/__snapshots__/test_symbol_editing.ambr +++ b/test/serena/__snapshots__/test_symbol_editing.ambr @@ -310,6 +310,37 @@ ''' # --- +# name: test_go_symbol_replacement_no_double_keyword + ''' + package main + + // BodyStruct is a single type declaration used to verify that replace_symbol_body + // includes the leading `type` keyword in the symbol body and replacement range. + type BodyStruct struct { + Value int + } + + // NamedInt is a single defined-type declaration (non-struct). + type NamedInt int64 + + // AliasInt is a type alias declaration. + type AliasInt = int + + // GlobalCounter is a single package-level var declaration. + var GlobalCounter int = 0 + + // MaxItems is a single package-level const declaration. + const MaxItems = 100 + + // GroupedA and GroupedB are declared in a grouped var block, where the `var` + // keyword is on a separate line and must NOT be folded into either symbol body. + var ( + GroupedA int = 1 + GroupedB string = "two" + ) + + ''' +# --- # name: test_insert_in_rel_to_symbol[test_case0-after] ''' """ diff --git a/test/serena/test_symbol_editing.py b/test/serena/test_symbol_editing.py index b6f8abf29..b56d504a8 100644 --- a/test/serena/test_symbol_editing.py +++ b/test/serena/test_symbol_editing.py @@ -474,6 +474,62 @@ def test_nix_symbol_replacement_no_double_semicolon(snapshot: SnapshotAssertion) test_case.run_test(content_after_ground_truth=snapshot) +# A single `type` declaration body that re-supplies the leading `type` keyword, as a user would +# after copying the body returned by find_symbol. Replacing the body with this must not duplicate +# the keyword. +GO_DECL_REPLACEMENT = """type NamedInt int64""" + + +class GoDeclReplacementTest(EditingTest): + """Test for replacing a single Go ``type``/``var``/``const`` declaration that should NOT result + in a duplicated leading keyword (e.g. ``type type NamedInt``). + """ + + def __init__(self, language: Language, rel_path: str, symbol_name: str, new_body: str): + super().__init__(language, rel_path) + self.symbol_name = symbol_name + self.new_body = new_body + + def _apply_edit(self, code_editor: CodeEditor) -> None: + code_editor.replace_body(self.symbol_name, self.rel_path, self.new_body) + + @overrides + def _test_diff(self, code_diff: CodeDiff, snapshot: SnapshotAssertion) -> None: + # the leading declaration keyword must appear exactly once (no `type type` corruption) + for keyword in ("type", "var", "const"): + assert f"{keyword} {keyword} " not in code_diff.modified_content, ( + f"Replacement duplicated the '{keyword}' keyword: {code_diff.modified_content!r}" + ) + return super()._test_diff(code_diff, snapshot) + + +@pytest.mark.go +@pytest.mark.skipif(sys.platform == "win32", reason="gopls is not provisioned on the Windows CI runner") +def test_go_symbol_replacement_no_double_keyword(snapshot: SnapshotAssertion): + """ + Test that replacing a Go ``type``/``var``/``const`` declaration does not duplicate the leading + keyword. + + This exercises the bug where: + - Original: type NamedInt int + - Replacement body (as displayed by find_symbol): type NamedInt int64 + - Bug result would be: type type NamedInt int64 (the original `type ` prefix is kept because the + gopls symbol range starts at the identifier, and the supplied body adds another `type`) + - Correct result should be: type NamedInt int64 + + gopls reports the symbol range of such declarations starting at the identifier (after the + keyword), unlike ``func`` declarations whose range includes the ``func`` keyword. The gopls + range extension prevents the duplicated keyword. + """ + test_case = GoDeclReplacementTest( + Language.GO, + "symbol_body.go", + "NamedInt", + GO_DECL_REPLACEMENT, + ) + test_case.run_test(content_after_ground_truth=snapshot) + + class RenameSymbolTest(EditingTest): def __init__(self, language: Language, rel_path: str, symbol_name: str, new_name: str): super().__init__(language, rel_path) diff --git a/test/solidlsp/go/test_go_basic.py b/test/solidlsp/go/test_go_basic.py index 347f60221..2388b78c1 100644 --- a/test/solidlsp/go/test_go_basic.py +++ b/test/solidlsp/go/test_go_basic.py @@ -48,6 +48,46 @@ def test_find_referencing_symbols(self, language_server: SolidLanguageServer) -> refs = language_server.request_references(file_path, sel_start["line"], sel_start["character"]) assert any("main.go" in ref.get("uri", "") for ref in refs), "Expected at least one reference result to point at main.go" + @pytest.mark.parametrize("language_server", [Language.GO], indirect=True) + def test_type_var_const_body_includes_leading_keyword(self, language_server: SolidLanguageServer) -> None: + """ + Single ``type``/``var``/``const`` declarations must expose a body and replacement range that + include the leading keyword, just like ``func`` declarations do. + + Regression test for gopls reporting the symbol range of such declarations starting at the + declared identifier (after the keyword) rather than at the keyword. That asymmetry made + replace_symbol_body drop the keyword from the body and replacement range, so a natural + keyword-inclusive round-trip edit corrupted the file (e.g. ``type Foo`` -> ``type type Foo``). + """ + all_symbols, _ = language_server.request_document_symbols("symbol_body.go").get_all_symbols_and_roots() + symbols_by_name = {sym.get("name"): sym for sym in all_symbols} + + # single declarations: body starts with the keyword, the range start moves to the keyword + # (column 0 here), and the selection range still points at the identifier after the keyword + expected_keyword_by_name = { + "BodyStruct": "type ", + "NamedInt": "type ", + "AliasInt": "type ", + "GlobalCounter": "var ", + "MaxItems": "const ", + } + for name, keyword in expected_keyword_by_name.items(): + sym = symbols_by_name.get(name) + assert sym is not None, f"{name} not found in symbol_body.go" + body = sym["body"].get_text() + assert body.startswith(keyword), f"Expected body of {name} to start with {keyword!r}, got {body[:24]!r}" + assert sym["location"]["range"]["start"]["character"] == 0, f"Expected {name} body range to start at the keyword (col 0)" + assert sym["selectionRange"]["start"]["character"] > 0, f"Expected {name} selectionRange to point at the identifier" + + # grouped declarations keep the keyword on a separate line (e.g. ``var ( ... )``), so their + # bodies must NOT include it and their ranges must be left untouched + for name in ("GroupedA", "GroupedB"): + sym = symbols_by_name.get(name) + assert sym is not None, f"{name} not found in symbol_body.go" + body = sym["body"].get_text() + assert body.startswith(name), f"Expected grouped var {name} body to start with the identifier, got {body[:24]!r}" + assert not body.startswith("var"), f"Grouped var {name} body must not include the 'var' keyword" + if language_has_verified_implementation_support(Language.GO): @pytest.mark.parametrize("language_server", [Language.GO], indirect=True)