From 673436af26ce3cc403a19fc3268517b89f267cbc Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 13 May 2026 04:58:56 +0000 Subject: [PATCH] feat(search): require versioned index envs, derive template + read patterns - New pkg/searchindex.StripVersion helper splits "-v" inputs. - search-sync-worker: make MSG_INDEX_PREFIX, SPOTLIGHT_INDEX, USER_ROOM_INDEX all required. Validate "-v$" on messages/spotlight at startup. USER_ROOM_INDEX is intentionally unversioned (ES terms_lookup rejects wildcards and the index is rebuildable from Mongo). - messages and spotlight: derive template name + index_patterns from the stripped base so future v2/v3 indices inherit the same template; write target stays the full versioned name. - user-room: derive template name from env value; index_patterns is the exact single index, matching the terms_lookup constraint. - search-service: validate SEARCH_SPOTLIGHT_INDEX "-v" suffix and derive a "-*" read pattern so queries span versions during reindex windows. SEARCH_USER_ROOM_INDEX flows through unchanged for use in terms_lookup. Drop dead UserRoomIndex constant and resolveUserRoomIndex empty-string fallback now that the env is required. - docker-compose: set the new env values for both services. https://claude.ai/code/session_01E2Cc6bj5Qo8GsYwZV8YEC2 --- .../plans/2026-05-13-search-index-env-vars.md | 862 ++++++++++++++++++ pkg/searchindex/version.go | 29 + pkg/searchindex/version_test.go | 83 ++ search-service/deploy/docker-compose.yml | 4 +- search-service/handler.go | 4 +- search-service/handler_test.go | 4 +- search-service/integration_test.go | 15 +- search-service/main.go | 11 +- search-service/query_messages.go | 9 +- search-service/query_messages_test.go | 14 +- search-service/store_es.go | 18 +- search-service/store_es_test.go | 9 - search-sync-worker/deploy/docker-compose.yml | 2 + search-sync-worker/inbox_integration_test.go | 4 +- search-sync-worker/main.go | 20 +- search-sync-worker/messages.go | 5 +- search-sync-worker/messages_test.go | 14 +- search-sync-worker/spotlight.go | 12 +- search-sync-worker/spotlight_test.go | 24 +- search-sync-worker/user_room.go | 2 +- search-sync-worker/user_room_test.go | 20 +- 21 files changed, 1078 insertions(+), 87 deletions(-) create mode 100644 docs/superpowers/plans/2026-05-13-search-index-env-vars.md create mode 100644 pkg/searchindex/version.go create mode 100644 pkg/searchindex/version_test.go diff --git a/docs/superpowers/plans/2026-05-13-search-index-env-vars.md b/docs/superpowers/plans/2026-05-13-search-index-env-vars.md new file mode 100644 index 000000000..d144cbf4a --- /dev/null +++ b/docs/superpowers/plans/2026-05-13-search-index-env-vars.md @@ -0,0 +1,862 @@ +# Search Index Env Vars: Required + Versioned Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Make every search index env var on `search-sync-worker` and `search-service` required (no silent-fallback defaults), enforce a consistent `-v` versioning contract on the two versionable indices (messages, spotlight), and keep user-room unversioned because ES `terms_lookup` cannot accept a wildcard pattern. + +**Architecture:** +- New shared helper `pkg/searchindex.StripVersion(name)` returns `(base, version, ok)` by stripping a `-v\d+$` suffix. +- `search-sync-worker` makes all three index envs required, validates `-v` suffix on messages and spotlight, and derives ES template names + `index_patterns` from the stripped base so future v2/v3 indices auto-inherit the template. +- `search-service` keeps its envs required, uses `SEARCH_USER_ROOM_INDEX` as-is for `terms_lookup` (single concrete index, ES constraint), and strips the version off `SEARCH_SPOTLIGHT_INDEX` at startup to build a `-*` read pattern. Messages search pattern stays hardcoded for cross-cluster federation. +- Dead-code cleanup: drop the `UserRoomIndex` constant + `resolveUserRoomIndex` fallback in `search-service` now that the env is no longer optional. +- docker-compose files updated so a fresh `docker compose up` brings everything up with the new contract. + +**Tech Stack:** Go 1.25, `caarlos0/env/v11`, `nats.go/jetstream`, Elasticsearch (`pkg/searchengine`), Testify, mockgen. All commands run via the root Makefile. + +--- + +## File Structure + +| File | Action | Purpose | +|---|---|---| +| `pkg/searchindex/version.go` | Create | `StripVersion(name) (base string, version int, ok bool)` helper, regex `-v\d+$` | +| `pkg/searchindex/version_test.go` | Create | Table-driven tests for `StripVersion` covering suffix present/absent, bad suffixes, edge cases | +| `search-sync-worker/main.go` | Modify | Remove `envDefault:""` + fallback blocks; mark spotlight/user-room `,required`; add startup validation for `-v` on messages + spotlight | +| `search-sync-worker/messages.go` | Modify | Derive template name + `index_patterns` from `StripVersion(prefix)` base, not raw prefix | +| `search-sync-worker/messages_test.go` | Modify | Update template-name + pattern assertions for new derived values | +| `search-sync-worker/spotlight.go` | Modify | Same: derive template name + pattern from base, write target stays the full versioned env value | +| `search-sync-worker/spotlight_test.go` | Modify | Update template-name + pattern assertions | +| `search-sync-worker/user_room.go` | Modify | Template name = `_template`; `index_patterns` = `[envValue]` (single exact name) — encodes "intentionally unversioned" | +| `search-sync-worker/user_room_test.go` | Modify | Update template-name assertion | +| `search-sync-worker/deploy/docker-compose.yml` | Modify | Add `SPOTLIGHT_INDEX` and `USER_ROOM_INDEX` env vars (previously omitted, relied on fallbacks) | +| `search-service/main.go` | Modify | Validate `-v` on `SEARCH_SPOTLIGHT_INDEX`; compute spotlight read pattern at startup; pass it into handler | +| `search-service/handler.go` | Modify | Rename `SpotlightIndex` field to `SpotlightReadPattern` to reflect its new semantics; the value passed in is `-*` | +| `search-service/handler_test.go` | Modify | Update field name + expected search-index assertion | +| `search-service/store_es.go` | Modify | Delete `const UserRoomIndex` and `resolveUserRoomIndex`; constructor drops the fallback | +| `search-service/store_es_test.go` | Modify | Remove the test that asserted the fallback | +| `search-service/query_messages.go` | Modify | Drop the `resolveUserRoomIndex(...)` call in `buildMessageQuery`; caller is now responsible | +| `search-service/query_messages_test.go` | Modify | Drop the resolver dependency; pass concrete index name directly | +| `search-service/integration_test.go` | Modify | Update test fixture index names to versioned form for spotlight; rename `UserRoomIndex` constant references to use the test config value | +| `search-service/deploy/docker-compose.yml` | Modify | Drop trailing `-chat` from `SEARCH_SPOTLIGHT_INDEX` so it conforms to `-v$` | + +--- + +## Task 1: Create `pkg/searchindex` package + +**Files:** +- Create: `pkg/searchindex/version.go` +- Test: `pkg/searchindex/version_test.go` + +- [ ] **Step 1: Write the failing test** + +Create `pkg/searchindex/version_test.go`: + +```go +package searchindex + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestStripVersion(t *testing.T) { + tests := []struct { + name string + input string + wantBase string + wantVersion int + wantOK bool + }{ + { + name: "single digit version", + input: "messages-site-a-v1", + wantBase: "messages-site-a", + wantVersion: 1, + wantOK: true, + }, + { + name: "multi digit version", + input: "spotlight-site-b-v42", + wantBase: "spotlight-site-b", + wantVersion: 42, + wantOK: true, + }, + { + name: "no suffix returns input unchanged", + input: "user-room-mv-site-a", + wantBase: "user-room-mv-site-a", + wantVersion: 0, + wantOK: false, + }, + { + name: "uppercase V is not stripped", + input: "messages-site-a-V1", + wantBase: "messages-site-a-V1", + wantVersion: 0, + wantOK: false, + }, + { + name: "non-numeric tail is not stripped", + input: "messages-site-a-v1a", + wantBase: "messages-site-a-v1a", + wantVersion: 0, + wantOK: false, + }, + { + name: "version in middle is not stripped", + input: "messages-v1-site-a", + wantBase: "messages-v1-site-a", + wantVersion: 0, + wantOK: false, + }, + { + name: "empty input", + input: "", + wantBase: "", + wantVersion: 0, + wantOK: false, + }, + { + name: "only version suffix", + input: "-v1", + wantBase: "", + wantVersion: 1, + wantOK: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + base, version, ok := StripVersion(tc.input) + assert.Equal(t, tc.wantBase, base) + assert.Equal(t, tc.wantVersion, version) + assert.Equal(t, tc.wantOK, ok) + }) + } +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `go test ./pkg/searchindex/... -run TestStripVersion -v` +Expected: FAIL — `pkg/searchindex` does not exist yet. + +- [ ] **Step 3: Write minimal implementation** + +Create `pkg/searchindex/version.go`: + +```go +// Package searchindex contains small helpers shared by services that read +// and write Elasticsearch indices managed by search-sync-worker. Today it +// only carries the StripVersion helper used to derive a base index name +// from a versioned write target so callers can construct read patterns +// like "-*" without each service reinventing the parsing rule. +package searchindex + +import ( + "regexp" + "strconv" +) + +var versionSuffix = regexp.MustCompile(`-v(\d+)$`) + +// StripVersion splits a versioned index name like "messages-site-a-v1" +// into its base ("messages-site-a") and integer version (1). When the +// input does not end in a -v suffix (e.g. user-room indices that are +// intentionally unversioned), the input is returned unchanged with +// ok=false so callers can treat the value as a literal index name. +// +// Matching is anchored to the end of the string and accepts only lowercase +// `v` followed by one or more digits. Anything else (uppercase V, trailing +// alpha characters, version tokens embedded mid-name) is rejected so the +// helper stays predictable. +func StripVersion(name string) (base string, version int, ok bool) { + m := versionSuffix.FindStringSubmatchIndex(name) + if m == nil { + return name, 0, false + } + v, err := strconv.Atoi(name[m[2]:m[3]]) + if err != nil { + return name, 0, false + } + return name[:m[0]], v, true +} +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `go test ./pkg/searchindex/... -run TestStripVersion -v` +Expected: PASS, all 8 subtests green. + +- [ ] **Step 5: Commit** + +```bash +git add pkg/searchindex/version.go pkg/searchindex/version_test.go +git commit -m "feat(searchindex): add StripVersion helper for derived base names" +``` + +--- + +## Task 2: Make search-sync-worker envs required and validate version suffix + +**Files:** +- Modify: `search-sync-worker/main.go:42-53` (config struct), `:91-96` (delete fallback block) + +- [ ] **Step 1: Update the config struct** + +Replace lines 51-53 of `search-sync-worker/main.go`: + +```go + MsgIndexPrefix string `env:"MSG_INDEX_PREFIX,required"` + SpotlightIndex string `env:"SPOTLIGHT_INDEX,required"` + UserRoomIndex string `env:"USER_ROOM_INDEX,required"` +``` + +with the same three lines but make all three `,required` (MsgIndexPrefix already is; spotlight/user-room change). After this step the three lines should read exactly as shown above. + +- [ ] **Step 2: Delete the fallback block** + +Delete lines 91-96 of `search-sync-worker/main.go` entirely: + +```go + if cfg.SpotlightIndex == "" { + cfg.SpotlightIndex = fmt.Sprintf("spotlight-%s-v1-chat", cfg.SiteID) + } + if cfg.UserRoomIndex == "" { + cfg.UserRoomIndex = fmt.Sprintf("user-room-%s", cfg.SiteID) + } +``` + +If after deletion `fmt` is no longer used in `main.go`, also remove it from the import block. Quick check: `fmt.Sprintf` is used elsewhere in main.go for error messages? Search the file before deleting the import. If `fmt` is unused, `make lint` will catch it. + +- [ ] **Step 3: Add version-suffix validation** + +Immediately after the existing `cfg.BulkFlushInterval <= 0` check (around line 115 of `main.go`), append: + +```go + if _, _, ok := searchindex.StripVersion(cfg.MsgIndexPrefix); !ok { + slog.Error("invalid config", "name", "MSG_INDEX_PREFIX", "value", cfg.MsgIndexPrefix, "reason", "must end with -v, e.g. messages-site-a-v1") + os.Exit(1) + } + if _, _, ok := searchindex.StripVersion(cfg.SpotlightIndex); !ok { + slog.Error("invalid config", "name", "SPOTLIGHT_INDEX", "value", cfg.SpotlightIndex, "reason", "must end with -v, e.g. spotlight-site-a-v1") + os.Exit(1) + } +``` + +Note: `USER_ROOM_INDEX` is intentionally NOT validated for a `-v` suffix — this index is rebuilt from MongoDB rather than ES `_reindex` and the value is passed verbatim into ES `terms_lookup`, which rejects wildcards. Encoding "unversioned" in the env shape is intentional. + +Add the import for `github.com/hmchangw/chat/pkg/searchindex` to the import block at the top of `main.go`. + +- [ ] **Step 4: Build to confirm it compiles** + +Run: `make build SERVICE=search-sync-worker` +Expected: build succeeds. + +- [ ] **Step 5: Commit** + +```bash +git add search-sync-worker/main.go +git commit -m "feat(search-sync-worker): require index envs and validate -v suffix" +``` + +--- + +## Task 3: Derive messages template name + index_patterns from stripped base + +**Files:** +- Modify: `search-sync-worker/messages.go:41-46`, `:147-150` +- Test: `search-sync-worker/messages_test.go` + +- [ ] **Step 1: Update the failing test first** + +In `search-sync-worker/messages_test.go`, find the test asserting `TemplateName()` (likely named `TestMessageCollection_TemplateName` or similar). Replace whatever it asserts today with: + +```go +func TestMessageCollection_TemplateName_StripsVersion(t *testing.T) { + c := newMessageCollection("messages-site-a-v1") + assert.Equal(t, "messages-site-a_template", c.TemplateName()) +} + +func TestMessageCollection_TemplateName_BareBaseFallback(t *testing.T) { + // Defensive: if a non-versioned value reaches the collection (validation + // is enforced upstream in main.go), TemplateName must still produce a + // stable, valid ES template name rather than panicking. + c := newMessageCollection("messages-site-a") + assert.Equal(t, "messages-site-a_template", c.TemplateName()) +} + +func TestMessageCollection_TemplateBody_PatternStripsVersion(t *testing.T) { + c := newMessageCollection("messages-site-a-v1") + body := c.TemplateBody() + var decoded map[string]any + require.NoError(t, json.Unmarshal(body, &decoded)) + patterns, ok := decoded["index_patterns"].([]any) + require.True(t, ok) + require.Len(t, patterns, 1) + assert.Equal(t, "messages-site-a-*", patterns[0]) +} +``` + +Make sure the file imports `encoding/json` and `github.com/stretchr/testify/require` if they aren't already in the test file. + +- [ ] **Step 2: Run test to verify it fails** + +Run: `make test SERVICE=search-sync-worker` +Expected: FAIL on the three new tests — current code returns `messages-site-a-v1_template` and pattern `messages-site-a-v1-*`. + +- [ ] **Step 3: Update messages.go to strip version** + +Edit `search-sync-worker/messages.go`. Find: + +```go +func (c *messageCollection) TemplateName() string { + return fmt.Sprintf("%s_template", c.indexPrefix) +} +``` + +Replace with: + +```go +func (c *messageCollection) TemplateName() string { + base, _, _ := searchindex.StripVersion(c.indexPrefix) + return fmt.Sprintf("%s_template", base) +} +``` + +Find: + +```go +func messageTemplateBody(prefix string) json.RawMessage { + ... + "index_patterns": []string{fmt.Sprintf("%s-*", prefix)}, +``` + +Replace the `index_patterns` line with: + +```go + "index_patterns": []string{fmt.Sprintf("%s-*", stripMsgVersion(prefix))}, +``` + +And add a small helper at the bottom of the file: + +```go +// stripMsgVersion drops a trailing -v from a messages index prefix so the +// template's index_patterns match both the current version's monthly indices +// (e.g. messages-site-a-v1-2026-05) AND any future versions (v2-*, v3-*). +func stripMsgVersion(prefix string) string { + base, _, _ := searchindex.StripVersion(prefix) + return base +} +``` + +Add the import for `github.com/hmchangw/chat/pkg/searchindex`. + +Note: the **write target** for messages still uses `c.indexPrefix` unchanged (so monthly indices become `messages-site-a-v1-2026-05`). Only the template name and `index_patterns` are derived from the stripped base. Verify by grepping `messages.go` for `c.indexPrefix` — every use that constructs a write index name should keep using the full versioned prefix. + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `make test SERVICE=search-sync-worker` +Expected: all messages tests green. + +- [ ] **Step 5: Commit** + +```bash +git add search-sync-worker/messages.go search-sync-worker/messages_test.go +git commit -m "feat(search-sync-worker): derive messages template name/pattern from stripped base" +``` + +--- + +## Task 4: Derive spotlight template name + index_patterns from stripped base + +**Files:** +- Modify: `search-sync-worker/spotlight.go:30-36`, `:126-135` +- Test: `search-sync-worker/spotlight_test.go` + +- [ ] **Step 1: Update the failing test first** + +In `search-sync-worker/spotlight_test.go`, find existing template-name and template-body assertions and replace with: + +```go +func TestSpotlightCollection_TemplateName_StripsVersion(t *testing.T) { + c := newSpotlightCollection("spotlight-site-a-v1") + assert.Equal(t, "spotlight-site-a_template", c.TemplateName()) +} + +func TestSpotlightCollection_TemplateBody_PatternStripsVersion(t *testing.T) { + c := newSpotlightCollection("spotlight-site-a-v1") + body := c.TemplateBody() + var decoded map[string]any + require.NoError(t, json.Unmarshal(body, &decoded)) + patterns, ok := decoded["index_patterns"].([]any) + require.True(t, ok) + require.Len(t, patterns, 1) + assert.Equal(t, "spotlight-site-a-*", patterns[0]) +} +``` + +If the existing tests assert different values, delete them — they're encoding the old contract. + +- [ ] **Step 2: Run test to verify it fails** + +Run: `make test SERVICE=search-sync-worker` +Expected: FAIL — current code returns `"spotlight_template"` and pattern `[spotlight-site-a-v1]`. + +- [ ] **Step 3: Update spotlight.go** + +Replace lines 30-36 of `search-sync-worker/spotlight.go`: + +```go +func (c *spotlightCollection) TemplateName() string { + base, _, _ := searchindex.StripVersion(c.indexName) + return fmt.Sprintf("%s_template", base) +} + +func (c *spotlightCollection) TemplateBody() json.RawMessage { + return spotlightTemplateBody(c.indexName) +} +``` + +Find `spotlightTemplateBody` (around line 130) and change the `index_patterns` line from: + +```go + "index_patterns": []string{indexName}, +``` + +to: + +```go + "index_patterns": []string{fmt.Sprintf("%s-*", stripSpotlightVersion(indexName))}, +``` + +Add helper at the bottom of `spotlight.go`: + +```go +// stripSpotlightVersion drops a trailing -v from a spotlight index name +// so the template's index_patterns match the current and any future +// versioned indices (spotlight-{site}-v1, v2, v3, ...). +func stripSpotlightVersion(name string) string { + base, _, _ := searchindex.StripVersion(name) + return base +} +``` + +Add import `github.com/hmchangw/chat/pkg/searchindex`. + +The write target (where docs are indexed) continues to use `c.indexName` unchanged — that's the concrete versioned index like `spotlight-site-a-v1`. + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `make test SERVICE=search-sync-worker` +Expected: spotlight tests green; all other worker tests still green. + +- [ ] **Step 5: Commit** + +```bash +git add search-sync-worker/spotlight.go search-sync-worker/spotlight_test.go +git commit -m "feat(search-sync-worker): derive spotlight template name/pattern from stripped base" +``` + +--- + +## Task 5: Update user-room collection template name (no versioning) + +**Files:** +- Modify: `search-sync-worker/user_room.go:39-45` +- Test: `search-sync-worker/user_room_test.go` + +- [ ] **Step 1: Update the failing test** + +In `search-sync-worker/user_room_test.go`, replace the existing template-name assertion with: + +```go +func TestUserRoomCollection_TemplateName_DerivesFromEnv(t *testing.T) { + c := newUserRoomCollection("user-room-mv-site-a") + assert.Equal(t, "user-room-mv-site-a_template", c.TemplateName()) +} + +func TestUserRoomCollection_TemplateBody_PatternIsExactName(t *testing.T) { + // User-room is intentionally unversioned: ES terms_lookup forbids + // wildcards, so the template matches exactly one concrete index. + c := newUserRoomCollection("user-room-mv-site-a") + body := c.TemplateBody() + var decoded map[string]any + require.NoError(t, json.Unmarshal(body, &decoded)) + patterns, ok := decoded["index_patterns"].([]any) + require.True(t, ok) + require.Len(t, patterns, 1) + assert.Equal(t, "user-room-mv-site-a", patterns[0]) +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `make test SERVICE=search-sync-worker` +Expected: FAIL — current code returns hardcoded `"user_room_template"`. + +- [ ] **Step 3: Update user_room.go** + +Replace lines 39-45 of `search-sync-worker/user_room.go`: + +```go +func (c *userRoomCollection) TemplateName() string { + return fmt.Sprintf("%s_template", c.indexName) +} + +func (c *userRoomCollection) TemplateBody() json.RawMessage { + return userRoomTemplateBody(c.indexName) +} +``` + +`userRoomTemplateBody` already sets `index_patterns: []string{indexName}` (the exact-name behavior we want), so no changes needed there. + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `make test SERVICE=search-sync-worker` +Expected: all worker tests green. + +- [ ] **Step 5: Commit** + +```bash +git add search-sync-worker/user_room.go search-sync-worker/user_room_test.go +git commit -m "feat(search-sync-worker): derive user-room template name from env value" +``` + +--- + +## Task 6: Update search-sync-worker docker-compose + +**Files:** +- Modify: `search-sync-worker/deploy/docker-compose.yml` + +- [ ] **Step 1: Add the two new env vars** + +Edit `search-sync-worker/deploy/docker-compose.yml`. After the existing `MSG_INDEX_PREFIX` line, add: + +```yaml + - SPOTLIGHT_INDEX=spotlight-site-local-v1 + - USER_ROOM_INDEX=user-room-mv-site-local +``` + +The final environment block should read: + +```yaml + environment: + - NATS_URL=nats://nats:4222 + - NATS_CREDS_FILE=/etc/nats/backend.creds + - SITE_ID=site-local + - SEARCH_URL=http://elasticsearch:9200 + - SEARCH_BACKEND=elasticsearch + - MSG_INDEX_PREFIX=messages-site-local-v1 + - SPOTLIGHT_INDEX=spotlight-site-local-v1 + - USER_ROOM_INDEX=user-room-mv-site-local + - BOOTSTRAP_STREAMS=true +``` + +Notes on chosen values: +- `spotlight-site-local-v1`: matches the `-v$` regex. The previous value `spotlight-site-local-v1-chat` would NOT pass validation since `-chat` follows the `-v1`; dropping the `-chat` suffix is intentional and aligns local dev with the new contract. +- `user-room-mv-site-local`: unversioned, matches the new design. The `-mv` keeps consistency with how `roomMembers` is referred to elsewhere in the codebase. + +- [ ] **Step 2: Verify it boots locally (optional smoke check)** + +If Docker is available locally: + +```bash +docker compose -f search-sync-worker/deploy/docker-compose.yml up -d +docker compose -f search-sync-worker/deploy/docker-compose.yml logs search-sync-worker | head -50 +docker compose -f search-sync-worker/deploy/docker-compose.yml down +``` + +Expected: worker starts without "parse config" or "invalid config" errors and logs the three index names. + +If Docker isn't available, skip this step — the integration tests in Task 11 will exercise the same code paths. + +- [ ] **Step 3: Commit** + +```bash +git add search-sync-worker/deploy/docker-compose.yml +git commit -m "chore(search-sync-worker): set SPOTLIGHT_INDEX and USER_ROOM_INDEX in compose" +``` + +--- + +## Task 7: Validate spotlight env + derive read pattern in search-service + +**Files:** +- Modify: `search-service/main.go:45-54`, `:111-125` (handler wiring) +- Modify: `search-service/handler.go:18-30` + +- [ ] **Step 1: Update SearchConfig field name + docs** + +Rename the field in `search-service/main.go:52` so the new semantic (read pattern, not concrete name) is clear at call sites: + +```go + SpotlightIndex string `env:"SPOTLIGHT_INDEX,required"` +``` + +stays the same at the env layer (operators still set `SEARCH_SPOTLIGHT_INDEX=spotlight-site-a-v1`), but downstream we'll compute and pass a derived pattern, not the raw env value. + +- [ ] **Step 2: Add startup validation + derivation** + +In `search-service/main.go`, after `cfg, err := env.ParseAs[Config]()` (around line 73) and after any `os.Exit(1)` on parse failure, add: + +```go + spotBase, _, ok := searchindex.StripVersion(cfg.Search.SpotlightIndex) + if !ok { + slog.Error("invalid config", "name", "SEARCH_SPOTLIGHT_INDEX", "value", cfg.Search.SpotlightIndex, "reason", "must end with -v, e.g. spotlight-site-a-v1") + os.Exit(1) + } + spotlightReadPattern := fmt.Sprintf("%s-*", spotBase) +``` + +User-room is NOT validated against `-v` here — the env value is used as-is for `terms_lookup`. It's still `,required`, so empty values fail at parse time. + +Add imports for `fmt` and `github.com/hmchangw/chat/pkg/searchindex`. + +- [ ] **Step 3: Update handler wiring** + +At the existing handler construction (around line 119-125 of `main.go`), the field that today reads: + +```go + SpotlightIndex: cfg.Search.SpotlightIndex, +``` + +becomes: + +```go + SpotlightReadPattern: spotlightReadPattern, +``` + +- [ ] **Step 4: Rename field on the handler struct** + +In `search-service/handler.go` around line 22-23, change: + +```go + UserRoomIndex string + SpotlightIndex string +``` + +to: + +```go + UserRoomIndex string + SpotlightReadPattern string +``` + +Find every reference to `h.cfg.SpotlightIndex` in `handler.go` (line 140 has one) and replace with `h.cfg.SpotlightReadPattern`. There should be exactly one production usage. + +- [ ] **Step 5: Update handler tests** + +In `search-service/handler_test.go`, find references to `SpotlightIndex: testSpotlightIndex` (line 89) and rename to `SpotlightReadPattern: testSpotlightIndex`. Update `testSpotlightIndex` value (line 17) to `"spotlight-*"` so it reflects a pattern, not a concrete name. Update the assertion at line 240 to expect `[]string{"spotlight-*"}` to match the new pattern contract. + +- [ ] **Step 6: Build + run unit tests** + +Run: `make build SERVICE=search-service && make test SERVICE=search-service` +Expected: builds and all unit tests pass. + +- [ ] **Step 7: Commit** + +```bash +git add search-service/main.go search-service/handler.go search-service/handler_test.go +git commit -m "feat(search-service): derive spotlight read pattern from versioned env" +``` + +--- + +## Task 8: Remove dead UserRoomIndex fallback in search-service + +**Files:** +- Modify: `search-service/store_es.go:9-39` +- Modify: `search-service/store_es_test.go` +- Modify: `search-service/query_messages.go:31-32`, `:136-145` +- Modify: `search-service/query_messages_test.go` + +- [ ] **Step 1: Delete the fallback test** + +In `search-service/store_es_test.go`, find the test (around line 104) that asserts `resolveUserRoomIndex("")` returns the `UserRoomIndex` constant and delete that test entirely. If the file has another test that exercises `resolveUserRoomIndex` with a non-empty argument and asserts it's returned unchanged, also delete that — once the function is gone, the test goes with it. + +- [ ] **Step 2: Run remaining tests to confirm no other references** + +Run: `grep -rn "resolveUserRoomIndex\|const UserRoomIndex" search-service/` +Expected: only references remaining are the ones we're about to delete in `store_es.go` and `query_messages.go`. If any other file references either symbol, list them and add them to this task before proceeding. + +- [ ] **Step 3: Update store_es.go** + +In `search-service/store_es.go`, delete: + +- The `const UserRoomIndex = "user-room"` declaration around line 13. +- The `resolveUserRoomIndex(...)` function around lines 32-39. +- The call to `resolveUserRoomIndex(userRoomIndex)` inside `newESStore` around line 29 — replace `resolveUserRoomIndex(userRoomIndex)` with `userRoomIndex` directly so the empty-value fallback is gone. + +Final `newESStore` should read: + +```go +func newESStore(engine *searchengine.Engine, userRoomIndex string) *esStore { + return &esStore{engine: engine, userRoomIndex: userRoomIndex} +} +``` + +- [ ] **Step 4: Update query_messages.go** + +In `search-service/query_messages.go`, delete the `userRoomIndex = resolveUserRoomIndex(userRoomIndex)` line at line 32 — the caller (`handler.go` line 83) now passes a guaranteed-non-empty value (env is `,required`). + +Update the doc comment at lines 136-139 (currently documents the resolver dependency). Replace with: + +```go +// termsLookupClause resolves the user's allowed rooms via ES terms-lookup +// instead of shipping the rooms[] array on every query. The caller must +// pass a concrete, non-empty index name (validated upstream via the +// USER_ROOM_INDEX env var being marked ,required in main.go). ES +// terms_lookup rejects wildcard patterns, which is why this index is +// intentionally unversioned across the codebase. +``` + +- [ ] **Step 5: Update query_messages_test.go** + +In `search-service/query_messages_test.go`, find any test that depends on the fallback (passes `""` and expects `"user-room"`). Update to pass an explicit test index name like `"test-user-room"` and assert that exact value flows into the produced query body. + +- [ ] **Step 6: Update integration_test.go fixture names if needed** + +In `search-service/integration_test.go`, find references to the `UserRoomIndex` constant (lines 119, 276, 409, 518). Since the constant is gone, replace those references with a locally-scoped test constant at the top of the file: + +```go +const testUserRoomIndex = "user-room-mv-site-test" +``` + +Replace each `UserRoomIndex` reference inside the file with `testUserRoomIndex`. Same goes for the spotlight index references — define `const testSpotlightIndex = "spotlight-site-test-v1"` for consistency and use it in lieu of any inline `"spotlight-test"` strings. + +- [ ] **Step 7: Build + run unit tests** + +Run: `make build SERVICE=search-service && make test SERVICE=search-service` +Expected: green. + +- [ ] **Step 8: Commit** + +```bash +git add search-service/store_es.go search-service/store_es_test.go search-service/query_messages.go search-service/query_messages_test.go search-service/integration_test.go +git commit -m "refactor(search-service): drop dead UserRoomIndex fallback now that env is required" +``` + +--- + +## Task 9: Update search-service docker-compose + +**Files:** +- Modify: `search-service/deploy/docker-compose.yml:23-24` + +- [ ] **Step 1: Update the two index env values** + +Replace line 23 of `search-service/deploy/docker-compose.yml`: + +```yaml + - SEARCH_USER_ROOM_INDEX=user-room-site-local +``` + +with: + +```yaml + - SEARCH_USER_ROOM_INDEX=user-room-mv-site-local +``` + +Replace line 24: + +```yaml + - SEARCH_SPOTLIGHT_INDEX=spotlight-site-local-v1-chat +``` + +with: + +```yaml + - SEARCH_SPOTLIGHT_INDEX=spotlight-site-local-v1 +``` + +The `-chat` suffix is dropped so the value matches the `-v$` regex enforced by Task 7. Both values now match exactly what `search-sync-worker/deploy/docker-compose.yml` writes to. + +- [ ] **Step 2: Commit** + +```bash +git add search-service/deploy/docker-compose.yml +git commit -m "chore(search-service): align index env values with versioned contract" +``` + +--- + +## Task 10: End-to-end verification + +**Files:** +- None modified — verification only. + +- [ ] **Step 1: Run full unit test suite** + +Run: `make test` +Expected: every package green, no race failures, no skipped packages. + +- [ ] **Step 2: Run lint** + +Run: `make lint` +Expected: zero warnings. If `golangci-lint` flags an unused import (likely `fmt` in `search-sync-worker/main.go` if no other usage remained), remove it and re-run. + +- [ ] **Step 3: Run integration tests for both services** + +Run: `make test-integration SERVICE=search-sync-worker` +Expected: green. The worker integration test stands up a real Elasticsearch via testcontainers and exercises template upsert + indexing — this is where any mismatch between the template name/pattern and actual write index names would surface. + +Run: `make test-integration SERVICE=search-service` +Expected: green. + +- [ ] **Step 4: Local docker-compose smoke (optional but recommended)** + +If Docker is available: + +```bash +docker compose -f docker-local/docker-compose.yml up -d elasticsearch nats valkey +docker compose -f search-sync-worker/deploy/docker-compose.yml up -d +docker compose -f search-service/deploy/docker-compose.yml up -d +``` + +Wait ~10 seconds, then: + +```bash +docker compose -f search-sync-worker/deploy/docker-compose.yml logs search-sync-worker | grep -E "index template upserted|search-sync-worker running" +docker compose -f search-service/deploy/docker-compose.yml logs search-service | grep -E "ready|listening" +``` + +Expected log lines: +- `index template upserted` appears three times — once each for `messages-site-local_template`, `spotlight-site-local_template`, `user-room-mv-site-local_template`. +- `search-sync-worker running` with `msgPrefix=messages-site-local-v1 spotlightIndex=spotlight-site-local-v1 userRoomIndex=user-room-mv-site-local`. +- search-service starts without "invalid config" errors. + +Then check the templates landed in ES: + +```bash +curl -s http://localhost:9200/_index_template | jq '.index_templates[].name' +``` + +Expected: includes `messages-site-local_template`, `spotlight-site-local_template`, `user-room-mv-site-local_template`. + +Tear down: + +```bash +docker compose -f search-service/deploy/docker-compose.yml down +docker compose -f search-sync-worker/deploy/docker-compose.yml down +``` + +- [ ] **Step 5: Push branch** + +```bash +git push -u origin claude/search-index-env-vars-5cts2 +``` + +--- + +## Out of scope (intentionally deferred) + +These are NOT in this plan — flagging them so the executor doesn't drift: + +- **Production env coordination.** Ops/IaC sets the actual env values per site. Any prod site running the old `spotlight-{site}-v1-chat` value needs a coordinated rename or reindex. This plan only touches local-dev docker-compose. Production migration is a separate ops concern. +- **`docs/client-api.md` updates.** No client-facing handler changes here — both services' request/response schemas are untouched. CLAUDE.md's client-api requirement does not apply. +- **Federation for spotlight/user-room.** Today only messages search uses cross-cluster `*:` patterns. If/when spotlight gets federated, the read pattern would change to `[-*, *:-*]`; out of scope here. +- **Reindex tooling.** The v1→v2 reindex workflow (create v2, `_reindex` API call, bump env, redeploy, drop v1) is documented in this plan's design discussion but not automated. A future plan could add a `make reindex` target if it's a frequent operation. +- **`MSG_INDEX_PREFIX` semantic shift in prod data.** Today's prod monthly indices are named `messages-{site}-YYYY-MM` (no `v1`). After this change, new monthly indices created against an env of `messages-{site}-v1` will be `messages-{site}-v1-YYYY-MM`. The template pattern `messages-{site}-*` still matches both. No data migration required, but ops should be aware that newly-written months will have the version segment. diff --git a/pkg/searchindex/version.go b/pkg/searchindex/version.go new file mode 100644 index 000000000..cb76dbe05 --- /dev/null +++ b/pkg/searchindex/version.go @@ -0,0 +1,29 @@ +// Package searchindex provides helpers shared by services that interact +// with Elasticsearch indices managed by search-sync-worker. +package searchindex + +import ( + "regexp" + "strconv" +) + +var versionSuffix = regexp.MustCompile(`-v(\d+)$`) + +// StripVersion splits "-v" into its base and integer version. +// Returns ok=false for unversioned names (e.g. user-room indices) so +// callers can treat the value as a literal index name. +func StripVersion(name string) (base string, version int, ok bool) { + m := versionSuffix.FindStringSubmatchIndex(name) + if m == nil { + return name, 0, false + } + v, _ := strconv.Atoi(name[m[2]:m[3]]) + return name[:m[0]], v, true +} + +// StripVersionBase returns just the base from StripVersion, discarding +// the version number and ok flag. +func StripVersionBase(name string) string { + base, _, _ := StripVersion(name) + return base +} diff --git a/pkg/searchindex/version_test.go b/pkg/searchindex/version_test.go new file mode 100644 index 000000000..2a866bac0 --- /dev/null +++ b/pkg/searchindex/version_test.go @@ -0,0 +1,83 @@ +package searchindex + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestStripVersion(t *testing.T) { + tests := []struct { + name string + input string + wantBase string + wantVersion int + wantOK bool + }{ + { + name: "single digit version", + input: "messages-site-a-v1", + wantBase: "messages-site-a", + wantVersion: 1, + wantOK: true, + }, + { + name: "multi digit version", + input: "spotlight-site-b-v42", + wantBase: "spotlight-site-b", + wantVersion: 42, + wantOK: true, + }, + { + name: "no suffix returns input unchanged", + input: "user-room-mv-site-a", + wantBase: "user-room-mv-site-a", + wantVersion: 0, + wantOK: false, + }, + { + name: "uppercase V is not stripped", + input: "messages-site-a-V1", + wantBase: "messages-site-a-V1", + wantVersion: 0, + wantOK: false, + }, + { + name: "non-numeric tail is not stripped", + input: "messages-site-a-v1a", + wantBase: "messages-site-a-v1a", + wantVersion: 0, + wantOK: false, + }, + { + name: "version in middle is not stripped", + input: "messages-v1-site-a", + wantBase: "messages-v1-site-a", + wantVersion: 0, + wantOK: false, + }, + { + name: "empty input", + input: "", + wantBase: "", + wantVersion: 0, + wantOK: false, + }, + { + name: "only version suffix", + input: "-v1", + wantBase: "", + wantVersion: 1, + wantOK: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + base, version, ok := StripVersion(tc.input) + assert.Equal(t, tc.wantBase, base) + assert.Equal(t, tc.wantVersion, version) + assert.Equal(t, tc.wantOK, ok) + }) + } +} diff --git a/search-service/deploy/docker-compose.yml b/search-service/deploy/docker-compose.yml index 9402c4b9e..5a735a82e 100644 --- a/search-service/deploy/docker-compose.yml +++ b/search-service/deploy/docker-compose.yml @@ -20,8 +20,8 @@ services: - SEARCH_REQUEST_TIMEOUT=10s - SEARCH_METRICS_ADDR=:9090 # Local dev pins to site-suffixed indices; prod uses aliases owned by ops/IaC. - - SEARCH_USER_ROOM_INDEX=user-room-site-local - - SEARCH_SPOTLIGHT_INDEX=spotlight-site-local-v1-chat + - SEARCH_USER_ROOM_INDEX=user-room-mv-site-local + - SEARCH_SPOTLIGHT_INDEX=spotlight-site-local-v1 ports: # Expose /metrics so Prometheus / curl can scrape from the host # during local dev. The listener is bound on 0.0.0.0:9090 inside diff --git a/search-service/handler.go b/search-service/handler.go index 06753a9e4..9587ce594 100644 --- a/search-service/handler.go +++ b/search-service/handler.go @@ -20,7 +20,7 @@ type handlerConfig struct { RecentWindow time.Duration RequestTimeout time.Duration UserRoomIndex string - SpotlightIndex string + SpotlightReadPattern string } type handler struct { @@ -137,7 +137,7 @@ func (h *handler) searchRooms(c *natsrouter.Context, req model.SearchRoomsReques } observeESDone := observeES() - raw, err := h.store.Search(ctx, []string{h.cfg.SpotlightIndex}, body) + raw, err := h.store.Search(ctx, []string{h.cfg.SpotlightReadPattern}, body) observeESDone() if err != nil { slog.Error("room search backend failed", "account", account, "error", err) diff --git a/search-service/handler_test.go b/search-service/handler_test.go index 9fa720847..e7f20ecc8 100644 --- a/search-service/handler_test.go +++ b/search-service/handler_test.go @@ -14,7 +14,7 @@ import ( "github.com/hmchangw/chat/pkg/natsrouter" ) -const testSpotlightIndex = "spotlight" +const testSpotlightIndex = "spotlight-*" type fakeStore struct { searchCalls []searchCall @@ -86,7 +86,7 @@ func newTestHandler(store SearchStore, cache RestrictedRoomCache) *handler { MaxDocCounts: 100, RestrictedRoomsCacheTTL: 5 * time.Minute, RecentWindow: 365 * 24 * time.Hour, - SpotlightIndex: testSpotlightIndex, + SpotlightReadPattern: testSpotlightIndex, }) } diff --git a/search-service/integration_test.go b/search-service/integration_test.go index 4dfc2d813..45e3f37e4 100644 --- a/search-service/integration_test.go +++ b/search-service/integration_test.go @@ -29,6 +29,8 @@ import ( "github.com/hmchangw/chat/pkg/valkeyutil" ) +const testUserRoomIndex = "user-room" + // --- Fixture ----------------------------------------------------------------- // ccsFixture is the full stack for cross-cluster integration tests: two ES @@ -113,10 +115,7 @@ func setupCCSFixture(t *testing.T) *ccsFixture { t.Cleanup(func() { clientNC.Close() }) t.Logf("CCS fixture: NATS at %s", natsURL) - // Thread the same index name through both the store and handlerConfig - // so the test exercises the full SEARCH_USER_ROOM_INDEX wiring path - // (store.GetUserRoomDoc → ES index; query builder → terms-lookup index). - userRoomIndex := UserRoomIndex + userRoomIndex := testUserRoomIndex store := newESStore(localEngine, userRoomIndex) cache := newValkeyCache(valkeyClient) handler := newHandler(store, cache, handlerConfig{ @@ -125,7 +124,7 @@ func setupCCSFixture(t *testing.T) *ccsFixture { RestrictedRoomsCacheTTL: 5 * time.Minute, RecentWindow: 365 * 24 * time.Hour, UserRoomIndex: userRoomIndex, - SpotlightIndex: "spotlight-test", + SpotlightReadPattern: "spotlight-test-*", }) router := natsrouter.New(serverNC, "search-service-test") @@ -273,7 +272,7 @@ func messageTestTemplate() json.RawMessage { } func userRoomTestTemplate() json.RawMessage { - return buildTestTemplate(UserRoomIndex, map[string]any{ + return buildTestTemplate(testUserRoomIndex, map[string]any{ "userAccount": map[string]any{"type": "keyword"}, "rooms": map[string]any{ "type": "text", @@ -406,7 +405,7 @@ func TestSearchService_SearchMessages_CCS_CrossCluster_Unrestricted(t *testing.T monthIdx := "messages-" + createdAt.Format("2006-01") // user-room doc: unrestricted memberships in both rooms. - seedDoc(t, f.localURL, UserRoomIndex, account, map[string]any{ + seedDoc(t, f.localURL, testUserRoomIndex, account, map[string]any{ "userAccount": account, "rooms": []string{localRoomID, remoteRoomID}, "restrictedRooms": map[string]int64{}, @@ -515,7 +514,7 @@ func TestSearchService_SearchMessages_CCS_CrossCluster_Restricted(t *testing.T) // user-room doc: local room unrestricted, remote room restricted with hss. t.Logf("seed: upserting user-room doc for %s (restricted %s since %s)", account, remoteRoomID, hss.Format(time.RFC3339)) - seedDoc(t, f.localURL, UserRoomIndex, account, map[string]any{ + seedDoc(t, f.localURL, testUserRoomIndex, account, map[string]any{ "userAccount": account, "rooms": []string{localRoomID}, "restrictedRooms": map[string]int64{ diff --git a/search-service/main.go b/search-service/main.go index 57de16233..b8abdec8a 100644 --- a/search-service/main.go +++ b/search-service/main.go @@ -3,6 +3,7 @@ package main import ( "context" "errors" + "fmt" "log/slog" "net" "net/http" @@ -15,6 +16,7 @@ import ( "github.com/hmchangw/chat/pkg/natsutil" "github.com/hmchangw/chat/pkg/otelutil" "github.com/hmchangw/chat/pkg/searchengine" + "github.com/hmchangw/chat/pkg/searchindex" "github.com/hmchangw/chat/pkg/shutdown" "github.com/hmchangw/chat/pkg/valkeyutil" ) @@ -76,6 +78,13 @@ func main() { os.Exit(1) } + spotlightBase, _, ok := searchindex.StripVersion(cfg.Search.SpotlightIndex) + if !ok { + slog.Error("invalid config", "name", "SEARCH_SPOTLIGHT_INDEX", "value", cfg.Search.SpotlightIndex, "reason", "must end with -v, e.g. spotlight-site-a-v1") + os.Exit(1) + } + spotlightReadPattern := fmt.Sprintf("%s-*", spotlightBase) + ctx := context.Background() tracerShutdown, err := otelutil.InitTracer(ctx, "search-service") @@ -117,7 +126,7 @@ func main() { RecentWindow: cfg.Search.RecentWindow, RequestTimeout: cfg.Search.RequestTimeout, UserRoomIndex: cfg.Search.UserRoomIndex, - SpotlightIndex: cfg.Search.SpotlightIndex, + SpotlightReadPattern: spotlightReadPattern, }) router := natsrouter.New(nc, "search-service") diff --git a/search-service/query_messages.go b/search-service/query_messages.go index b04a678a0..19f0a4664 100644 --- a/search-service/query_messages.go +++ b/search-service/query_messages.go @@ -29,7 +29,6 @@ var MessageIndexPattern = []string{"messages-*", "*:messages-*"} // terms-lookup so a caller can't reach rooms they don't belong to by // passing arbitrary roomIds. func buildMessageQuery(req model.SearchMessagesRequest, account string, restricted map[string]int64, recentWindow time.Duration, userRoomIndex string) (json.RawMessage, error) { - userRoomIndex = resolveUserRoomIndex(userRoomIndex) clauses := roomAccessClauses(req.RoomIDs, account, restricted, userRoomIndex) body := map[string]any{ @@ -134,9 +133,11 @@ func scopedAccessClauses(roomIDs []string, account string, restricted map[string } // termsLookupClause resolves the user's allowed rooms via ES terms-lookup -// instead of shipping the rooms[] array on every query. Passing an empty -// userRoomIndex would produce an invalid index name, so callers must -// resolve it (e.g. via resolveUserRoomIndex) before calling. +// instead of shipping the rooms[] array on every query. The caller must +// pass a concrete, non-empty index name (enforced upstream by the +// SEARCH_USER_ROOM_INDEX env var being marked ,required in main.go). ES +// terms_lookup rejects wildcard patterns, which is why this index is +// intentionally unversioned across the codebase. func termsLookupClause(account, userRoomIndex string) map[string]any { return map[string]any{ "terms": map[string]any{ diff --git a/search-service/query_messages_test.go b/search-service/query_messages_test.go index 85696d4f7..1f770959f 100644 --- a/search-service/query_messages_test.go +++ b/search-service/query_messages_test.go @@ -35,7 +35,7 @@ func shouldClauses(t *testing.T, q map[string]any) []any { func TestBuildMessageQuery_GlobalUnrestricted(t *testing.T) { req := model.SearchMessagesRequest{SearchText: "hello", Size: 25, Offset: 0} - raw, err := buildMessageQuery(req, "alice", nil, 365*24*time.Hour, "") + raw, err := buildMessageQuery(req, "alice", nil, 365*24*time.Hour, "user-room") require.NoError(t, err) q := parseQuery(t, raw) @@ -58,7 +58,7 @@ func TestBuildMessageQuery_GlobalWithRestricted(t *testing.T) { "room-b": 1_700_000_000_000, "room-a": 1_600_000_000_000, } - raw, err := buildMessageQuery(req, "alice", restricted, 24*time.Hour, "") + raw, err := buildMessageQuery(req, "alice", restricted, 24*time.Hour, "user-room") require.NoError(t, err) q := parseQuery(t, raw) @@ -117,7 +117,7 @@ func TestBuildMessageQuery_ScopedInlineTerms(t *testing.T) { SearchText: "hi", RoomIDs: []string{"r1", "r2", "r3"}, } - raw, err := buildMessageQuery(req, "alice", nil, time.Hour, "") + raw, err := buildMessageQuery(req, "alice", nil, time.Hour, "user-room") require.NoError(t, err) shoulds := shouldClauses(t, parseQuery(t, raw)) @@ -133,7 +133,7 @@ func TestBuildMessageQuery_ScopedMixed(t *testing.T) { RoomIDs: []string{"r1", "restricted-r2", "r3"}, } restricted := map[string]int64{"restricted-r2": 1_600_000_000_000} - raw, err := buildMessageQuery(req, "alice", restricted, time.Hour, "") + raw, err := buildMessageQuery(req, "alice", restricted, time.Hour, "user-room") require.NoError(t, err) shoulds := shouldClauses(t, parseQuery(t, raw)) @@ -170,7 +170,7 @@ func TestBuildMessageQuery_ScopedAllRestricted(t *testing.T) { RoomIDs: []string{"ra"}, } restricted := map[string]int64{"ra": 1_700_000_000_000} - raw, err := buildMessageQuery(req, "alice", restricted, time.Hour, "") + raw, err := buildMessageQuery(req, "alice", restricted, time.Hour, "user-room") require.NoError(t, err) shoulds := shouldClauses(t, parseQuery(t, raw)) @@ -181,7 +181,7 @@ func TestBuildMessageQuery_ScopedAllRestricted(t *testing.T) { func TestBuildMessageQuery_RecentWindow(t *testing.T) { req := model.SearchMessagesRequest{SearchText: "hi"} - raw, err := buildMessageQuery(req, "alice", nil, 48*time.Hour, "") + raw, err := buildMessageQuery(req, "alice", nil, 48*time.Hour, "user-room") require.NoError(t, err) filters := filterClauses(t, parseQuery(t, raw)) @@ -192,7 +192,7 @@ func TestBuildMessageQuery_RecentWindow(t *testing.T) { func TestBuildMessageQuery_RecentWindowDefault(t *testing.T) { req := model.SearchMessagesRequest{SearchText: "hi"} - raw, err := buildMessageQuery(req, "alice", nil, 0, "") + raw, err := buildMessageQuery(req, "alice", nil, 0, "user-room") require.NoError(t, err) filters := filterClauses(t, parseQuery(t, raw)) diff --git a/search-service/store_es.go b/search-service/store_es.go index 1da83228b..4387be1ca 100644 --- a/search-service/store_es.go +++ b/search-service/store_es.go @@ -6,12 +6,6 @@ import ( "fmt" ) -// UserRoomIndex is the default index holding per-user access-control docs. -// The sync-worker uses a site-qualified name internally, but the search -// service reaches it via a stable alias — one name across cluster -// topologies. Overridable via SEARCH_USER_ROOM_INDEX. -const UserRoomIndex = "user-room" - // esEngine is the narrow slice of pkg/searchengine.SearchEngine the // store uses — declared at the consumer so unit tests can stub without // satisfying the full SearchEngine contract. @@ -26,17 +20,7 @@ type esStore struct { } func newESStore(engine esEngine, userRoomIndex string) *esStore { - return &esStore{engine: engine, userRoomIndex: resolveUserRoomIndex(userRoomIndex)} -} - -// resolveUserRoomIndex falls back to UserRoomIndex when empty. Kept as a -// single normalization point so both newESStore and termsLookupClause -// consult the same default without repeating the `if == ""` branch. -func resolveUserRoomIndex(name string) string { - if name == "" { - return UserRoomIndex - } - return name + return &esStore{engine: engine, userRoomIndex: userRoomIndex} } func (s *esStore) Search(ctx context.Context, indices []string, body json.RawMessage) (json.RawMessage, error) { diff --git a/search-service/store_es_test.go b/search-service/store_es_test.go index e50b2b5e8..eae5f4752 100644 --- a/search-service/store_es_test.go +++ b/search-service/store_es_test.go @@ -94,12 +94,3 @@ func TestESStore_GetUserRoomDoc_MalformedBody(t *testing.T) { _, _, err := s.GetUserRoomDoc(context.Background(), "alice") assert.Error(t, err) } - -func TestESStore_UsesDefaultIndexWhenEmpty(t *testing.T) { - eng := &stubEngine{docFound: false} - s := newESStore(eng, "") - - _, _, err := s.GetUserRoomDoc(context.Background(), "alice") - require.NoError(t, err) - assert.Equal(t, UserRoomIndex, eng.docIndex) -} diff --git a/search-sync-worker/deploy/docker-compose.yml b/search-sync-worker/deploy/docker-compose.yml index f2948e0bc..128a00a3a 100644 --- a/search-sync-worker/deploy/docker-compose.yml +++ b/search-sync-worker/deploy/docker-compose.yml @@ -12,6 +12,8 @@ services: - SEARCH_URL=http://elasticsearch:9200 - SEARCH_BACKEND=elasticsearch - MSG_INDEX_PREFIX=messages-site-local-v1 + - SPOTLIGHT_INDEX=spotlight-site-local-v1 + - USER_ROOM_INDEX=user-room-mv-site-local - BOOTSTRAP_STREAMS=true volumes: - ../../docker-local/backend.creds:/etc/nats/backend.creds:ro diff --git a/search-sync-worker/inbox_integration_test.go b/search-sync-worker/inbox_integration_test.go index 73772a515..f5a69719b 100644 --- a/search-sync-worker/inbox_integration_test.go +++ b/search-sync-worker/inbox_integration_test.go @@ -150,7 +150,7 @@ func TestSpotlightSync_Integration(t *testing.T) { ctx := context.Background() siteID := "site-spot" - indexName := "spotlight-site-spot-v1-chat" + indexName := "spotlight-singular-v1" // --- ES template + index --- engine, err := searchengine.New(ctx, searchengine.Config{Backend: "elasticsearch", URL: esURL}) @@ -243,7 +243,7 @@ func TestSpotlightSync_BulkInvite(t *testing.T) { ctx := context.Background() siteID := "site-spot-bulk" - indexName := "spotlight-site-spot-bulk-v1-chat" + indexName := "spotlight-bulk-v1" engine, err := searchengine.New(ctx, searchengine.Config{Backend: "elasticsearch", URL: esURL}) require.NoError(t, err) diff --git a/search-sync-worker/main.go b/search-sync-worker/main.go index 2d98a687b..f874bc8ed 100644 --- a/search-sync-worker/main.go +++ b/search-sync-worker/main.go @@ -15,6 +15,7 @@ import ( "github.com/hmchangw/chat/pkg/natsutil" "github.com/hmchangw/chat/pkg/otelutil" "github.com/hmchangw/chat/pkg/searchengine" + "github.com/hmchangw/chat/pkg/searchindex" "github.com/hmchangw/chat/pkg/shutdown" "github.com/hmchangw/chat/pkg/stream" ) @@ -49,8 +50,8 @@ type config struct { SearchPassword string `env:"SEARCH_PASSWORD" envDefault:""` SearchTLSSkipVerify bool `env:"SEARCH_TLS_SKIP_VERIFY" envDefault:"false"` MsgIndexPrefix string `env:"MSG_INDEX_PREFIX,required"` - SpotlightIndex string `env:"SPOTLIGHT_INDEX" envDefault:""` - UserRoomIndex string `env:"USER_ROOM_INDEX" envDefault:""` + SpotlightIndex string `env:"SPOTLIGHT_INDEX,required"` + UserRoomIndex string `env:"USER_ROOM_INDEX,required"` // FetchBatchSize is the maximum number of JetStream messages to pull // per Fetch() round-trip. Smaller values give lower latency per message @@ -88,13 +89,6 @@ func main() { os.Exit(1) } - if cfg.SpotlightIndex == "" { - cfg.SpotlightIndex = fmt.Sprintf("spotlight-%s-v1-chat", cfg.SiteID) - } - if cfg.UserRoomIndex == "" { - cfg.UserRoomIndex = fmt.Sprintf("user-room-%s", cfg.SiteID) - } - // Fail fast on non-positive batch/interval settings. Zero or negative // values degenerate runConsumer into busy loops (`Fetch(0)`, constant // flush checks) or stall it forever (`remaining <= 0` on every @@ -113,6 +107,14 @@ func main() { slog.Error("invalid config", "name", "BULK_FLUSH_INTERVAL", "value", cfg.BulkFlushInterval, "reason", "must be > 0") os.Exit(1) } + if _, _, ok := searchindex.StripVersion(cfg.MsgIndexPrefix); !ok { + slog.Error("invalid config", "name", "MSG_INDEX_PREFIX", "value", cfg.MsgIndexPrefix, "reason", "must end with -v, e.g. messages-site-a-v1") + os.Exit(1) + } + if _, _, ok := searchindex.StripVersion(cfg.SpotlightIndex); !ok { + slog.Error("invalid config", "name", "SPOTLIGHT_INDEX", "value", cfg.SpotlightIndex, "reason", "must end with -v, e.g. spotlight-site-a-v1") + os.Exit(1) + } ctx := context.Background() diff --git a/search-sync-worker/messages.go b/search-sync-worker/messages.go index 600a7b938..98914c0e6 100644 --- a/search-sync-worker/messages.go +++ b/search-sync-worker/messages.go @@ -9,6 +9,7 @@ import ( "github.com/hmchangw/chat/pkg/model" "github.com/hmchangw/chat/pkg/searchengine" + "github.com/hmchangw/chat/pkg/searchindex" "github.com/hmchangw/chat/pkg/stream" ) @@ -39,7 +40,7 @@ func (c *messageCollection) FilterSubjects(_ string) []string { } func (c *messageCollection) TemplateName() string { - return fmt.Sprintf("%s_template", c.indexPrefix) + return fmt.Sprintf("%s_template", searchindex.StripVersionBase(c.indexPrefix)) } func (c *messageCollection) TemplateBody() json.RawMessage { @@ -146,7 +147,7 @@ func messageTemplateProperties() map[string]any { func messageTemplateBody(prefix string) json.RawMessage { tmpl := map[string]any{ - "index_patterns": []string{fmt.Sprintf("%s-*", prefix)}, + "index_patterns": []string{fmt.Sprintf("%s-*", searchindex.StripVersionBase(prefix))}, "template": map[string]any{ "settings": map[string]any{ "index": map[string]any{ diff --git a/search-sync-worker/messages_test.go b/search-sync-worker/messages_test.go index 9196132a3..05eea6b25 100644 --- a/search-sync-worker/messages_test.go +++ b/search-sync-worker/messages_test.go @@ -14,12 +14,17 @@ import ( "github.com/hmchangw/chat/pkg/searchengine" ) -func TestMessageCollection_TemplateName(t *testing.T) { +func TestMessageCollection_TemplateName_StripsVersion(t *testing.T) { coll := newMessageCollection("messages-site1-v1") - assert.Equal(t, "messages-site1-v1_template", coll.TemplateName()) + assert.Equal(t, "messages-site1_template", coll.TemplateName()) } -func TestMessageCollection_TemplateBody(t *testing.T) { +func TestMessageCollection_TemplateName_BareBaseFallback(t *testing.T) { + coll := newMessageCollection("messages-site1") + assert.Equal(t, "messages-site1_template", coll.TemplateName()) +} + +func TestMessageCollection_TemplateBody_PatternStripsVersion(t *testing.T) { coll := newMessageCollection("messages-site1-v1") body := coll.TemplateBody() require.NotNil(t, body) @@ -29,7 +34,8 @@ func TestMessageCollection_TemplateBody(t *testing.T) { patterns, ok := parsed["index_patterns"].([]any) require.True(t, ok) - assert.Equal(t, "messages-site1-v1-*", patterns[0]) + require.Len(t, patterns, 1) + assert.Equal(t, "messages-site1-*", patterns[0]) tmpl := parsed["template"].(map[string]any) mappings := tmpl["mappings"].(map[string]any) diff --git a/search-sync-worker/spotlight.go b/search-sync-worker/spotlight.go index d5bcbd38e..cfdfce5b7 100644 --- a/search-sync-worker/spotlight.go +++ b/search-sync-worker/spotlight.go @@ -7,6 +7,7 @@ import ( "github.com/hmchangw/chat/pkg/model" "github.com/hmchangw/chat/pkg/searchengine" + "github.com/hmchangw/chat/pkg/searchindex" ) // spotlightCollection implements Collection for spotlight room-typeahead @@ -28,7 +29,7 @@ func (c *spotlightCollection) ConsumerName() string { } func (c *spotlightCollection) TemplateName() string { - return "spotlight_template" + return fmt.Sprintf("%s_template", searchindex.StripVersionBase(c.indexName)) } func (c *spotlightCollection) TemplateBody() json.RawMessage { @@ -123,13 +124,12 @@ func newSpotlightSearchIndex(account string, evt *model.InboxMemberEvent) Spotli } } -// spotlightTemplateBody builds the ES index template for the spotlight -// collection. The `index_patterns` field is set to the exact configured -// index name so a custom SPOTLIGHT_INDEX value still receives the correct -// mapping (no broad wildcard that might catch unrelated indices). +// spotlightTemplateBody builds the ES index template. The wildcard +// index_patterns lets a single template cover the current versioned +// index and any future reindex targets. func spotlightTemplateBody(indexName string) json.RawMessage { tmpl := map[string]any{ - "index_patterns": []string{indexName}, + "index_patterns": []string{fmt.Sprintf("%s-*", searchindex.StripVersionBase(indexName))}, "template": map[string]any{ "settings": map[string]any{ "index": map[string]any{ diff --git a/search-sync-worker/spotlight_test.go b/search-sync-worker/spotlight_test.go index 8c9722e1d..bc322f934 100644 --- a/search-sync-worker/spotlight_test.go +++ b/search-sync-worker/spotlight_test.go @@ -49,7 +49,6 @@ func TestSpotlightCollection_Metadata(t *testing.T) { coll := newSpotlightCollection("spotlight-site-a-v1-chat") assert.Equal(t, "spotlight-sync", coll.ConsumerName()) - assert.Equal(t, "spotlight_template", coll.TemplateName()) cfg := coll.StreamConfig("site-a") assert.Equal(t, "INBOX_site-a", cfg.Name) @@ -68,19 +67,24 @@ func TestSpotlightCollection_Metadata(t *testing.T) { }, filters) } -func TestSpotlightCollection_TemplateBody(t *testing.T) { - coll := newSpotlightCollection("spotlight-site-a-v1-chat") - body := coll.TemplateBody() - require.NotNil(t, body) +func TestSpotlightCollection_TemplateName_StripsVersion(t *testing.T) { + c := newSpotlightCollection("spotlight-site-a-v1") + assert.Equal(t, "spotlight-site-a_template", c.TemplateName()) +} - var parsed map[string]any - require.NoError(t, json.Unmarshal(body, &parsed)) +func TestSpotlightCollection_TemplateBody_PatternStripsVersion(t *testing.T) { + c := newSpotlightCollection("spotlight-site-a-v1") + body := c.TemplateBody() + require.NotNil(t, body) - patterns, ok := parsed["index_patterns"].([]any) + var decoded map[string]any + require.NoError(t, json.Unmarshal(body, &decoded)) + patterns, ok := decoded["index_patterns"].([]any) require.True(t, ok) - assert.Equal(t, "spotlight-site-a-v1-chat", patterns[0]) + require.Len(t, patterns, 1) + assert.Equal(t, "spotlight-site-a-*", patterns[0]) - tmpl := parsed["template"].(map[string]any) + tmpl := decoded["template"].(map[string]any) mappings := tmpl["mappings"].(map[string]any) props := mappings["properties"].(map[string]any) assert.Contains(t, props, "userAccount") diff --git a/search-sync-worker/user_room.go b/search-sync-worker/user_room.go index aa16da4e5..89bad7a76 100644 --- a/search-sync-worker/user_room.go +++ b/search-sync-worker/user_room.go @@ -37,7 +37,7 @@ func (c *userRoomCollection) ConsumerName() string { } func (c *userRoomCollection) TemplateName() string { - return "user_room_template" + return fmt.Sprintf("%s_template", c.indexName) } func (c *userRoomCollection) TemplateBody() json.RawMessage { diff --git a/search-sync-worker/user_room_test.go b/search-sync-worker/user_room_test.go index a3c18cb68..d60f4f407 100644 --- a/search-sync-worker/user_room_test.go +++ b/search-sync-worker/user_room_test.go @@ -11,11 +11,15 @@ import ( "github.com/hmchangw/chat/pkg/searchengine" ) +func TestUserRoomCollection_TemplateName_DerivesFromEnv(t *testing.T) { + c := newUserRoomCollection("user-room-mv-site-a") + assert.Equal(t, "user-room-mv-site-a_template", c.TemplateName()) +} + func TestUserRoomCollection_Metadata(t *testing.T) { coll := newUserRoomCollection("user-room-site-a") assert.Equal(t, "user-room-sync", coll.ConsumerName()) - assert.Equal(t, "user_room_template", coll.TemplateName()) assert.NotNil(t, coll.TemplateBody()) cfg := coll.StreamConfig("site-a") @@ -35,6 +39,19 @@ func TestUserRoomCollection_Metadata(t *testing.T) { }, filters) } +func TestUserRoomCollection_TemplateBody_PatternIsExactName(t *testing.T) { + c := newUserRoomCollection("user-room-mv-site-a") + body := c.TemplateBody() + require.NotNil(t, body) + + var decoded map[string]any + require.NoError(t, json.Unmarshal(body, &decoded)) + patterns, ok := decoded["index_patterns"].([]any) + require.True(t, ok) + require.Len(t, patterns, 1) + assert.Equal(t, "user-room-mv-site-a", patterns[0]) +} + func TestUserRoomCollection_TemplateBody(t *testing.T) { coll := newUserRoomCollection("user-room-site-a") body := coll.TemplateBody() @@ -45,6 +62,7 @@ func TestUserRoomCollection_TemplateBody(t *testing.T) { patterns, ok := parsed["index_patterns"].([]any) require.True(t, ok) + require.Len(t, patterns, 1) assert.Equal(t, "user-room-site-a", patterns[0]) tmpl := parsed["template"].(map[string]any)