diff --git a/CLAUDE.md b/CLAUDE.md index 0bb5c3956..892fc58b2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -149,10 +149,34 @@ All commands are wrapped in the root Makefile. Always use `make` targets — nev - Every exported function in `pkg/` must have corresponding test cases ### Integration Tests -- All integration tests use `//go:build integration` build tag -- Use `testcontainers-go` with official modules (`mongodb`, `cassandra`, `nats`) for real dependencies -- Write `setup(t *testing.T)` helpers that start a container, register `t.Cleanup`, and return a connected client -- Use `_test` as database name to avoid collisions +- All integration tests use the `//go:build integration` build tag +- Test files live in the same package as the code under test (`package main` for services, `package <pkgname>` for libraries) — never external `*_test` packages +- **Containers come from `pkg/testutil`** — do not start your own with `testcontainers.GenericContainer` / `natsmod.Run` / `mongodb.Run` etc. Process-shared helpers (one container, many tests, started via `sync.Once`, terminated via `TerminateAll`): + - `testutil.MongoDB(t, prefix) *mongo.Database` — isolated DB per test + - `testutil.CassandraKeyspace(t, prefix) (keyspace, *gocql.Session, host)` — isolated keyspace per test + - `testutil.MinIO(t, prefix) (*minio.Client, bucket)` — isolated bucket per test + - `testutil.Elasticsearch(t) string` — shared ES URL; pair with `testutil.ElasticsearchIndex(t, prefix)` for a per-test isolated index (DELETEd on cleanup) + - `testutil.NATS(t) string` — shared NATS URL with JetStream enabled +- Valkey (cluster-mode — services use this in production): + - `testutil.SharedValkeyCluster(t) *redis.ClusterClient` — process-shared cluster (started via `sync.Once`, reaped via `TerminateValkey`/`TerminateAll`). Per-test caller MUST register `t.Cleanup(func() { testutil.FlushValkey(t) })` so sibling tests start with a clean keyspace. Default choice. 
+ - `testutil.StartValkeyCluster(t) *redis.ClusterClient` — per-test cluster (each test gets its own container, torn down via `t.Cleanup`). Use ONLY when the test asserts on cluster-routing state (e.g., `pkg/roomkeystore`'s `CLUSTER KEYSLOT` checks) or owns a store wrapper that calls `Close()` on the underlying client. +- **Every integration test package must have a `TestMain` that drives cleanup**: + ```go + //go:build integration + package mypkg + + import ( + "testing" + "github.com/hmchangw/chat/pkg/testutil" + ) + + func TestMain(m *testing.M) { testutil.RunTests(m) } + ``` + `testutil.RunTests` wraps `m.Run()` + `testutil.TerminateAll()` + `os.Exit(code)`. For concurrent pre-warming use `testutil.RunTestsWithPrewarm(m, testutil.EnsureElasticsearch, testutil.EnsureNATS, ...)` — runs each `EnsureXxx` concurrently and fails fast on the first error before `m.Run`. The `testutil.PrewarmFailFast(fns...)` building block is also exposed for packages that need extra cleanup between `m.Run` and `os.Exit`. +- **Ryuk is disabled repo-wide** (via `pkg/testutil/init.go`) because our CI runner can't run the reaper sidecar. `testutil.TerminateAll` is the only cleanup mechanism on clean exits. SIGKILL / Ctrl+C will leak containers locally — acceptable trade-off; flip Ryuk back on with `TESTCONTAINERS_RYUK_DISABLED=false go test ...` if debugging a leak. +- Per-test isolation is the caller's responsibility: the `MongoDB`/`CassandraKeyspace`/`MinIO` helpers already hash `t.Name()`; for ES use a per-test unique index name and DELETE on cleanup; for NATS use a per-test `*nats.Conn` pair with `Drain`/`Shutdown` cleanups; for shared Valkey call `testutil.FlushValkey(t)` in `t.Cleanup` (StartValkeyCluster's per-test mode is automatic). +- Inline `testcontainers.GenericContainer` is only acceptable when a shared testutil container can't accommodate the test (e.g. 
search-service CCS needs two ES nodes on a shared docker network; `pkg/roomkeysender` needs NATS with WebSocket transport; `pkg/roomcrypto` needs a Node container with bundled scripts). Each inline container must store its reference and register `t.Cleanup(func() { _ = container.Terminate(ctx) })`. +- New shared dependencies (a container type used by ≥2 packages) belong in `pkg/testutil` with the same shape: `Xxx(t)` + `EnsureXxx()` + `TerminateXxx()`, container ref stored at package level, and `TerminateXxx` wired into `TerminateAll`. ### Model Tests - `pkg/model/model_test.go` verifies all domain types marshal/unmarshal correctly via a generic `roundTrip` helper diff --git a/broadcast-worker/main_test.go b/broadcast-worker/main_test.go new file mode 100644 index 000000000..937f8531a --- /dev/null +++ b/broadcast-worker/main_test.go @@ -0,0 +1,11 @@ +//go:build integration + +package main + +import ( + "testing" + + "github.com/hmchangw/chat/pkg/testutil" +) + +func TestMain(m *testing.M) { testutil.RunTests(m) } diff --git a/history-service/docker-local/.env.example b/history-service/docker-local/.env.example deleted file mode 100644 index f32b88319..000000000 --- a/history-service/docker-local/.env.example +++ /dev/null @@ -1,14 +0,0 @@ -# Environment variables for running history-service locally against docker-local services. -# Copy to .env and adjust if needed: -# cp .env.example .env -# -# Usage from repo root: -# set -a && source history-service/docker-local/.env && set +a -# go run ./history-service/cmd/ - -NATS_URL=nats://localhost:4222 -MONGO_URI=mongodb://localhost:27017 -MONGO_DB=chat -CASSANDRA_HOSTS=localhost -CASSANDRA_KEYSPACE=chat -SITE_ID=site-local diff --git a/history-service/docker-local/docker-compose.yml b/history-service/docker-local/docker-compose.yml deleted file mode 100644 index a032adf74..000000000 --- a/history-service/docker-local/docker-compose.yml +++ /dev/null @@ -1,152 +0,0 @@ -# Local development environment for history-service. 
-# Starts NATS, MongoDB, Cassandra, initializes schema, and runs the service. -# -# Usage: -# cd history-service/docker-local -# docker compose up -d # start everything -# docker compose logs -f history # follow service logs -# docker compose down # stop all services -# docker compose down -v # stop and remove volumes (clean slate) - - -services: - nats: - image: nats:2.11-alpine - ports: - - "4222:4222" - - "8222:8222" - command: ["--jetstream", "--http_port", "8222"] - healthcheck: - test: ["CMD", "wget", "--spider", "-q", "http://localhost:8222/healthz"] - interval: 5s - timeout: 3s - retries: 5 - - mongodb: - image: mongo:8 - ports: - - "27017:27017" - volumes: - - mongo-data:/data/db - healthcheck: - test: ["CMD", "mongosh", "--eval", "db.adminCommand('ping')"] - interval: 5s - timeout: 3s - retries: 5 - - cassandra: - image: cassandra:5 - ports: - - "9042:9042" - volumes: - - cassandra-data:/var/lib/cassandra - environment: - - CASSANDRA_CLUSTER_NAME=chat-dev - healthcheck: - test: ["CMD", "cqlsh", "-e", "DESCRIBE KEYSPACES"] - interval: 10s - timeout: 5s - retries: 10 - start_period: 30s - - cassandra-init: - image: cassandra:5 - depends_on: - cassandra: - condition: service_healthy - restart: "no" - entrypoint: ["/bin/bash", "-c"] - command: - - | - set -e - cqlsh cassandra <<'CQL' - CREATE KEYSPACE IF NOT EXISTS chat WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}; - CREATE TYPE IF NOT EXISTS chat."Participant" (id TEXT, eng_name TEXT, company_name TEXT, app_id TEXT, app_name TEXT, is_bot BOOLEAN, account TEXT); - CREATE TYPE IF NOT EXISTS chat."File" (id TEXT, name TEXT, type TEXT); - CREATE TYPE IF NOT EXISTS chat."Card" (template TEXT, data BLOB); - CREATE TYPE IF NOT EXISTS chat."CardAction" (verb TEXT, text TEXT, card_id TEXT, display_text TEXT, hide_exec_log BOOLEAN, card_tmid TEXT, data BLOB); - CREATE TYPE IF NOT EXISTS chat."QuotedParentMessage" (message_id TEXT, room_id TEXT, sender FROZEN<"Participant">, created_at 
TIMESTAMP, msg TEXT, mentions SET>, attachments LIST, message_link TEXT, thread_parent_id TEXT, thread_parent_created_at TIMESTAMP); - CREATE TABLE IF NOT EXISTS chat.messages_by_room ( - room_id TEXT, - created_at TIMESTAMP, - message_id TEXT, - sender FROZEN<"Participant">, - target_user FROZEN<"Participant">, - msg TEXT, - mentions SET>, - attachments LIST, - file FROZEN<"File">, - card FROZEN<"Card">, - card_action FROZEN<"CardAction">, - tshow BOOLEAN, - tcount INT, - thread_parent_id TEXT, - thread_parent_created_at TIMESTAMP, - quoted_parent_message FROZEN<"QuotedParentMessage">, - visible_to TEXT, - reactions MAP>>>, - deleted BOOLEAN, - type TEXT, - sys_msg_data BLOB, - site_id TEXT, - edited_at TIMESTAMP, - updated_at TIMESTAMP, - PRIMARY KEY ((room_id), created_at, message_id) - ) WITH CLUSTERING ORDER BY (created_at DESC, message_id DESC); - CREATE TABLE IF NOT EXISTS chat.messages_by_id ( - message_id TEXT, - room_id TEXT, - thread_room_id TEXT, - sender FROZEN<"Participant">, - target_user FROZEN<"Participant">, - msg TEXT, - mentions SET>, - attachments LIST, - file FROZEN<"File">, - card FROZEN<"Card">, - card_action FROZEN<"CardAction">, - tshow BOOLEAN, - tcount INT, - thread_parent_id TEXT, - thread_parent_created_at TIMESTAMP, - quoted_parent_message FROZEN<"QuotedParentMessage">, - visible_to TEXT, - reactions MAP>>>, - deleted BOOLEAN, - type TEXT, - sys_msg_data BLOB, - site_id TEXT, - edited_at TIMESTAMP, - created_at TIMESTAMP, - updated_at TIMESTAMP, - pinned_at TIMESTAMP, - pinned_by FROZEN<"Participant">, - PRIMARY KEY (message_id, created_at) - ) WITH CLUSTERING ORDER BY (created_at DESC); - CQL - echo "Schema initialized successfully" - - history: - build: - context: ../.. 
- dockerfile: history-service/deploy/Dockerfile - depends_on: - nats: - condition: service_healthy - mongodb: - condition: service_healthy - cassandra-init: - condition: service_completed_successfully - environment: - NATS_URL: nats://nats:4222 - SITE_ID: site-local - MONGO_URI: mongodb://mongodb:27017 - MONGO_DB: chat - CASSANDRA_HOSTS: cassandra - CASSANDRA_KEYSPACE: chat - ports: - - "8080:8080" - -volumes: - mongo-data: - cassandra-data: diff --git a/history-service/internal/cassrepo/main_test.go b/history-service/internal/cassrepo/main_test.go new file mode 100644 index 000000000..ed7ce21cd --- /dev/null +++ b/history-service/internal/cassrepo/main_test.go @@ -0,0 +1,11 @@ +//go:build integration + +package cassrepo + +import ( + "testing" + + "github.com/hmchangw/chat/pkg/testutil" +) + +func TestMain(m *testing.M) { testutil.RunTests(m) } diff --git a/history-service/internal/mongorepo/main_test.go b/history-service/internal/mongorepo/main_test.go new file mode 100644 index 000000000..22ef73259 --- /dev/null +++ b/history-service/internal/mongorepo/main_test.go @@ -0,0 +1,11 @@ +//go:build integration + +package mongorepo + +import ( + "testing" + + "github.com/hmchangw/chat/pkg/testutil" +) + +func TestMain(m *testing.M) { testutil.RunTests(m) } diff --git a/history-service/internal/service/integration_test.go b/history-service/internal/service/integration_test.go index 8b7c7d254..abd7b546c 100644 --- a/history-service/internal/service/integration_test.go +++ b/history-service/internal/service/integration_test.go @@ -1,6 +1,6 @@ //go:build integration -package service_test +package service import ( "context" @@ -16,7 +16,6 @@ import ( "github.com/hmchangw/chat/history-service/internal/cassrepo" "github.com/hmchangw/chat/history-service/internal/models" - "github.com/hmchangw/chat/history-service/internal/service" "github.com/hmchangw/chat/pkg/model" "github.com/hmchangw/chat/pkg/msgbucket" "github.com/hmchangw/chat/pkg/natsrouter" @@ -139,7 +138,7 @@ func 
TestEditMessage_Integration(t *testing.T) { session := setupCassandra(t) repo := cassrepo.NewRepository(session, msgbucket.New(24*time.Hour), 365) pub := &recordingPublisher{} - svc := service.New(repo, alwaysSubscribedRepo{}, stubRoomRepo{}, pub, nil, 730*24*time.Hour) + svc := New(repo, alwaysSubscribedRepo{}, stubRoomRepo{}, pub, nil, 730*24*time.Hour) sender := models.Participant{ID: "u1", Account: "alice"} roomID := "r-integ" @@ -202,7 +201,7 @@ func TestDeleteMessage_Integration(t *testing.T) { session := setupCassandra(t) repo := cassrepo.NewRepository(session, msgbucket.New(24*time.Hour), 365) pub := &recordingPublisher{} - svc := service.New(repo, alwaysSubscribedRepo{}, stubRoomRepo{}, pub, nil, 730*24*time.Hour) + svc := New(repo, alwaysSubscribedRepo{}, stubRoomRepo{}, pub, nil, 730*24*time.Hour) sender := models.Participant{ID: "u1", Account: "alice"} roomID := "r-del-integ" @@ -262,7 +261,7 @@ func TestDeleteMessage_ParentWithReplies_NoCascade(t *testing.T) { session := setupCassandra(t) repo := cassrepo.NewRepository(session, msgbucket.New(24*time.Hour), 365) pub := &recordingPublisher{} - svc := service.New(repo, alwaysSubscribedRepo{}, stubRoomRepo{}, pub, nil, 730*24*time.Hour) + svc := New(repo, alwaysSubscribedRepo{}, stubRoomRepo{}, pub, nil, 730*24*time.Hour) sender := models.Participant{ID: "u1", Account: "alice"} roomID := "r-parent-cascade" diff --git a/history-service/internal/service/main_test.go b/history-service/internal/service/main_test.go new file mode 100644 index 000000000..4188eb562 --- /dev/null +++ b/history-service/internal/service/main_test.go @@ -0,0 +1,11 @@ +//go:build integration + +package service + +import ( + "testing" + + "github.com/hmchangw/chat/pkg/testutil" +) + +func TestMain(m *testing.M) { testutil.RunTests(m) } diff --git a/inbox-worker/integration_test.go b/inbox-worker/integration_test.go index 8eb8599e6..876d48264 100644 --- a/inbox-worker/integration_test.go +++ b/inbox-worker/integration_test.go @@ -13,7 
+13,6 @@ import ( "github.com/nats-io/nats.go/jetstream" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - natsmod "github.com/testcontainers/testcontainers-go/modules/nats" "go.mongodb.org/mongo-driver/v2/bson" "go.mongodb.org/mongo-driver/v2/mongo" @@ -21,7 +20,6 @@ import ( "github.com/hmchangw/chat/pkg/stream" "github.com/hmchangw/chat/pkg/subject" "github.com/hmchangw/chat/pkg/testutil" - "github.com/hmchangw/chat/pkg/testutil/testimages" ) func setupMongo(t *testing.T) *mongo.Database { @@ -577,20 +575,13 @@ func TestHandleMemberAdded_DM_PersistsRemoteCounterpartSub(t *testing.T) { assert.False(t, bobSub.IsSubscribed, "DM does not set IsSubscribed=true") } -// setupNATS starts a NATS container with JetStream enabled and returns a -// JetStream client tied to the test's lifetime. +// setupNATS connects to the process-shared NATS (JetStream enabled in +// testutil) and returns a JetStream client tied to the test's lifetime. func setupNATS(t *testing.T) (context.Context, jetstream.JetStream) { t.Helper() ctx := context.Background() - c, err := natsmod.Run(ctx, testimages.NATS) - require.NoError(t, err) - t.Cleanup(func() { _ = c.Terminate(ctx) }) - - url, err := c.ConnectionString(ctx) - require.NoError(t, err) - - nc, err := nats.Connect(url) + nc, err := nats.Connect(testutil.NATS(t)) require.NoError(t, err) t.Cleanup(func() { nc.Close() }) diff --git a/inbox-worker/main_test.go b/inbox-worker/main_test.go new file mode 100644 index 000000000..937f8531a --- /dev/null +++ b/inbox-worker/main_test.go @@ -0,0 +1,11 @@ +//go:build integration + +package main + +import ( + "testing" + + "github.com/hmchangw/chat/pkg/testutil" +) + +func TestMain(m *testing.M) { testutil.RunTests(m) } diff --git a/message-worker/main_test.go b/message-worker/main_test.go new file mode 100644 index 000000000..937f8531a --- /dev/null +++ b/message-worker/main_test.go @@ -0,0 +1,11 @@ +//go:build integration + +package main + +import ( + "testing" + + 
"github.com/hmchangw/chat/pkg/testutil" +) + +func TestMain(m *testing.M) { testutil.RunTests(m) } diff --git a/notification-worker/main_test.go b/notification-worker/main_test.go new file mode 100644 index 000000000..937f8531a --- /dev/null +++ b/notification-worker/main_test.go @@ -0,0 +1,11 @@ +//go:build integration + +package main + +import ( + "testing" + + "github.com/hmchangw/chat/pkg/testutil" +) + +func TestMain(m *testing.M) { testutil.RunTests(m) } diff --git a/pkg/minioutil/main_test.go b/pkg/minioutil/main_test.go new file mode 100644 index 000000000..cad311b02 --- /dev/null +++ b/pkg/minioutil/main_test.go @@ -0,0 +1,11 @@ +//go:build integration + +package minioutil + +import ( + "testing" + + "github.com/hmchangw/chat/pkg/testutil" +) + +func TestMain(m *testing.M) { testutil.RunTests(m) } diff --git a/pkg/mongoutil/main_test.go b/pkg/mongoutil/main_test.go new file mode 100644 index 000000000..4f92525e2 --- /dev/null +++ b/pkg/mongoutil/main_test.go @@ -0,0 +1,11 @@ +//go:build integration + +package mongoutil + +import ( + "testing" + + "github.com/hmchangw/chat/pkg/testutil" +) + +func TestMain(m *testing.M) { testutil.RunTests(m) } diff --git a/pkg/natsrouter/integration_test.go b/pkg/natsrouter/integration_test.go index ad6e91220..61d910b38 100644 --- a/pkg/natsrouter/integration_test.go +++ b/pkg/natsrouter/integration_test.go @@ -1,6 +1,6 @@ //go:build integration -package natsrouter_test +package natsrouter import ( "context" @@ -13,37 +13,20 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - tcnats "github.com/testcontainers/testcontainers-go/modules/nats" "github.com/Marz32onE/instrumentation-go/otel-nats/otelnats" - "github.com/hmchangw/chat/pkg/natsrouter" - "github.com/hmchangw/chat/pkg/testutil/testimages" + "github.com/hmchangw/chat/pkg/testutil" ) -// setupNATS starts a real NATS container and returns a connected otelnats -// client. 
Required to surface timing races that in-process NATS cannot +// setupNATS returns an otelnats client connected to the process-shared +// NATS. Required to surface timing races that in-process NATS cannot // reproduce (real TCP, real server dispatch goroutines, real latency). func setupNATS(t *testing.T) *otelnats.Conn { t.Helper() - ctx := context.Background() - - container, err := tcnats.Run(ctx, testimages.NATS) - require.NoError(t, err, "start NATS container") - t.Cleanup(func() { - // Best-effort container teardown; failures here don't affect outcome. - if err := container.Terminate(ctx); err != nil { - t.Logf("terminate nats container: %v", err) - } - }) - - url, err := container.ConnectionString(ctx) - require.NoError(t, err, "nats connection string") - - nc, err := otelnats.Connect(url) + nc, err := otelnats.Connect(testutil.NATS(t)) require.NoError(t, err, "connect to NATS") t.Cleanup(nc.Close) - return nc } @@ -66,17 +49,17 @@ type echoResp struct { // override is needed. func TestIntegration_ConcurrentRequestsWithCopy(t *testing.T) { nc := setupNATS(t) - r := natsrouter.New(nc, "integration-concurrent") - r.Use(natsrouter.RequestID()) - r.Use(natsrouter.Recovery()) - r.Use(natsrouter.Logging()) + r := New(nc, "integration-concurrent") + r.Use(RequestID()) + r.Use(Recovery()) + r.Use(Logging()) // Async goroutines use Copy() — we count them to prove they all ran. 
var asyncCompleted atomic.Int64 var asyncStarted sync.WaitGroup - natsrouter.Register(r, "chat.user.{account}.echo.{room}", - func(c *natsrouter.Context, req echoReq) (*echoResp, error) { + Register(r, "chat.user.{account}.echo.{room}", + func(c *Context, req echoReq) (*echoResp, error) { c.Set("account", c.Param("account")) c.Set("room", c.Param("room")) @@ -154,13 +137,13 @@ func TestIntegration_ShutdownUnderLoad(t *testing.T) { for cycle := 0; cycle < cycles; cycle++ { t.Run(fmt.Sprintf("cycle-%d", cycle), func(t *testing.T) { nc := setupNATS(t) - r := natsrouter.New(nc, "integration-shutdown") + r := New(nc, "integration-shutdown") var completed atomic.Int64 started := make(chan struct{}) var startOnce sync.Once - natsrouter.Register(r, "load.{id}", - func(c *natsrouter.Context, req echoReq) (*echoResp, error) { + Register(r, "load.{id}", + func(c *Context, req echoReq) (*echoResp, error) { startOnce.Do(func() { close(started) }) time.Sleep(time.Duration(1+req.Seq%7) * time.Millisecond) completed.Add(1) @@ -201,7 +184,7 @@ func TestIntegration_ShutdownUnderLoad(t *testing.T) { // reply rather than blocking. func TestIntegration_BusyReplyOnSaturation(t *testing.T) { nc := setupNATS(t) - r := natsrouter.New(nc, "integration-busy", natsrouter.WithMaxConcurrency(1)) + r := New(nc, "integration-busy", WithMaxConcurrency(1)) gate := make(chan struct{}) // Safety net: if any assertion below fails before we close the gate, @@ -220,8 +203,8 @@ func TestIntegration_BusyReplyOnSaturation(t *testing.T) { // signals `entered` before blocking on `gate`, so the busy-reply poll // only starts once the slot is genuinely held. 
entered := make(chan struct{}, 1) - natsrouter.Register(r, "busy.{id}", - func(c *natsrouter.Context, req echoReq) (*echoResp, error) { + Register(r, "busy.{id}", + func(c *Context, req echoReq) (*echoResp, error) { select { case entered <- struct{}{}: default: @@ -259,9 +242,9 @@ func TestIntegration_BusyReplyOnSaturation(t *testing.T) { data, _ := json.Marshal(echoReq{Seq: 2}) resp, err := nc.Request(context.Background(), "busy.2", data, 2*time.Second) require.NoError(t, err) - var re natsrouter.RouteError + var re RouteError require.NoError(t, json.Unmarshal(resp.Data, &re)) - assert.Equal(t, natsrouter.CodeUnavailable, re.Code, "expected busy reply once slot is held") + assert.Equal(t, CodeUnavailable, re.Code, "expected busy reply once slot is held") // Release the gate; first request must complete normally. close(gate) @@ -287,10 +270,10 @@ func TestIntegration_SpawnSitePanicBackstop(t *testing.T) { // the follow-up "ok" request acquire a slot even if cleanup were // broken, masking the regression. cap=1 forces the test to actually // observe slot release. - r := natsrouter.New(nc, "integration-panic-backstop", natsrouter.WithMaxConcurrency(1)) + r := New(nc, "integration-panic-backstop", WithMaxConcurrency(1)) - natsrouter.Register(r, "boom.{id}", - func(c *natsrouter.Context, req echoReq) (*echoResp, error) { + Register(r, "boom.{id}", + func(c *Context, req echoReq) (*echoResp, error) { panic("intentional handler panic") }) @@ -307,8 +290,8 @@ func TestIntegration_SpawnSitePanicBackstop(t *testing.T) { assert.Equal(t, "internal error", payload.Error, "expected internal error reply from backstop") // Process survived: a follow-up normal request must succeed. 
- natsrouter.Register(r, "ok.{id}", - func(c *natsrouter.Context, req echoReq) (*echoResp, error) { + Register(r, "ok.{id}", + func(c *Context, req echoReq) (*echoResp, error) { return &echoResp{Seq: req.Seq}, nil }) data, _ = json.Marshal(echoReq{Seq: 2}) @@ -324,7 +307,7 @@ func TestIntegration_SpawnSitePanicBackstop(t *testing.T) { // model) have returned, not merely until the dispatcher has stopped. func TestIntegration_ShutdownWaitsForSpawnedHandlers(t *testing.T) { nc := setupNATS(t) - r := natsrouter.New(nc, "integration-shutdown-wg", natsrouter.WithMaxConcurrency(8)) + r := New(nc, "integration-shutdown-wg", WithMaxConcurrency(8)) gate := make(chan struct{}) // Safety net: any test failure before close(gate) below would pin @@ -341,8 +324,8 @@ func TestIntegration_ShutdownWaitsForSpawnedHandlers(t *testing.T) { }() var entered atomic.Int64 var completed atomic.Int64 - natsrouter.Register(r, "wg.{id}", - func(c *natsrouter.Context, req echoReq) (*echoResp, error) { + Register(r, "wg.{id}", + func(c *Context, req echoReq) (*echoResp, error) { entered.Add(1) <-gate completed.Add(1) @@ -416,13 +399,13 @@ func TestIntegration_MultipleRouterInstances(t *testing.T) { const queue = "integration-queue-group" const instances = 3 - routers := make([]*natsrouter.Router, instances) + routers := make([]*Router, instances) hits := make([]atomic.Int64, instances) for idx := 0; idx < instances; idx++ { idx := idx - r := natsrouter.New(nc, queue) - natsrouter.Register(r, "qg.work.{id}", - func(c *natsrouter.Context, req echoReq) (*echoResp, error) { + r := New(nc, queue) + Register(r, "qg.work.{id}", + func(c *Context, req echoReq) (*echoResp, error) { hits[idx].Add(1) return &echoResp{Seq: req.Seq}, nil }) @@ -449,7 +432,7 @@ func TestIntegration_MultipleRouterInstances(t *testing.T) { // Each Shutdown call gets its own deadline. 
Reusing one ticking context // would mean the cleanup loop could see an already-expired ctx after // the warmup-shutdown + 100 sequential RPCs above. - shutdown := func(r *natsrouter.Router) { + shutdown := func(r *Router) { ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) defer cancel() require.NoError(t, r.Shutdown(ctx)) diff --git a/pkg/natsrouter/main_test.go b/pkg/natsrouter/main_test.go new file mode 100644 index 000000000..43854be72 --- /dev/null +++ b/pkg/natsrouter/main_test.go @@ -0,0 +1,11 @@ +//go:build integration + +package natsrouter + +import ( + "testing" + + "github.com/hmchangw/chat/pkg/testutil" +) + +func TestMain(m *testing.M) { testutil.RunTests(m) } diff --git a/pkg/roomcrypto/main_test.go b/pkg/roomcrypto/main_test.go new file mode 100644 index 000000000..126394e46 --- /dev/null +++ b/pkg/roomcrypto/main_test.go @@ -0,0 +1,11 @@ +//go:build integration + +package roomcrypto + +import ( + "testing" + + "github.com/hmchangw/chat/pkg/testutil" +) + +func TestMain(m *testing.M) { testutil.RunTests(m) } diff --git a/pkg/roomkeysender/integration_test.go b/pkg/roomkeysender/integration_test.go index 1ff0ea59c..efd1210e5 100644 --- a/pkg/roomkeysender/integration_test.go +++ b/pkg/roomkeysender/integration_test.go @@ -1,6 +1,6 @@ //go:build integration -package roomkeysender_test +package roomkeysender import ( "bytes" @@ -27,7 +27,6 @@ import ( "github.com/hmchangw/chat/pkg/model" "github.com/hmchangw/chat/pkg/roomcrypto" - "github.com/hmchangw/chat/pkg/roomkeysender" "github.com/hmchangw/chat/pkg/testutil/testimages" ) @@ -289,7 +288,7 @@ func TestRoomKeySender_TypeScriptClient(t *testing.T) { time.Sleep(3 * time.Second) // 6. Publish room key via roomkeysender. 
- sender := roomkeysender.NewSender(nc) + sender := NewSender(nc) evt := &model.RoomKeyEvent{ RoomID: roomID, Version: version, diff --git a/pkg/roomkeysender/main_test.go b/pkg/roomkeysender/main_test.go new file mode 100644 index 000000000..598e48bed --- /dev/null +++ b/pkg/roomkeysender/main_test.go @@ -0,0 +1,11 @@ +//go:build integration + +package roomkeysender + +import ( + "testing" + + "github.com/hmchangw/chat/pkg/testutil" +) + +func TestMain(m *testing.M) { testutil.RunTests(m) } diff --git a/pkg/roomkeystore/main_test.go b/pkg/roomkeystore/main_test.go new file mode 100644 index 000000000..1b558b7ea --- /dev/null +++ b/pkg/roomkeystore/main_test.go @@ -0,0 +1,11 @@ +//go:build integration + +package roomkeystore + +import ( + "testing" + + "github.com/hmchangw/chat/pkg/testutil" +) + +func TestMain(m *testing.M) { testutil.RunTests(m) } diff --git a/pkg/roomsubcache/integration_test.go b/pkg/roomsubcache/integration_test.go index fea394fa4..220cb0147 100644 --- a/pkg/roomsubcache/integration_test.go +++ b/pkg/roomsubcache/integration_test.go @@ -1,6 +1,6 @@ //go:build integration -package roomsubcache_test +package roomsubcache import ( "context" @@ -10,22 +10,22 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/hmchangw/chat/pkg/roomsubcache" "github.com/hmchangw/chat/pkg/testutil" "github.com/hmchangw/chat/pkg/valkeyutil" ) func setupValkey(t *testing.T) valkeyutil.Client { t.Helper() - return valkeyutil.WrapClusterClient(testutil.StartValkeyCluster(t)) + t.Cleanup(func() { testutil.FlushValkey(t) }) + return valkeyutil.WrapClusterClient(testutil.SharedValkeyCluster(t)) } func TestValkeyCache_Integration_SetGetInvalidate(t *testing.T) { client := setupValkey(t) - cache := roomsubcache.NewValkeyCache(client) + cache := NewValkeyCache(client) ctx := context.Background() - members := []roomsubcache.Member{ + members := []Member{ {ID: "u1", Account: "alice"}, {ID: "u2", Account: "bob"}, } @@ -43,7 +43,7 
@@ func TestValkeyCache_Integration_SetGetInvalidate(t *testing.T) { func TestValkeyCache_Integration_MissOnUnsetRoom(t *testing.T) { client := setupValkey(t) - cache := roomsubcache.NewValkeyCache(client) + cache := NewValkeyCache(client) ctx := context.Background() _, err := cache.Get(ctx, "never-set") @@ -52,10 +52,10 @@ func TestValkeyCache_Integration_MissOnUnsetRoom(t *testing.T) { func TestValkeyCache_Integration_TTLExpires(t *testing.T) { client := setupValkey(t) - cache := roomsubcache.NewValkeyCache(client) + cache := NewValkeyCache(client) ctx := context.Background() - require.NoError(t, cache.Set(ctx, "room-ttl", []roomsubcache.Member{{ID: "u1", Account: "a"}}, time.Second)) + require.NoError(t, cache.Set(ctx, "room-ttl", []Member{{ID: "u1", Account: "a"}}, time.Second)) // Poll for expiry — Valkey honors TTL with sub-second granularity but // asserting on a precise deadline is flaky. Allow up to 5s. @@ -73,10 +73,10 @@ func TestValkeyCache_Integration_TTLExpires(t *testing.T) { func TestValkeyCache_Integration_EmptyListIsCacheHit(t *testing.T) { client := setupValkey(t) - cache := roomsubcache.NewValkeyCache(client) + cache := NewValkeyCache(client) ctx := context.Background() - require.NoError(t, cache.Set(ctx, "empty-room", []roomsubcache.Member{}, time.Minute)) + require.NoError(t, cache.Set(ctx, "empty-room", []Member{}, time.Minute)) got, err := cache.Get(ctx, "empty-room") require.NoError(t, err) diff --git a/pkg/roomsubcache/main_test.go b/pkg/roomsubcache/main_test.go new file mode 100644 index 000000000..29f35c6de --- /dev/null +++ b/pkg/roomsubcache/main_test.go @@ -0,0 +1,11 @@ +//go:build integration + +package roomsubcache + +import ( + "testing" + + "github.com/hmchangw/chat/pkg/testutil" +) + +func TestMain(m *testing.M) { testutil.RunTests(m) } diff --git a/pkg/testutil/cassandra.go b/pkg/testutil/cassandra.go index 370edb086..ece7f82de 100644 --- a/pkg/testutil/cassandra.go +++ b/pkg/testutil/cassandra.go @@ -6,6 +6,7 @@ import ( 
"context" "fmt" "hash/fnv" + "os" "sync" "testing" "time" @@ -18,10 +19,11 @@ import ( const cassandraImage = "cassandra:5" var ( - cassOnce sync.Once - cassHost string - cassSession *gocql.Session - cassInitErr error + cassOnce sync.Once + cassContainer testcontainers.Container + cassHost string + cassSession *gocql.Session + cassInitErr error ) func ensureCassandraSession() (string, *gocql.Session, error) { @@ -70,10 +72,32 @@ func ensureCassandraSession() (string, *gocql.Session, error) { } cassHost = addr cassSession = s + cassContainer = container }) return cassHost, cassSession, cassInitErr } +// TerminateCassandra closes the shared session and stops the shared +// container. Best-effort, idempotent. +func TerminateCassandra() { + if cassSession != nil { + cassSession.Close() + cassSession = nil + } + if cassContainer != nil { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + if err := cassContainer.Terminate(ctx); err != nil { + fmt.Fprintf(os.Stderr, "terminate shared cassandra: %v\n", err) + } + cassContainer = nil + } +} + +// EnsureCassandra starts the shared Cassandra container if not already +// started. No-t variant intended for TestMain pre-warming. +func EnsureCassandra() error { _, _, err := ensureCassandraSession(); return err } + // CassandraKeyspace creates an isolated keyspace for the test (SimpleStrategy, RF=1). // Returns the keyspace name, an admin session for DDL, and the container host. 
func CassandraKeyspace(t *testing.T, prefix string) (keyspace string, admin *gocql.Session, hostAddr string) {
diff --git a/pkg/testutil/elasticsearch.go b/pkg/testutil/elasticsearch.go
new file mode 100644
index 000000000..b9f2ab1de
--- /dev/null
+++ b/pkg/testutil/elasticsearch.go
@@ -0,0 +1,134 @@
+//go:build integration
+
+package testutil
+
+import (
+	"context"
+	"fmt"
+	"hash/fnv"
+	"net/http"
+	"os"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/testcontainers/testcontainers-go"
+	"github.com/testcontainers/testcontainers-go/wait"
+
+	"github.com/hmchangw/chat/pkg/testutil/testimages"
+)
+
+// esCleanupHTTPClient is a bounded HTTP client for the index-delete cleanup
+// in ElasticsearchIndex. Stalled containers shouldn't hang test exit.
+var esCleanupHTTPClient = &http.Client{Timeout: 10 * time.Second}
+
+var (
+	esOnce      sync.Once
+	esContainer testcontainers.Container
+	esURL       string
+	esInitErr   error
+)
+
+// ensureElasticsearch starts the shared single-node ES container at most once
+// (guarded by esOnce) and returns its host-mapped HTTP URL, or the error
+// recorded by the first start attempt.
+func ensureElasticsearch() (string, error) {
+	esOnce.Do(func() {
+		ctx := context.Background()
+		container, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{
+			ContainerRequest: testcontainers.ContainerRequest{
+				Image:        testimages.Elasticsearch,
+				ExposedPorts: []string{"9200/tcp"},
+				Env: map[string]string{
+					"discovery.type":         "single-node",
+					"xpack.security.enabled": "false",
+					"ES_JAVA_OPTS":           "-Xms256m -Xmx256m",
+					"cluster.routing.allocation.disk.threshold_enabled": "false",
+				},
+				WaitingFor: wait.ForAll(
+					wait.ForHTTP("/").WithPort("9200/tcp").WithStartupTimeout(120*time.Second),
+					wait.ForHTTP("/_cluster/health?wait_for_status=yellow&timeout=60s").
+						WithPort("9200/tcp").
+						WithStartupTimeout(120*time.Second),
+				),
+			},
+			Started: true,
+		})
+		if err != nil {
+			// GenericContainer may return a created container together with
+			// the error (e.g. when a wait strategy times out); terminate it
+			// here because esContainer is only set on success, so nothing
+			// else tracks it. TerminateContainer is a no-op for nil.
+			_ = testcontainers.TerminateContainer(container)
+			esInitErr = fmt.Errorf("start elasticsearch: %w", err)
+			return
+		}
+		host, err := container.Host(ctx)
+		if err != nil {
+			_ = container.Terminate(ctx)
+			esInitErr = fmt.Errorf("get es host: %w", err)
+			return
+		}
+		port, err := container.MappedPort(ctx, "9200")
+		if err != nil {
+			_ = container.Terminate(ctx)
+			esInitErr = fmt.Errorf("get es port: %w", err)
+			return
+		}
+		esContainer = container
+		esURL = fmt.Sprintf("http://%s:%s", host, port.Port())
+	})
+	return esURL, esInitErr
+}
+
+// Elasticsearch returns the URL of a process-shared single-node ES container.
+func Elasticsearch(t *testing.T) string {
+	t.Helper()
+	u, err := ensureElasticsearch()
+	if err != nil {
+		t.Fatalf("testutil.Elasticsearch: %v", err)
+	}
+	return u
+}
+
+// EnsureElasticsearch is the no-t variant for TestMain pre-warming.
+func EnsureElasticsearch() error { _, err := ensureElasticsearch(); return err }
+
+// ElasticsearchIndex returns a per-test index name (fnv hash of t.Name()
+// keeps it short and ES-safe across subtest slashes) and registers a
+// DELETE on cleanup so sibling tests start clean.
+func ElasticsearchIndex(t *testing.T, prefix string) string {
+	t.Helper()
+	url := Elasticsearch(t)
+	h := fnv.New64a()
+	_, _ = h.Write([]byte(t.Name()))
+	name := fmt.Sprintf("%s-%x", prefix, h.Sum64())
+	t.Cleanup(func() {
+		req, err := http.NewRequest(http.MethodDelete, url+"/"+name, nil)
+		if err != nil {
+			t.Logf("delete index %s: build request: %v", name, err)
+			return
+		}
+		resp, err := esCleanupHTTPClient.Do(req)
+		if err != nil {
+			t.Logf("delete index %s: %v", name, err)
+			return
+		}
+		_ = resp.Body.Close()
+	})
+	return name
+}
+
+// TerminateElasticsearch stops the shared ES container. Best-effort and
+// idempotent — safe to call from TestMain even if no test touched ES.
+func TerminateElasticsearch() { + if esContainer == nil { + return + } + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + if err := esContainer.Terminate(ctx); err != nil { + fmt.Fprintf(os.Stderr, "terminate shared elasticsearch: %v\n", err) + } + esContainer = nil +} diff --git a/pkg/testutil/init.go b/pkg/testutil/init.go new file mode 100644 index 000000000..30b7ba214 --- /dev/null +++ b/pkg/testutil/init.go @@ -0,0 +1,14 @@ +//go:build integration + +package testutil + +import "os" + +// Disable testcontainers Ryuk reaper repo-wide; our CI runner can't +// run the sidecar. Cleanup is handled by TerminateAll. Set +// TESTCONTAINERS_RYUK_DISABLED=false to flip back on locally. +func init() { + if _, set := os.LookupEnv("TESTCONTAINERS_RYUK_DISABLED"); !set { + _ = os.Setenv("TESTCONTAINERS_RYUK_DISABLED", "true") + } +} diff --git a/pkg/testutil/minio.go b/pkg/testutil/minio.go index c10aac0e0..1013c1889 100644 --- a/pkg/testutil/minio.go +++ b/pkg/testutil/minio.go @@ -6,6 +6,7 @@ import ( "context" "fmt" "hash/fnv" + "os" "strings" "sync" "testing" @@ -13,15 +14,17 @@ import ( "github.com/minio/minio-go/v7" "github.com/minio/minio-go/v7/pkg/credentials" + "github.com/testcontainers/testcontainers-go" tcminio "github.com/testcontainers/testcontainers-go/modules/minio" "github.com/hmchangw/chat/pkg/testutil/testimages" ) var ( - minioOnce sync.Once - minioClient *minio.Client - minioInitErr error + minioOnce sync.Once + minioClient *minio.Client + minioContainer testcontainers.Container + minioInitErr error ) func ensureMinIOClient() (*minio.Client, error) { @@ -54,10 +57,28 @@ func ensureMinIOClient() (*minio.Client, error) { return } minioClient = c + minioContainer = container }) return minioClient, minioInitErr } +// TerminateMinIO stops the shared MinIO container. Best-effort, idempotent. 
+func TerminateMinIO() { + if minioContainer == nil { + return + } + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + if err := minioContainer.Terminate(ctx); err != nil { + fmt.Fprintf(os.Stderr, "terminate shared minio: %v\n", err) + } + minioContainer = nil +} + +// EnsureMinIO starts the shared MinIO container if not already started. +// No-t variant intended for TestMain pre-warming. +func EnsureMinIO() error { _, err := ensureMinIOClient(); return err } + // MinIO returns a shared client + per-test bucket (fnv-hashed from t.Name(); cleaned up via t.Cleanup). // Prefix must be S3-valid (3-46 lowercase chars/digits/hyphens, no leading/trailing hyphen); not validated. func MinIO(t *testing.T, prefix string) (*minio.Client, string) { diff --git a/pkg/testutil/mongo.go b/pkg/testutil/mongo.go index 5bbe4f4ad..af8afc16d 100644 --- a/pkg/testutil/mongo.go +++ b/pkg/testutil/mongo.go @@ -6,10 +6,12 @@ import ( "context" "fmt" "hash/fnv" + "os" "sync" "testing" "time" + "github.com/testcontainers/testcontainers-go" "github.com/testcontainers/testcontainers-go/modules/mongodb" "go.mongodb.org/mongo-driver/v2/mongo" "go.mongodb.org/mongo-driver/v2/mongo/options" @@ -18,9 +20,10 @@ import ( ) var ( - mongoOnce sync.Once - mongoClient *mongo.Client - mongoInitErr error + mongoOnce sync.Once + mongoClient *mongo.Client + mongoContainer testcontainers.Container + mongoInitErr error ) func ensureMongoClient() (*mongo.Client, error) { @@ -44,10 +47,34 @@ func ensureMongoClient() (*mongo.Client, error) { return } mongoClient = c + mongoContainer = container }) return mongoClient, mongoInitErr } +// TerminateMongo disconnects the shared client and stops the shared +// container. Best-effort and idempotent — safe to call from any TestMain. 
+func TerminateMongo() {
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+	if mongoClient != nil {
+		if err := mongoClient.Disconnect(ctx); err != nil {
+			fmt.Fprintf(os.Stderr, "disconnect shared mongo client: %v\n", err)
+		}
+		mongoClient = nil
+	}
+	if mongoContainer != nil {
+		if err := mongoContainer.Terminate(ctx); err != nil {
+			fmt.Fprintf(os.Stderr, "terminate shared mongo: %v\n", err)
+		}
+		mongoContainer = nil
+	}
+}
+
+// EnsureMongo starts the shared Mongo container if not already started.
+// No-t variant intended for TestMain pre-warming.
+func EnsureMongo() error { _, err := ensureMongoClient(); return err }
+
 // MongoDB returns an isolated Mongo database for the current test; dropped on t.Cleanup.
 func MongoDB(t *testing.T, prefix string) *mongo.Database {
 	t.Helper()
diff --git a/pkg/testutil/nats.go b/pkg/testutil/nats.go
new file mode 100644
index 000000000..2d7997445
--- /dev/null
+++ b/pkg/testutil/nats.go
@@ -0,0 +1,85 @@
+//go:build integration
+
+package testutil
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/testcontainers/testcontainers-go"
+	natsmod "github.com/testcontainers/testcontainers-go/modules/nats"
+	"github.com/testcontainers/testcontainers-go/wait"
+
+	"github.com/hmchangw/chat/pkg/testutil/testimages"
+)
+
+var (
+	natsOnce      sync.Once
+	natsContainer testcontainers.Container
+	natsURL       string
+	natsInitErr   error
+)
+
+// JetStream is enabled unconditionally so consumers that publish/consume
+// through streams (search-sync-worker, inbox-worker, etc.) Just Work
+// against the shared container. Consumers that only use core NATS
+// request/reply pay nothing extra — JS is dormant until used.
+func ensureNATS() (string, error) {
+	natsOnce.Do(func() {
+		ctx := context.Background()
+		c, err := natsmod.Run(ctx, testimages.NATS,
+			testcontainers.WithCmdArgs("--jetstream"),
+			testcontainers.WithWaitStrategy(wait.ForLog("Server is ready").WithStartupTimeout(60*time.Second)),
+		)
+		if err != nil {
+			// Run may return a created container together with the error
+			// (e.g. when the wait strategy times out); terminate it here
+			// because natsContainer is only set on success, so nothing else
+			// tracks it. TerminateContainer is a no-op for a nil container.
+			_ = testcontainers.TerminateContainer(c)
+			natsInitErr = fmt.Errorf("start nats: %w", err)
+			return
+		}
+		url, err := c.ConnectionString(ctx)
+		if err != nil {
+			_ = c.Terminate(ctx)
+			natsInitErr = fmt.Errorf("get nats url: %w", err)
+			return
+		}
+		natsContainer = c
+		natsURL = url
+	})
+	return natsURL, natsInitErr
+}
+
+// NATS returns the URL of a process-shared NATS container with JetStream
+// enabled.
+func NATS(t *testing.T) string {
+	t.Helper()
+	u, err := ensureNATS()
+	if err != nil {
+		t.Fatalf("testutil.NATS: %v", err)
+	}
+	return u
+}
+
+// EnsureNATS starts the shared NATS container if not already started.
+// No-t variant intended for TestMain pre-warming.
+func EnsureNATS() error { _, err := ensureNATS(); return err }
+
+// TerminateNATS stops the shared NATS container. Best-effort, idempotent.
+func TerminateNATS() {
+	if natsContainer == nil {
+		return
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+	if err := natsContainer.Terminate(ctx); err != nil {
+		fmt.Fprintf(os.Stderr, "terminate shared nats: %v\n", err)
+	}
+	natsContainer = nil
+}
diff --git a/pkg/testutil/terminate.go b/pkg/testutil/terminate.go
new file mode 100644
index 000000000..324ae3a34
--- /dev/null
+++ b/pkg/testutil/terminate.go
@@ -0,0 +1,14 @@
+//go:build integration
+
+package testutil
+
+// TerminateAll stops every process-shared container. Each TerminateXxx
+// is a no-op if its container was never started.
+func TerminateAll() { + TerminateMongo() + TerminateCassandra() + TerminateMinIO() + TerminateElasticsearch() + TerminateNATS() + TerminateValkey() +} diff --git a/pkg/testutil/testmain.go b/pkg/testutil/testmain.go new file mode 100644 index 000000000..8563e6f62 --- /dev/null +++ b/pkg/testutil/testmain.go @@ -0,0 +1,51 @@ +//go:build integration + +package testutil + +import ( + "fmt" + "os" + "sync" + "testing" +) + +// RunTests runs m.Run, terminates shared containers, and exits. +// Usage: func TestMain(m *testing.M) { testutil.RunTests(m) } +func RunTests(m *testing.M) { + code := m.Run() + TerminateAll() + os.Exit(code) +} + +// PrewarmFailFast runs each Ensure* concurrently and returns the first +// error, or nil if all succeed. Intended for use in TestMain before m.Run. +func PrewarmFailFast(fns ...func() error) error { + var wg sync.WaitGroup + errCh := make(chan error, len(fns)) + for _, fn := range fns { + wg.Add(1) + go func(f func() error) { + defer wg.Done() + if err := f(); err != nil { + errCh <- err + } + }(fn) + } + wg.Wait() + close(errCh) + if err, ok := <-errCh; ok { + return err + } + return nil +} + +// RunTestsWithPrewarm pre-warms via PrewarmFailFast, then RunTests. +// On prewarm failure, exits with code 1 after TerminateAll cleanup. 
+func RunTestsWithPrewarm(m *testing.M, prewarms ...func() error) { + if err := PrewarmFailFast(prewarms...); err != nil { + fmt.Fprintf(os.Stderr, "prewarm shared containers: %v\n", err) + TerminateAll() + os.Exit(1) + } + RunTests(m) +} diff --git a/pkg/testutil/valkey.go b/pkg/testutil/valkey.go index 6a447a161..31c6d9a8d 100644 --- a/pkg/testutil/valkey.go +++ b/pkg/testutil/valkey.go @@ -6,7 +6,9 @@ import ( "context" "fmt" "io" + "os" "strings" + "sync" "testing" "time" @@ -18,16 +20,104 @@ import ( "github.com/hmchangw/chat/pkg/testutil/testimages" ) -// StartValkeyCluster starts a single-node cluster-mode Valkey container, -// assigns all 16384 hash slots to that node, and returns a connected -// *redis.ClusterClient. The ClusterSlots override routes traffic to the -// externally-mapped address rather than the internal 127.0.0.1:6379 that -// the node announces to peers — required for testcontainer port mapping. -// The container and client are terminated/closed via t.Cleanup. +// StartValkeyCluster boots a per-test cluster-mode Valkey. Use when a +// test asserts on cluster-routing state; otherwise prefer SharedValkeyCluster. func StartValkeyCluster(t *testing.T) *redis.ClusterClient { t.Helper() ctx := context.Background() + container, addr := startValkeyClusterContainer(ctx, t) + t.Cleanup(func() { _ = container.Terminate(ctx) }) + c := newValkeyClusterClient(addr) + t.Cleanup(func() { _ = c.Close() }) + require.NoError(t, pingCluster(ctx, c), "ping valkey cluster") + return c +} + +// SharedValkeyCluster returns a *redis.ClusterClient against a +// process-shared cluster-mode Valkey (started via sync.Once, reaped via +// TerminateAll). Callers must register +// `t.Cleanup(func() { testutil.FlushValkey(t) })` for keyspace isolation. 
+func SharedValkeyCluster(t *testing.T) *redis.ClusterClient {
+	t.Helper()
+	ensureSharedValkeyCluster()
+	if sharedValkeyErr != nil {
+		t.Fatalf("testutil.SharedValkeyCluster: %v", sharedValkeyErr)
+	}
+	return sharedValkeyClient
+}
+
+// EnsureValkey is the no-t variant for TestMain pre-warming.
+func EnsureValkey() error { ensureSharedValkeyCluster(); return sharedValkeyErr }
+
+// FlushValkey runs FLUSHALL on every master in the shared cluster.
+// Reports a flush error with t.Errorf (fails the test without aborting it) — leftover state would silently break the next test.
+func FlushValkey(t *testing.T) {
+	t.Helper()
+	if sharedValkeyClient == nil {
+		return
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+	err := sharedValkeyClient.ForEachMaster(ctx, func(ctx context.Context, m *redis.Client) error {
+		return m.FlushAll(ctx).Err()
+	})
+	if err != nil {
+		t.Errorf("flush shared valkey cluster: %v", err)
+	}
+}
+
+// TerminateValkey closes the shared client/container. Idempotent.
+func TerminateValkey() {
+	if sharedValkeyClient != nil {
+		_ = sharedValkeyClient.Close()
+		sharedValkeyClient = nil
+	}
+	if sharedValkeyContainer == nil {
+		return
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+	if err := sharedValkeyContainer.Terminate(ctx); err != nil {
+		fmt.Fprintf(os.Stderr, "terminate shared valkey: %v\n", err)
+	}
+	sharedValkeyContainer = nil
+}
+
+var (
+	sharedValkeyOnce      sync.Once
+	sharedValkeyContainer testcontainers.Container
+	sharedValkeyClient    *redis.ClusterClient
+	sharedValkeyErr       error
+)

+func ensureSharedValkeyCluster() {
+	sharedValkeyOnce.Do(func() {
+		ctx := context.Background()
+		container, addr, err := startValkeyClusterContainerNoT(ctx)
+		if err != nil {
+			sharedValkeyErr = fmt.Errorf("start shared valkey cluster: %w", err)
+			return
+		}
+		c := newValkeyClusterClient(addr)
+		if err := pingCluster(ctx, c); err != nil {
+			_ = c.Close()
+			_ = container.Terminate(ctx)
+			sharedValkeyErr = fmt.Errorf("ping shared valkey cluster: %w", err)
+			return
+		}
+		sharedValkeyContainer = container
+		sharedValkeyClient = c
+	})
+}
+
+func startValkeyClusterContainer(ctx context.Context, t *testing.T) (testcontainers.Container, string) {
+	t.Helper()
+	container, addr, err := startValkeyClusterContainerNoT(ctx)
+	require.NoError(t, err, "start valkey cluster container")
+	return container, addr
+}
+
+func startValkeyClusterContainerNoT(ctx context.Context) (testcontainers.Container, string, error) {
 	container, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{
 		ContainerRequest: testcontainers.ContainerRequest{
 			Image: testimages.Valkey,
@@ -43,29 +133,56 @@ func StartValkeyCluster(t *testing.T) *redis.ClusterClient {
 		},
 		Started: true,
 	})
-	require.NoError(t, err, "start valkey cluster container")
-	t.Cleanup(func() { _ = container.Terminate(ctx) })
-
+	if err != nil {
+		// GenericContainer may return a created container together with
+		// the error (e.g. a wait-strategy timeout); terminate it so it is
+		// not left running. TerminateContainer is a no-op for nil.
+		_ = testcontainers.TerminateContainer(container)
+		return nil, "", err
+	}
 	host, err := container.Host(ctx)
-	require.NoError(t, err)
+	if err != nil {
+		_ = container.Terminate(ctx)
+		return nil, "", fmt.Errorf("get valkey host: %w", err)
+	}
 	port, err := container.MappedPort(ctx, "6379")
-	require.NoError(t, err)
+	if err != nil {
+		_ = container.Terminate(ctx)
+		return nil, "", fmt.Errorf("get valkey port: %w", err)
+	}
 	addr := fmt.Sprintf("%s:%s", host, port.Port())

 	exitCode, _, err := container.Exec(ctx, []string{"valkey-cli", "CLUSTER", "ADDSLOTSRANGE", "0", "16383"})
-	require.NoError(t, err, "exec cluster addslotsrange")
-	require.Equal(t, 0, exitCode, "cluster addslotsrange must exit 0")
+	if err != nil {
+		_ = container.Terminate(ctx)
+		return nil, "", fmt.Errorf("exec cluster addslotsrange: %w", err)
+	}
+	if exitCode != 0 {
+		_ = container.Terminate(ctx)
+		return nil, "", fmt.Errorf("cluster addslotsrange exited %d", exitCode)
+	}

-	require.Eventually(t, func() bool {
+	deadline := time.Now().Add(10 * time.Second)
+	for time.Now().Before(deadline) {
 		_, out, execErr := container.Exec(ctx, []string{"valkey-cli", "CLUSTER", "INFO"})
-		if execErr != nil {
-			return false
+		if execErr == nil {
+			buf, _ := io.ReadAll(out)
+			if strings.Contains(string(buf), "cluster_state:ok") {
+				return container, addr, nil
+			}
 		}
-		buf, _ := io.ReadAll(out)
-		return strings.Contains(string(buf), "cluster_state:ok")
-	}, 10*time.Second, 100*time.Millisecond, "cluster must reach ok state")
+		time.Sleep(100 * time.Millisecond)
+	}
+	_ = container.Terminate(ctx)
+	return nil, "", fmt.Errorf("cluster never reached ok state within 10s")
+}

-	c := redis.NewClusterClient(&redis.ClusterOptions{
+// newValkeyClusterClient builds a ClusterClient that routes all 16384
+// slots to the externally-mapped addr. The ClusterSlots override is
+// required because the node announces 127.0.0.1:6379 to peers (the
+// container-internal address), which the host can't reach.
+func newValkeyClusterClient(addr string) *redis.ClusterClient {
+	return redis.NewClusterClient(&redis.ClusterOptions{
 		Addrs: []string{addr},
 		ClusterSlots: func(_ context.Context) ([]redis.ClusterSlot, error) {
 			return []redis.ClusterSlot{
@@ -73,11 +190,10 @@ func StartValkeyCluster(t *testing.T) *redis.ClusterClient {
 			}, nil
 		},
 	})
-	t.Cleanup(func() { _ = c.Close() })
+}

+func pingCluster(ctx context.Context, c *redis.ClusterClient) error {
 	pingCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
 	defer cancel()
-	require.NoError(t, c.Ping(pingCtx).Err(), "ping valkey cluster")
-
-	return c
+	return c.Ping(pingCtx).Err()
 }
diff --git a/pkg/userstore/main_test.go b/pkg/userstore/main_test.go
new file mode 100644
index 000000000..f1015b8e8
--- /dev/null
+++ b/pkg/userstore/main_test.go
@@ -0,0 +1,11 @@
+//go:build integration
+
+package userstore
+
+import (
+	"testing"
+
+	"github.com/hmchangw/chat/pkg/testutil"
+)
+
+func TestMain(m *testing.M) { testutil.RunTests(m) }
diff --git a/pkg/valkeyutil/integration_test.go b/pkg/valkeyutil/integration_test.go
index 84bb5531a..5a2fbaee9 100644
---
a/pkg/valkeyutil/integration_test.go +++ b/pkg/valkeyutil/integration_test.go @@ -22,7 +22,8 @@ import ( // ConnectCluster's error-wrapping path is covered by TestConnectCluster_ErrorPath. func setupClusterClient(t *testing.T) Client { t.Helper() - return &clusterClient{c: testutil.StartValkeyCluster(t)} + t.Cleanup(func() { testutil.FlushValkey(t) }) + return &clusterClient{c: testutil.SharedValkeyCluster(t)} } func TestClusterRedisClient_Integration_GetSetDel(t *testing.T) { diff --git a/pkg/valkeyutil/main_test.go b/pkg/valkeyutil/main_test.go new file mode 100644 index 000000000..d0469a2c5 --- /dev/null +++ b/pkg/valkeyutil/main_test.go @@ -0,0 +1,11 @@ +//go:build integration + +package valkeyutil + +import ( + "testing" + + "github.com/hmchangw/chat/pkg/testutil" +) + +func TestMain(m *testing.M) { testutil.RunTests(m) } diff --git a/room-service/integration_test.go b/room-service/integration_test.go index 810483556..83335aa3d 100644 --- a/room-service/integration_test.go +++ b/room-service/integration_test.go @@ -17,7 +17,6 @@ import ( "github.com/nats-io/nats.go" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - natsmod "github.com/testcontainers/testcontainers-go/modules/nats" "go.mongodb.org/mongo-driver/v2/bson" "go.mongodb.org/mongo-driver/v2/mongo" @@ -27,7 +26,6 @@ import ( "github.com/hmchangw/chat/pkg/roomkeystore" "github.com/hmchangw/chat/pkg/subject" "github.com/hmchangw/chat/pkg/testutil" - "github.com/hmchangw/chat/pkg/testutil/testimages" ) func setupMongo(t *testing.T) *mongo.Database { @@ -90,13 +88,7 @@ func TestCassMessageReader_GetMessageRoomAndCreatedAt_Integration(t *testing.T) func setupNATS(t *testing.T) string { t.Helper() - ctx := context.Background() - container, err := natsmod.Run(ctx, testimages.NATS) - require.NoError(t, err) - t.Cleanup(func() { _ = container.Terminate(ctx) }) - url, err := container.ConnectionString(ctx) - require.NoError(t, err) - return url + return testutil.NATS(t) } func 
TestMongoStore_Integration(t *testing.T) { diff --git a/room-service/main_test.go b/room-service/main_test.go new file mode 100644 index 000000000..937f8531a --- /dev/null +++ b/room-service/main_test.go @@ -0,0 +1,11 @@ +//go:build integration + +package main + +import ( + "testing" + + "github.com/hmchangw/chat/pkg/testutil" +) + +func TestMain(m *testing.M) { testutil.RunTests(m) } diff --git a/room-worker/main_test.go b/room-worker/main_test.go new file mode 100644 index 000000000..937f8531a --- /dev/null +++ b/room-worker/main_test.go @@ -0,0 +1,11 @@ +//go:build integration + +package main + +import ( + "testing" + + "github.com/hmchangw/chat/pkg/testutil" +) + +func TestMain(m *testing.M) { testutil.RunTests(m) } diff --git a/search-service/integration_apps_test.go b/search-service/integration_apps_test.go new file mode 100644 index 000000000..ac066b975 --- /dev/null +++ b/search-service/integration_apps_test.go @@ -0,0 +1,111 @@ +//go:build integration + +package main + +// Integration tests for search.apps (Mongo + NATS; ES/Valkey stubbed). 
+ +import ( + "context" + "encoding/json" + "testing" + "time" + + "github.com/nats-io/nats.go" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "go.mongodb.org/mongo-driver/v2/mongo" + + "github.com/hmchangw/chat/pkg/model" + "github.com/hmchangw/chat/pkg/natsrouter" + "github.com/hmchangw/chat/pkg/subject" + "github.com/hmchangw/chat/pkg/testutil" +) + +type appsFixture struct { + clientNATS *nats.Conn + mongoDB *mongo.Database +} + +func setupAppsFixture(t *testing.T) *appsFixture { + t.Helper() + mongoDB := testutil.MongoDB(t, "search_service_test") + h := newHandler(&fakeStore{}, newMongoStore(mongoDB), nil, newFakeCache(), handlerConfig{ + DocCounts: 25, + MaxDocCounts: 100, + RestrictedRoomsCacheTTL: 5 * time.Minute, + RecentWindow: 365 * 24 * time.Hour, + RequestTimeout: 5 * time.Second, + SpotlightReadPattern: "spotlight-*", + }) + clientNATS := setupRouter(t, testQueueGroup, h.Register) + return &appsFixture{clientNATS: clientNATS, mongoDB: mongoDB} +} + +func TestIntegration_SearchApps_PrototypePipeline(t *testing.T) { + f := setupAppsFixture(t) + ctx := context.Background() + + _, err := f.mongoDB.Collection("apps").InsertMany(ctx, []any{ + map[string]any{"_id": "a1", "name": "Weather Alpha", "assistant": map[string]any{"enabled": true, "name": "weather.bot"}}, + map[string]any{"_id": "a2", "name": "Weatherly", "assistant": map[string]any{"enabled": false, "name": "weatherly.bot"}}, + map[string]any{"_id": "a3", "name": "Calendar"}, + }) + require.NoError(t, err) + + reqBytes, err := json.Marshal(model.SearchAppsRequest{Query: "weather"}) + require.NoError(t, err) + + msg, err := f.clientNATS.Request(subject.SearchApps("alice"), reqBytes, 5*time.Second) + require.NoError(t, err) + + var resp model.SearchAppsResponse + require.NoError(t, json.Unmarshal(msg.Data, &resp)) + + require.Len(t, resp.Apps, 2, "two apps match the 'weather' regex") + names := []string{resp.Apps[0].Name, resp.Apps[1].Name} + assert.Contains(t, 
names, "Weather Alpha") + assert.Contains(t, names, "Weatherly") +} + +func TestIntegration_SearchApps_AssistantEnabledFilter(t *testing.T) { + f := setupAppsFixture(t) + ctx := context.Background() + + _, err := f.mongoDB.Collection("apps").InsertMany(ctx, []any{ + map[string]any{"_id": "a1", "name": "Weather Alpha", "assistant": map[string]any{"enabled": true, "name": "weather.bot"}}, + map[string]any{"_id": "a2", "name": "Weatherly", "assistant": map[string]any{"enabled": false, "name": "weatherly.bot"}}, + }) + require.NoError(t, err) + + enabled := true + reqBytes, err := json.Marshal(model.SearchAppsRequest{ + Query: "weather", + AssistantEnabled: &enabled, + }) + require.NoError(t, err) + + msg, err := f.clientNATS.Request(subject.SearchApps("alice"), reqBytes, 5*time.Second) + require.NoError(t, err) + + var resp model.SearchAppsResponse + require.NoError(t, json.Unmarshal(msg.Data, &resp)) + + require.Len(t, resp.Apps, 1) + assert.Equal(t, "Weather Alpha", resp.Apps[0].Name) +} + +func TestIntegration_SearchApps_EmptyQueryReturnsBadRequest(t *testing.T) { + f := setupAppsFixture(t) + + reqBytes, err := json.Marshal(model.SearchAppsRequest{Query: ""}) + require.NoError(t, err) + + msg, err := f.clientNATS.Request(subject.SearchApps("alice"), reqBytes, 5*time.Second) + require.NoError(t, err) + + var envelope model.ErrorResponse + require.NoError(t, json.Unmarshal(msg.Data, &envelope)) + require.NotEmpty(t, envelope.Error) + assert.Equal(t, natsrouter.CodeBadRequest, envelope.Code) +} diff --git a/search-service/integration_ccs_test.go b/search-service/integration_ccs_test.go new file mode 100644 index 000000000..f38db6860 --- /dev/null +++ b/search-service/integration_ccs_test.go @@ -0,0 +1,493 @@ +//go:build integration + +package main + +// CCS integration tests + helpers only CCS uses. 
The two CCS tests are +// the exception to the shared-container pattern in setup_shared_test.go: +// they need a pair of ES nodes on a shared docker network with +// transport-port aliases. NATS and Valkey are still shared. + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "testing" + "time" + + "github.com/nats-io/nats.go" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/testcontainers/testcontainers-go" + "github.com/testcontainers/testcontainers-go/network" + "github.com/testcontainers/testcontainers-go/wait" + + "github.com/hmchangw/chat/pkg/model" + "github.com/hmchangw/chat/pkg/searchengine" + "github.com/hmchangw/chat/pkg/subject" + "github.com/hmchangw/chat/pkg/testutil" + "github.com/hmchangw/chat/pkg/testutil/testimages" + "github.com/hmchangw/chat/pkg/valkeyutil" +) + +// --- Fixture ----------------------------------------------------------------- + +// ccsFixture owns the two-ES + Valkey + NATS stack for CCS tests. +// localURL / remoteURL are host-mapped for seeding; the service sees localURL. +type ccsFixture struct { + localURL string + remoteURL string + localES searchengine.SearchEngine + remoteES searchengine.SearchEngine + clientNATS *nats.Conn +} + +// setupCCSFixture owns the pair of networked ES containers (can't be +// process-shared — they need a shared docker network with transport-port +// aliases); piggybacks on shared Valkey/NATS. +func setupCCSFixture(t *testing.T) *ccsFixture { + t.Helper() + ctx := context.Background() + + nw, err := network.New(ctx) + require.NoError(t, err, "create docker network") + t.Cleanup(func() { _ = nw.Remove(ctx) }) + + remoteURL := startESForCCS(t, nw, "es-remote", "remote-cluster") + localURL := startESForCCS(t, nw, "es-local", "local-cluster") + + // Wire local→remote in PROXY mode. 
Proxy mode skips sniff-then-reconnect, + // which requires the remote to advertise a reachable publish address — + // fragile when containers bind transport on 0.0.0.0 and publish defaults + // to an unreachable interface. Ref: ES "Remote cluster settings" → `mode=proxy`. + putClusterSetting(t, localURL, map[string]any{ + "persistent": map[string]any{ + "cluster.remote.remote1.mode": "proxy", + "cluster.remote.remote1.proxy_address": "es-remote:9300", + }, + }) + waitForRemoteConnected(t, localURL, "remote1", 120*time.Second) + + localEngine, err := searchengine.New(ctx, searchengine.Config{Backend: "elasticsearch", URL: localURL}) + require.NoError(t, err, "build searchengine for local") + remoteEngine, err := searchengine.New(ctx, searchengine.Config{Backend: "elasticsearch", URL: remoteURL}) + require.NoError(t, err, "build searchengine for remote") + + cacheClient := valkeyutil.WrapClusterClient(testutil.SharedValkeyCluster(t)) + t.Cleanup(func() { testutil.FlushValkey(t) }) + + h := newHandler(newESStore(localEngine, testUserRoomIndex), nil, nil, newValkeyCache(cacheClient), handlerConfig{ + DocCounts: 25, + MaxDocCounts: 100, + RestrictedRoomsCacheTTL: 5 * time.Minute, + RecentWindow: 365 * 24 * time.Hour, + UserRoomIndex: testUserRoomIndex, + SpotlightReadPattern: "spotlight-test-*", + }) + clientNC := setupRouter(t, testQueueGroup, h.Register) + + return &ccsFixture{ + localURL: localURL, + remoteURL: remoteURL, + localES: localEngine, + remoteES: remoteEngine, + clientNATS: clientNC, + } +} + +// startESForCCS starts one ES node on the shared network at alias `{alias}`. +// transport.host=0.0.0.0 is required so the transport port binds on the bridge +// network (ES 8.x defaults to `_site_` which excludes the container bridge IP). 
+func startESForCCS(t *testing.T, nw *testcontainers.DockerNetwork, alias, clusterName string) string { + t.Helper() + ctx := context.Background() + + container, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{ + ContainerRequest: testcontainers.ContainerRequest{ + Image: testimages.Elasticsearch, + ExposedPorts: []string{"9200/tcp", "9300/tcp"}, + Networks: []string{nw.Name}, + NetworkAliases: map[string][]string{ + nw.Name: {alias}, + }, + Env: map[string]string{ + "cluster.name": clusterName, + "discovery.type": "single-node", + "xpack.security.enabled": "false", + "network.host": "0.0.0.0", + "transport.host": "0.0.0.0", + "cluster.routing.allocation.disk.threshold_enabled": "false", + "ES_JAVA_OPTS": "-Xms256m -Xmx256m", + }, + WaitingFor: wait.ForAll( + wait.ForHTTP("/").WithPort("9200/tcp").WithStartupTimeout(120*time.Second), + wait.ForHTTP("/_cluster/health?wait_for_status=yellow&timeout=60s"). + WithPort("9200/tcp"). + WithStartupTimeout(120*time.Second), + ), + }, + Started: true, + }) + require.NoError(t, err, "start elasticsearch (%s)", alias) + t.Cleanup(func() { _ = container.Terminate(ctx) }) + + host, err := container.Host(ctx) + require.NoError(t, err) + port, err := container.MappedPort(ctx, "9200") + require.NoError(t, err) + return fmt.Sprintf("http://%s:%s", host, port.Port()) +} + +// --- Index templates --------------------------------------------------------- + +// buildTestTemplate wraps properties with single-node-friendly settings +// (1 shard, 0 replicas) so tests don't depend on search-sync-worker's +// analyzer config. 
+func buildTestTemplate(pattern string, properties map[string]any) json.RawMessage { + body := map[string]any{ + "index_patterns": []string{pattern}, + "template": map[string]any{ + "settings": map[string]any{ + "index": map[string]any{ + "number_of_shards": 1, + "number_of_replicas": 0, + "refresh_interval": "1s", + }, + }, + "mappings": map[string]any{ + "dynamic": false, + "properties": properties, + }, + }, + } + data, _ := json.Marshal(body) + return data +} + +func messageTestTemplate() json.RawMessage { + return buildTestTemplate("messages-*", map[string]any{ + "messageId": map[string]any{"type": "keyword"}, + "roomId": map[string]any{"type": "keyword"}, + "siteId": map[string]any{"type": "keyword"}, + "userId": map[string]any{"type": "keyword"}, + "userAccount": map[string]any{"type": "keyword"}, + "content": map[string]any{ + "type": "text", + "fields": map[string]any{ + "keyword": map[string]any{"type": "keyword"}, + }, + }, + "createdAt": map[string]any{"type": "date"}, + "threadParentMessageId": map[string]any{"type": "keyword"}, + "threadParentMessageCreatedAt": map[string]any{"type": "date"}, + "tshow": map[string]any{"type": "boolean"}, + }) +} + +func userRoomTestTemplate() json.RawMessage { + return buildTestTemplate(testUserRoomIndex, map[string]any{ + "userAccount": map[string]any{"type": "keyword"}, + "rooms": map[string]any{ + "type": "text", + "fields": map[string]any{ + "keyword": map[string]any{"type": "keyword", "ignore_above": 256}, + }, + }, + "restrictedRooms": map[string]any{"type": "flattened"}, + "roomTimestamps": map[string]any{"type": "flattened"}, + "createdAt": map[string]any{"type": "date"}, + "updatedAt": map[string]any{"type": "date"}, + }) +} + +// --- CCS HTTP helpers -------------------------------------------------------- + +func putClusterSetting(t *testing.T, esURL string, body map[string]any) { + t.Helper() + data, _ := json.Marshal(body) + req, err := http.NewRequest(http.MethodPut, esURL+"/_cluster/settings", 
bytes.NewReader(data)) + require.NoError(t, err) + req.Header.Set("Content-Type", "application/json") + resp, err := testHTTPClient.Do(req) + require.NoError(t, err, "put cluster settings") + defer resp.Body.Close() + respBody, _ := io.ReadAll(resp.Body) + require.Equal(t, http.StatusOK, resp.StatusCode, "put cluster settings: %s", respBody) +} + +// waitForRemoteConnected polls /_remote/info until the given remote cluster +// reports connected=true. CCS registration is async — the settings call +// returns immediately but the transport handshake happens in the +// background. On timeout, the last-seen /_remote/info body is captured in +// the failure message so CI can diagnose whether the remote was ever +// registered, what mode it ended up in, or why it couldn't connect. +func waitForRemoteConnected(t *testing.T, localURL, remoteName string, timeout time.Duration) { + t.Helper() + deadline := time.Now().Add(timeout) + var lastBody string + for time.Now().Before(deadline) { + resp, err := testHTTPClient.Get(localURL + "/_remote/info") + if err == nil { + body, _ := io.ReadAll(resp.Body) + resp.Body.Close() + lastBody = string(body) + var info map[string]struct { + Connected bool `json:"connected"` + } + if json.Unmarshal(body, &info) == nil { + if entry, ok := info[remoteName]; ok && entry.Connected { + return + } + } + } + time.Sleep(1 * time.Second) + } + t.Fatalf("remote cluster %q never became connected within %s\nlast /_remote/info body: %s", + remoteName, timeout, lastBody) +} + +// --- Templates on both clusters --------------------------------------------- + +func (f *ccsFixture) installTemplates(t *testing.T) { + t.Helper() + ctx := context.Background() + require.NoError(t, f.localES.UpsertTemplate(ctx, "messages_template", messageTestTemplate()), + "upsert messages_template on local") + require.NoError(t, f.remoteES.UpsertTemplate(ctx, "messages_template", messageTestTemplate()), + "upsert messages_template on remote") + // user-room is local-only per 
the search-service architecture. + require.NoError(t, f.localES.UpsertTemplate(ctx, "user_room_template", userRoomTestTemplate()), + "upsert user_room_template on local") +} + +// --- Tests ------------------------------------------------------------------- + +// TestSearchService_SearchMessages_CCS_CrossCluster_Unrestricted verifies +// the core CCS promise: a user's search crosses from the local cluster +// (`messages-*`) to a remote cluster (`*:messages-*`) and the service +// returns the merged result set. Both rooms are unrestricted — they live in +// the user-room doc's `rooms[]` — and the terms-lookup clause handles them +// uniformly regardless of which site hosts the message. +func TestSearchService_SearchMessages_CCS_CrossCluster_Unrestricted(t *testing.T) { + f := setupCCSFixture(t) + f.installTemplates(t) + + // Alice is in two unrestricted rooms (one local, one remote); the + // local user-room doc lists both. Sync-worker normally populates it via + // INBOX events — seeded directly here. + const account = "alice" + const localRoomID = "room-local-1" + const remoteRoomID = "room-remote-1" + + now := time.Now().UTC() + createdAt := now.Add(-time.Hour) + monthIdx := "messages-" + createdAt.Format("2006-01") + + // user-room doc: unrestricted memberships in both rooms. 
+ seedDoc(t, f.localURL, testUserRoomIndex, account, map[string]any{ + "userAccount": account, + "rooms": []string{localRoomID, remoteRoomID}, + "restrictedRooms": map[string]int64{}, + "roomTimestamps": map[string]int64{ + localRoomID: createdAt.UnixMilli(), + remoteRoomID: createdAt.UnixMilli(), + }, + "createdAt": createdAt.Format(time.RFC3339Nano), + "updatedAt": createdAt.Format(time.RFC3339Nano), + }) + + seedDoc(t, f.localURL, monthIdx, "msg-local-1", map[string]any{ + "messageId": "msg-local-1", + "roomId": localRoomID, + "siteId": "site-local", + "userId": "user-bob", + "userAccount": "bob", + "content": "hello from local", + "createdAt": createdAt.Format(time.RFC3339Nano), + }) + + // Same index pattern on the remote cluster — CCS resolves `*:messages-*`. + seedDoc(t, f.remoteURL, monthIdx, "msg-remote-1", map[string]any{ + "messageId": "msg-remote-1", + "roomId": remoteRoomID, + "siteId": "site-remote", + "userId": "user-carol", + "userAccount": "carol", + "content": "hello from remote", + "createdAt": createdAt.Format(time.RFC3339Nano), + }) + + req := model.SearchMessagesRequest{Query: "hello"} + reqData, err := json.Marshal(req) + require.NoError(t, err) + + // Long timeout: first request is Valkey miss → ES prefetch → CCS fanout. 
+ msg, err := f.clientNATS.Request(subject.SearchMessages(account), reqData, 30*time.Second) + require.NoError(t, err, "NATS request failed") + + t.Logf("response: %s", msg.Data) + + var resp model.SearchMessagesResponse + require.NoError(t, json.Unmarshal(msg.Data, &resp), "decode response: %s", msg.Data) + + assert.EqualValues(t, 2, resp.Total, "expected both local + remote hits; got body=%s", msg.Data) + require.Len(t, resp.Messages, 2, "expected 2 hits; got body=%s", msg.Data) + + gotRooms := map[string]string{} + for _, hit := range resp.Messages { + gotRooms[hit.RoomID] = hit.SiteID + } + assert.Equal(t, "site-local", gotRooms[localRoomID], "local message should be present") + assert.Equal(t, "site-remote", gotRooms[remoteRoomID], "remote message should be present via CCS") +} + +// TestSearchService_SearchMessages_CCS_CrossCluster_Restricted verifies +// the restricted-room access-control clauses fire correctly across the +// CCS boundary. Alice is a member of one UNRESTRICTED local room and one +// RESTRICTED remote room with historySharedSince (HSS) set to a specific +// cutoff. The user-room doc (local-only) routes the remote room into +// `restrictedRooms{rid: hssMillis}`. +// +// Seed on the remote cluster covers every branch the query builder +// encodes for restricted rooms: +// +// - pre-HSS parent → MUST NOT match (Clause A: createdAt < hss) +// - post-HSS parent → MUST match (Clause A) +// - post-HSS thread reply, tshow=true → MUST match (Clause B1: outer gate passes + tshow=true fires B1, even though parent is pre-HSS) +// - post-HSS thread reply, tshow=false → MUST NOT match (Clause B fails: outer gate passes but inner OR fails — tshow=false AND parent < hss so B2 also fails) +// +// Plus one unrestricted local parent to prove the two paths interact +// cleanly on the same search. Total expected hits: 3 (local + post-HSS +// remote parent + post-HSS remote reply with tshow=true). 
+func TestSearchService_SearchMessages_CCS_CrossCluster_Restricted(t *testing.T) { + f := setupCCSFixture(t) + f.installTemplates(t) + + const account = "alice" + const localRoomID = "room-local-unrestricted" + const remoteRoomID = "room-remote-restricted" + + // hss is the user's join-time bound for the restricted remote room; + // preHSS / postHSS straddle it. All within the 1-year recent_window. + now := time.Now().UTC() + hss := now.Add(-2 * time.Hour) + preHSS := hss.Add(-3 * time.Hour) + postHSS := hss.Add(time.Hour) + monthIdxFor := func(ts time.Time) string { return "messages-" + ts.Format("2006-01") } + + // user-room doc: local room unrestricted, remote room restricted with hss. + seedDoc(t, f.localURL, testUserRoomIndex, account, map[string]any{ + "userAccount": account, + "rooms": []string{localRoomID}, + "restrictedRooms": map[string]int64{ + remoteRoomID: hss.UnixMilli(), + }, + "roomTimestamps": map[string]int64{ + localRoomID: now.UnixMilli(), + remoteRoomID: now.UnixMilli(), + }, + "createdAt": now.Format(time.RFC3339Nano), + "updatedAt": now.Format(time.RFC3339Nano), + }) + + // --- LOCAL unrestricted room ---------------------------------------- + // One plain message that should always match via the terms-lookup + // branch (no HSS involved). + seedDoc(t, f.localURL, monthIdxFor(postHSS), "msg-local-1", map[string]any{ + "messageId": "msg-local-1", + "roomId": localRoomID, + "siteId": "site-local", + "userId": "user-bob", + "userAccount": "bob", + "content": "hello from local", + "createdAt": postHSS.Format(time.RFC3339Nano), + }) + + // --- REMOTE restricted room ----------------------------------------- + // Four messages, each exercising one branch of the restricted-room + // clauses. Pre-HSS parent lives at `msg-remote-pre-parent`; its + // thread replies reference it via threadParentMessageId + + // threadParentMessageCreatedAt=preHSS. 
+ seedDoc(t, f.remoteURL, monthIdxFor(preHSS), "msg-remote-pre-parent", map[string]any{ + "messageId": "msg-remote-pre-parent", + "roomId": remoteRoomID, + "siteId": "site-remote", + "userId": "user-carol", + "userAccount": "carol", + "content": "hello pre-hss parent", + "createdAt": preHSS.Format(time.RFC3339Nano), + }) + + seedDoc(t, f.remoteURL, monthIdxFor(postHSS), "msg-remote-post-parent", map[string]any{ + "messageId": "msg-remote-post-parent", + "roomId": remoteRoomID, + "siteId": "site-remote", + "userId": "user-carol", + "userAccount": "carol", + "content": "hello post-hss parent", + "createdAt": postHSS.Format(time.RFC3339Nano), + }) + + // Post-HSS reply to a pre-HSS parent, tshow=true → Clause B1 matches. + // The reply's own createdAt satisfies Clause B's outer gate + // (createdAt >= hss); tshow=true then fires B1 regardless of the + // parent's age. If the outer gate weren't there, a pre-HSS tshow=true + // reply would leak history the user never had access to. + seedDoc(t, f.remoteURL, monthIdxFor(postHSS), "msg-remote-reply-tshow", map[string]any{ + "messageId": "msg-remote-reply-tshow", + "roomId": remoteRoomID, + "siteId": "site-remote", + "userId": "user-carol", + "userAccount": "carol", + "content": "hello tshow reply", + "createdAt": postHSS.Add(time.Minute).Format(time.RFC3339Nano), + "threadParentMessageId": "msg-remote-pre-parent", + "threadParentMessageCreatedAt": preHSS.Format(time.RFC3339Nano), + "tshow": true, + }) + + // Post-HSS reply to a pre-HSS parent, tshow=false → Clause B rejects. + // Outer gate passes (reply createdAt >= hss) but the inner OR fails: + // tshow=false blocks B1 and the parent's pre-HSS createdAt blocks B2. 
+ seedDoc(t, f.remoteURL, monthIdxFor(postHSS), "msg-remote-reply-plain", map[string]any{ + "messageId": "msg-remote-reply-plain", + "roomId": remoteRoomID, + "siteId": "site-remote", + "userId": "user-carol", + "userAccount": "carol", + "content": "hello plain reply", + "createdAt": postHSS.Add(2 * time.Minute).Format(time.RFC3339Nano), + "threadParentMessageId": "msg-remote-pre-parent", + "threadParentMessageCreatedAt": preHSS.Format(time.RFC3339Nano), + }) + + // --- Search --------------------------------------------------------- + reqData, err := json.Marshal(model.SearchMessagesRequest{Query: "hello"}) + require.NoError(t, err) + + msg, err := f.clientNATS.Request(subject.SearchMessages(account), reqData, 30*time.Second) + require.NoError(t, err, "NATS request failed") + t.Logf("response: %s", msg.Data) + + var resp model.SearchMessagesResponse + require.NoError(t, json.Unmarshal(msg.Data, &resp), "decode response: %s", msg.Data) + + got := map[string]bool{} + for _, hit := range resp.Messages { + got[hit.MessageID] = true + } + + // Expected matches: + assert.True(t, got["msg-local-1"], "local unrestricted message must match via terms-lookup") + assert.True(t, got["msg-remote-post-parent"], "post-HSS remote parent must match via Clause A (CCS)") + assert.True(t, got["msg-remote-reply-tshow"], "post-HSS remote reply with tshow=true must match via Clause B1 (CCS)") + + // Expected exclusions: + assert.False(t, got["msg-remote-pre-parent"], "pre-HSS remote parent must be excluded by Clause A gate") + assert.False(t, got["msg-remote-reply-plain"], "post-HSS remote reply without tshow + pre-HSS parent must be excluded (outer gate passes; B1 and B2 both fail)") + + assert.EqualValues(t, 3, resp.Total, "expected exactly 3 hits; got body=%s", msg.Data) + require.Len(t, resp.Messages, 3, "expected 3 hits; got body=%s", msg.Data) +} diff --git a/search-service/integration_messages_test.go b/search-service/integration_messages_test.go new file mode 100644 index 
000000000..66d84a2eb --- /dev/null +++ b/search-service/integration_messages_test.go @@ -0,0 +1,100 @@ +//go:build integration + +package main + +// Integration tests for search.messages v2 (ES stubbed via httptest, shared NATS). + +import ( + "context" + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/nats-io/nats.go" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/hmchangw/chat/pkg/model" + "github.com/hmchangw/chat/pkg/natsrouter" + "github.com/hmchangw/chat/pkg/searchengine" + "github.com/hmchangw/chat/pkg/subject" +) + +type messagesV2Fixture struct { + clientNATS *nats.Conn +} + +func setupMessagesV2Fixture(t *testing.T) *messagesV2Fixture { + t.Helper() + esStub := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Drain the body so the HTTP/1.1 connection stays open. + _, _ = io.Copy(io.Discard, r.Body) + // The Elastic Go client performs a "product check" handshake on + // connect and rejects any server that doesn't advertise itself + // as Elasticsearch via this header. Set it on every response so + // the stub passes the check regardless of which endpoint is hit. 
+ w.Header().Set("X-Elastic-Product", "Elasticsearch") + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"hits":{"total":{"value":1},"hits":[{"_source":{` + + `"messageId":"m1","roomId":"r1","siteId":"site-a","userId":"u1",` + + `"userAccount":"alice","content":"hello","createdAt":"2026-04-01T12:00:00Z"}}]}}`)) + })) + t.Cleanup(esStub.Close) + + fakeValkey := newFakeCache() + fakeValkey.store["alice"] = map[string]int64{} // empty restricted map, cache hit + + engine, err := searchengine.New(context.Background(), searchengine.Config{Backend: "elasticsearch", URL: esStub.URL}) + require.NoError(t, err) + h := newHandler(newESStore(engine, testUserRoomIndex), nil, nil, fakeValkey, handlerConfig{ + DocCounts: 25, + MaxDocCounts: 100, + RestrictedRoomsCacheTTL: 5 * time.Minute, + RecentWindow: 365 * 24 * time.Hour, + RequestTimeout: 5 * time.Second, + UserRoomIndex: testUserRoomIndex, + SpotlightReadPattern: "spotlight-*", + }) + clientNATS := setupRouter(t, testQueueGroupV2, h.Register) + return &messagesV2Fixture{clientNATS: clientNATS} +} + +func TestIntegration_SearchMessages_V2_HitProjection(t *testing.T) { + f := setupMessagesV2Fixture(t) + + reqBytes, err := json.Marshal(model.SearchMessagesRequest{Query: "hello"}) + require.NoError(t, err) + + msg, err := f.clientNATS.Request(subject.SearchMessages("alice"), reqBytes, 5*time.Second) + require.NoError(t, err) + + var resp model.SearchMessagesResponse + require.NoError(t, json.Unmarshal(msg.Data, &resp)) + + require.Len(t, resp.Messages, 1) + assert.EqualValues(t, 1, resp.Total) + + got := resp.Messages[0] + assert.Equal(t, "m1", got.MessageID) + assert.Equal(t, "r1", got.RoomID) + assert.Equal(t, "site-a", got.SiteID) + assert.Equal(t, "alice", got.UserAccount) + assert.Equal(t, "hello", got.Content) +} + +func TestIntegration_SearchMessages_V2_EmptyQueryReturnsBadRequest(t *testing.T) { + f := setupMessagesV2Fixture(t) + + reqBytes, err := 
json.Marshal(model.SearchMessagesRequest{Query: ""}) + require.NoError(t, err) + + msg, err := f.clientNATS.Request(subject.SearchMessages("alice"), reqBytes, 5*time.Second) + require.NoError(t, err) + + var envelope model.ErrorResponse + require.NoError(t, json.Unmarshal(msg.Data, &envelope)) + assert.Equal(t, natsrouter.CodeBadRequest, envelope.Code) +} diff --git a/search-service/integration_rooms_test.go b/search-service/integration_rooms_test.go new file mode 100644 index 000000000..5062d8cee --- /dev/null +++ b/search-service/integration_rooms_test.go @@ -0,0 +1,212 @@ +//go:build integration + +package main + +// Integration tests for search.rooms (real ES + shared NATS + Valkey). + +import ( + "bytes" + "context" + "encoding/json" + "io" + "net/http" + "testing" + "time" + + "github.com/nats-io/nats.go" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/hmchangw/chat/pkg/model" + "github.com/hmchangw/chat/pkg/natsrouter" + "github.com/hmchangw/chat/pkg/searchengine" + "github.com/hmchangw/chat/pkg/subject" + "github.com/hmchangw/chat/pkg/testutil" + "github.com/hmchangw/chat/pkg/valkeyutil" +) + +// Per-test spotlight index against shared ES. 
+type roomsFixture struct { + clientNATS *nats.Conn + esURL string + spotlightIndex string +} + +func setupRoomsFixture(t *testing.T) *roomsFixture { + t.Helper() + esURL := testutil.Elasticsearch(t) + spotlightIndex := testutil.ElasticsearchIndex(t, "spotlight") + putTestSpotlightIndex(t, esURL, spotlightIndex) + + engine, err := searchengine.New(context.Background(), searchengine.Config{Backend: "elasticsearch", URL: esURL}) + require.NoError(t, err, "build searchengine for subs fixture") + + cache := newValkeyCache(valkeyutil.WrapClusterClient(testutil.SharedValkeyCluster(t))) + t.Cleanup(func() { testutil.FlushValkey(t) }) + h := newHandler(newESStore(engine, testUserRoomIndex), nil, nil, cache, handlerConfig{ + DocCounts: 25, + MaxDocCounts: 100, + RestrictedRoomsCacheTTL: 5 * time.Minute, + RecentWindow: 365 * 24 * time.Hour, + RequestTimeout: 5 * time.Second, + SpotlightReadPattern: spotlightIndex, + }) + clientNC := setupRouter(t, testQueueGroupSubs, h.Register) + return &roomsFixture{clientNATS: clientNC, esURL: esURL, spotlightIndex: spotlightIndex} +} + +func putTestSpotlightIndex(t *testing.T, esURL, index string) { + t.Helper() + body := map[string]any{ + "settings": map[string]any{ + "number_of_shards": 1, + "number_of_replicas": 0, + "refresh_interval": "1s", + }, + "mappings": map[string]any{ + "dynamic": false, + "properties": map[string]any{ + "roomId": map[string]any{"type": "keyword"}, + "roomName": map[string]any{ + "type": "search_as_you_type", + }, + "roomType": map[string]any{"type": "keyword"}, + "userAccount": map[string]any{"type": "keyword"}, + "siteId": map[string]any{"type": "keyword"}, + "joinedAt": map[string]any{"type": "date"}, + }, + }, + } + data, _ := json.Marshal(body) + req, err := http.NewRequest(http.MethodPut, esURL+"/"+index, bytes.NewReader(data)) + require.NoError(t, err) + req.Header.Set("Content-Type", "application/json") + resp, err := testHTTPClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + b, _ 
:= io.ReadAll(resp.Body) + require.True(t, resp.StatusCode == http.StatusOK || resp.StatusCode == http.StatusCreated, + "create spotlight index: status=%d body=%s", resp.StatusCode, b) +} + +func TestIntegration_SearchRooms_HappyPath(t *testing.T) { + f := setupRoomsFixture(t) + + const account = "alice" + now := time.Now().UTC() + + seedDoc(t, f.esURL, f.spotlightIndex, "spot-r1", map[string]any{ + "roomId": "r1", + "roomName": "engineering-announcements", + "roomType": "channel", + "userAccount": account, + "siteId": "site-local", + "joinedAt": now.Add(-48 * time.Hour).Format(time.RFC3339), + }) + seedDoc(t, f.esURL, f.spotlightIndex, "spot-r2", map[string]any{ + "roomId": "r2", + "roomName": "engineering-random", + "roomType": "channel", + "userAccount": account, + "siteId": "site-local", + "joinedAt": now.Add(-24 * time.Hour).Format(time.RFC3339), + }) + // A matching room owned by a different account. With the Mongo + // hydration removed, the spotlight userAccount term filter is the + // sole access boundary — this must not leak into alice's results. 
+ seedDoc(t, f.esURL, f.spotlightIndex, "spot-r3", map[string]any{ + "roomId": "r3", + "roomName": "engineering-secret", + "roomType": "channel", + "userAccount": "mallory", + "siteId": "site-local", + "joinedAt": now.Add(-12 * time.Hour).Format(time.RFC3339), + }) + + reqBytes, err := json.Marshal(model.SearchRoomsRequest{Query: "engineering"}) + require.NoError(t, err) + + msg, err := f.clientNATS.Request(subject.SearchRooms(account), reqBytes, 10*time.Second) + require.NoError(t, err) + + var resp model.SearchRoomsResponse + require.NoError(t, json.Unmarshal(msg.Data, &resp)) + + require.Len(t, resp.Rooms, 2, "both rooms matching 'engineering' must be returned") + byID := map[string]model.SearchRoom{} + for _, r := range resp.Rooms { + byID[r.RoomID] = r + } + assert.Equal(t, model.SearchRoom{RoomID: "r1", Name: "engineering-announcements", RoomType: "channel", SiteID: "site-local"}, byID["r1"]) + assert.Equal(t, model.SearchRoom{RoomID: "r2", Name: "engineering-random", RoomType: "channel", SiteID: "site-local"}, byID["r2"]) + _, leaked := byID["r3"] + assert.False(t, leaked, "rooms owned by another account must not leak") +} + +func TestIntegration_SearchRooms_RoomTypeChannelFilter(t *testing.T) { + f := setupRoomsFixture(t) + + const account = "bob" + now := time.Now().UTC() + + seedDoc(t, f.esURL, f.spotlightIndex, "spot-b-r1", map[string]any{ + "roomId": "b-r1", + "roomName": "bob-alice", + "roomType": "dm", + "userAccount": account, + "siteId": "site-local", + "joinedAt": now.Add(-1 * time.Hour).Format(time.RFC3339), + }) + seedDoc(t, f.esURL, f.spotlightIndex, "spot-b-r2", map[string]any{ + "roomId": "b-r2", + "roomName": "bob-channel", + "roomType": "channel", + "userAccount": account, + "siteId": "site-local", + "joinedAt": now.Add(-2 * time.Hour).Format(time.RFC3339), + }) + + reqBytes, err := json.Marshal(model.SearchRoomsRequest{Query: "bob", RoomType: "channel"}) + require.NoError(t, err) + + msg, err := 
f.clientNATS.Request(subject.SearchRooms(account), reqBytes, 10*time.Second) + require.NoError(t, err) + + var resp model.SearchRoomsResponse + require.NoError(t, json.Unmarshal(msg.Data, &resp)) + + require.Len(t, resp.Rooms, 1) + assert.Equal(t, model.SearchRoom{RoomID: "b-r2", Name: "bob-channel", RoomType: "channel", SiteID: "site-local"}, resp.Rooms[0], + "only the channel room must match roomType=channel filter") +} + +func TestIntegration_SearchRooms_EmptyQueryReturnsBadRequest(t *testing.T) { + f := setupRoomsFixture(t) + + reqBytes, err := json.Marshal(model.SearchRoomsRequest{Query: ""}) + require.NoError(t, err) + + msg, err := f.clientNATS.Request(subject.SearchRooms("alice"), reqBytes, 5*time.Second) + require.NoError(t, err) + + var envelope model.ErrorResponse + require.NoError(t, json.Unmarshal(msg.Data, &envelope)) + require.NotEmpty(t, envelope.Error) + assert.Equal(t, natsrouter.CodeBadRequest, envelope.Code) +} + +func TestIntegration_SearchRooms_RoomTypeAppReturnsBadRequest(t *testing.T) { + f := setupRoomsFixture(t) + + reqBytes, err := json.Marshal(model.SearchRoomsRequest{Query: "x", RoomType: "app"}) + require.NoError(t, err) + + msg, err := f.clientNATS.Request(subject.SearchRooms("alice"), reqBytes, 5*time.Second) + require.NoError(t, err) + + var envelope model.ErrorResponse + require.NoError(t, json.Unmarshal(msg.Data, &envelope)) + require.NotEmpty(t, envelope.Error) + assert.Equal(t, natsrouter.CodeBadRequest, envelope.Code) + assert.Contains(t, envelope.Error, "invalid roomType") +} diff --git a/search-service/integration_test.go b/search-service/integration_test.go deleted file mode 100644 index 9432f04c1..000000000 --- a/search-service/integration_test.go +++ /dev/null @@ -1,1216 +0,0 @@ -//go:build integration - -package main - -import ( - "bytes" - "context" - "encoding/json" - "fmt" - "io" - "net/http" - "net/http/httptest" - "testing" - "time" - - "github.com/nats-io/nats.go" - "github.com/stretchr/testify/assert" - 
"github.com/stretchr/testify/require" - "github.com/testcontainers/testcontainers-go" - natsmod "github.com/testcontainers/testcontainers-go/modules/nats" - "github.com/testcontainers/testcontainers-go/network" - "github.com/testcontainers/testcontainers-go/wait" - - "go.mongodb.org/mongo-driver/v2/mongo" - - "github.com/hmchangw/chat/pkg/model" - "github.com/hmchangw/chat/pkg/natsrouter" - "github.com/hmchangw/chat/pkg/natsutil" - "github.com/hmchangw/chat/pkg/restyutil" - "github.com/hmchangw/chat/pkg/searchengine" - "github.com/hmchangw/chat/pkg/subject" - "github.com/hmchangw/chat/pkg/testutil" - "github.com/hmchangw/chat/pkg/testutil/testimages" - "github.com/hmchangw/chat/pkg/valkeyutil" -) - -const testUserRoomIndex = "user-room" - -// --- Fixture ----------------------------------------------------------------- - -// ccsFixture is the full stack for cross-cluster integration tests: two ES -// containers on a shared Docker network (with CCS configured from local → -// remote), plus Valkey and NATS, plus the wired search-service router. -// -// localURL / remoteURL are the host-mapped HTTP URLs for seeding; the -// search-service itself sees only localURL. `clientNATS` is the raw NATS -// client used to issue request/reply calls. -type ccsFixture struct { - localURL string - remoteURL string - localES searchengine.SearchEngine - remoteES searchengine.SearchEngine - clientNATS *nats.Conn -} - -// setupCCSFixture stands up the whole CCS environment. Total cost is ~ES -// container start × 2 (~60-90s) so tests that use it should reuse via -// TestMain when added. -// -// Every major step emits a `t.Logf` so a CI failure (where raw logs are -// often opaque on public runs) leaves enough breadcrumbs in the `go test` -// output to pinpoint which phase broke. 
-func setupCCSFixture(t *testing.T) *ccsFixture { - t.Helper() - ctx := context.Background() - - t.Logf("CCS fixture: creating docker network") - nw, err := network.New(ctx) - require.NoError(t, err, "create docker network") - t.Cleanup(func() { _ = nw.Remove(ctx) }) - t.Logf("CCS fixture: network %q created", nw.Name) - - t.Logf("CCS fixture: starting remote ES container (alias=es-remote)") - remoteURL := startESForCCS(t, nw, "es-remote", "remote-cluster") - t.Logf("CCS fixture: remote ES up at %s", remoteURL) - - t.Logf("CCS fixture: starting local ES container (alias=es-local)") - localURL := startESForCCS(t, nw, "es-local", "local-cluster") - t.Logf("CCS fixture: local ES up at %s", localURL) - - // Wire local ES to reach the remote in PROXY mode. Proxy mode opens a - // single direct connection to the configured address and skips the - // sniff-then-reconnect dance that sniff mode does — that dance requires - // each remote node to advertise a reachable publish address, which is - // fragile when docker containers bind transport on 0.0.0.0 and the - // publish address defaults to an interface the peer can't route to. - // Proxy mode is the robust choice for CCS over an ephemeral docker - // network. Ref: ES docs "Remote cluster settings" → `mode=proxy`. 
- t.Logf("CCS fixture: configuring cluster.remote.remote1 (proxy mode → es-remote:9300)") - putClusterSetting(t, localURL, map[string]any{ - "persistent": map[string]any{ - "cluster.remote.remote1.mode": "proxy", - "cluster.remote.remote1.proxy_address": "es-remote:9300", - }, - }) - t.Logf("CCS fixture: waiting for remote1 to report connected=true (timeout 120s)") - waitForRemoteConnected(t, localURL, "remote1", 120*time.Second) - t.Logf("CCS fixture: remote1 connected") - - localEngine, err := searchengine.New(ctx, searchengine.Config{Backend: "elasticsearch", URL: localURL}) - require.NoError(t, err, "build searchengine for local") - remoteEngine, err := searchengine.New(ctx, searchengine.Config{Backend: "elasticsearch", URL: remoteURL}) - require.NoError(t, err, "build searchengine for remote") - - t.Logf("CCS fixture: starting valkey") - valkeyClient := valkeyutil.WrapClusterClient(testutil.StartValkeyCluster(t)) - t.Cleanup(func() { valkeyutil.Disconnect(valkeyClient) }) - t.Logf("CCS fixture: valkey started") - - t.Logf("CCS fixture: starting NATS") - natsURL := startNATS(t) - serverNC, err := natsutil.Connect(natsURL, "") - require.NoError(t, err, "connect nats (server side)") - t.Cleanup(func() { _ = serverNC.Drain() }) - - clientNC, err := nats.Connect(natsURL) - require.NoError(t, err, "connect nats (client side)") - t.Cleanup(func() { clientNC.Close() }) - t.Logf("CCS fixture: NATS at %s", natsURL) - - userRoomIndex := testUserRoomIndex - store := newESStore(localEngine, userRoomIndex) - cache := newValkeyCache(valkeyClient) - handler := newHandler(store, nil, nil, cache, handlerConfig{ - DocCounts: 25, - MaxDocCounts: 100, - RestrictedRoomsCacheTTL: 5 * time.Minute, - RecentWindow: 365 * 24 * time.Hour, - UserRoomIndex: userRoomIndex, - SpotlightReadPattern: "spotlight-test-*", - }) - - router := natsrouter.New(serverNC, "search-service-test") - router.Use(natsrouter.RequestID()) - handler.Register(router) - // Flush — see setupAppsFixture for the 
rationale. - require.NoError(t, serverNC.NatsConn().Flush()) - - return &ccsFixture{ - localURL: localURL, - remoteURL: remoteURL, - localES: localEngine, - remoteES: remoteEngine, - clientNATS: clientNC, - } -} - -// startESForCCS starts one ES node on the shared network with the given -// network alias so the peer can reach it at `{alias}:9300`. Returns the -// host-mapped HTTP URL for seeding. -// -// `transport.host: 0.0.0.0` is required so the transport port binds on all -// interfaces, including the bridge network (ES 8.x defaults to `_site_` -// which excludes the container's bridge IP in some setups). CCS itself -// uses `proxy` mode to avoid publish-address sensitivity — see -// setupCCSFixture. `xpack.security.enabled=false` matches the local dev -// deps compose. -func startESForCCS(t *testing.T, nw *testcontainers.DockerNetwork, alias, clusterName string) string { - t.Helper() - ctx := context.Background() - - container, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{ - ContainerRequest: testcontainers.ContainerRequest{ - Image: testimages.Elasticsearch, - ExposedPorts: []string{"9200/tcp", "9300/tcp"}, - Networks: []string{nw.Name}, - NetworkAliases: map[string][]string{ - nw.Name: {alias}, - }, - Env: map[string]string{ - "cluster.name": clusterName, - "discovery.type": "single-node", - "xpack.security.enabled": "false", - "network.host": "0.0.0.0", - "transport.host": "0.0.0.0", - "cluster.routing.allocation.disk.threshold_enabled": "false", - "ES_JAVA_OPTS": "-Xms512m -Xmx512m", - }, - WaitingFor: wait.ForAll( - wait.ForHTTP("/").WithPort("9200/tcp").WithStartupTimeout(120*time.Second), - wait.ForHTTP("/_cluster/health?wait_for_status=yellow&timeout=60s"). - WithPort("9200/tcp"). 
- WithStartupTimeout(120*time.Second), - ), - }, - Started: true, - }) - require.NoError(t, err, "start elasticsearch (%s)", alias) - t.Cleanup(func() { _ = container.Terminate(ctx) }) - - host, err := container.Host(ctx) - require.NoError(t, err) - port, err := container.MappedPort(ctx, "9200") - require.NoError(t, err) - return fmt.Sprintf("http://%s:%s", host, port.Port()) -} - -func startNATS(t *testing.T) string { - t.Helper() - ctx := context.Background() - c, err := natsmod.Run(ctx, testimages.NATS) - require.NoError(t, err, "start nats") - t.Cleanup(func() { _ = c.Terminate(ctx) }) - - url, err := c.ConnectionString(ctx) - require.NoError(t, err, "nats connection string") - return url -} - -// --- Index templates --------------------------------------------------------- - -// buildTestTemplate wraps a pattern + property map with single-node-friendly -// index settings (1 shard, 0 replicas, 1s refresh) and `dynamic: false` -// mappings. The templates below hand-roll their property sets so the tests -// remain independent of search-sync-worker's custom-analyzer configuration. 
-func buildTestTemplate(pattern string, properties map[string]any) json.RawMessage { - body := map[string]any{ - "index_patterns": []string{pattern}, - "template": map[string]any{ - "settings": map[string]any{ - "index": map[string]any{ - "number_of_shards": 1, - "number_of_replicas": 0, - "refresh_interval": "1s", - }, - }, - "mappings": map[string]any{ - "dynamic": false, - "properties": properties, - }, - }, - } - data, _ := json.Marshal(body) - return data -} - -func messageTestTemplate() json.RawMessage { - return buildTestTemplate("messages-*", map[string]any{ - "messageId": map[string]any{"type": "keyword"}, - "roomId": map[string]any{"type": "keyword"}, - "siteId": map[string]any{"type": "keyword"}, - "userId": map[string]any{"type": "keyword"}, - "userAccount": map[string]any{"type": "keyword"}, - "content": map[string]any{ - "type": "text", - "fields": map[string]any{ - "keyword": map[string]any{"type": "keyword"}, - }, - }, - "createdAt": map[string]any{"type": "date"}, - "threadParentMessageId": map[string]any{"type": "keyword"}, - "threadParentMessageCreatedAt": map[string]any{"type": "date"}, - "tshow": map[string]any{"type": "boolean"}, - }) -} - -func userRoomTestTemplate() json.RawMessage { - return buildTestTemplate(testUserRoomIndex, map[string]any{ - "userAccount": map[string]any{"type": "keyword"}, - "rooms": map[string]any{ - "type": "text", - "fields": map[string]any{ - "keyword": map[string]any{"type": "keyword", "ignore_above": 256}, - }, - }, - "restrictedRooms": map[string]any{"type": "flattened"}, - "roomTimestamps": map[string]any{"type": "flattened"}, - "createdAt": map[string]any{"type": "date"}, - "updatedAt": map[string]any{"type": "date"}, - }) -} - -// --- HTTP helpers ------------------------------------------------------------ - -// testHTTPClient is a bounded HTTP client for ES control-plane calls — -// stalled containers shouldn't be able to hang the integration job past -// the per-call deadline. 
Kept small on purpose: these calls hit localhost -// (docker-mapped port) and are cheap when they succeed. -var testHTTPClient = &http.Client{Timeout: 10 * time.Second} - -// putClusterSetting pushes a /_cluster/settings update. Used to configure -// the CCS remote after both clusters are up. -func putClusterSetting(t *testing.T, esURL string, body map[string]any) { - t.Helper() - data, _ := json.Marshal(body) - req, err := http.NewRequest(http.MethodPut, esURL+"/_cluster/settings", bytes.NewReader(data)) - require.NoError(t, err) - req.Header.Set("Content-Type", "application/json") - resp, err := testHTTPClient.Do(req) - require.NoError(t, err, "put cluster settings") - defer resp.Body.Close() - respBody, _ := io.ReadAll(resp.Body) - require.Equal(t, http.StatusOK, resp.StatusCode, "put cluster settings: %s", respBody) -} - -// waitForRemoteConnected polls /_remote/info until the given remote cluster -// reports connected=true. CCS registration is async — the settings call -// returns immediately but the transport handshake happens in the -// background. On timeout, the last-seen /_remote/info body is captured in -// the failure message so CI can diagnose whether the remote was ever -// registered, what mode it ended up in, or why it couldn't connect. 
-func waitForRemoteConnected(t *testing.T, localURL, remoteName string, timeout time.Duration) { - t.Helper() - deadline := time.Now().Add(timeout) - var lastBody string - for time.Now().Before(deadline) { - resp, err := testHTTPClient.Get(localURL + "/_remote/info") - if err == nil { - body, _ := io.ReadAll(resp.Body) - resp.Body.Close() - lastBody = string(body) - var info map[string]struct { - Connected bool `json:"connected"` - } - if json.Unmarshal(body, &info) == nil { - if entry, ok := info[remoteName]; ok && entry.Connected { - return - } - } - } - time.Sleep(1 * time.Second) - } - t.Fatalf("remote cluster %q never became connected within %s\nlast /_remote/info body: %s", - remoteName, timeout, lastBody) -} - -// seedDoc PUTs a JSON document into ES, synchronously refreshing the index -// so the next search sees it. -func seedDoc(t *testing.T, esURL, index, id string, doc any) { - t.Helper() - data, err := json.Marshal(doc) - require.NoError(t, err) - url := fmt.Sprintf("%s/%s/_doc/%s?refresh=true", esURL, index, id) - req, err := http.NewRequest(http.MethodPut, url, bytes.NewReader(data)) - require.NoError(t, err) - req.Header.Set("Content-Type", "application/json") - resp, err := testHTTPClient.Do(req) - require.NoError(t, err) - defer resp.Body.Close() - body, _ := io.ReadAll(resp.Body) - require.Truef(t, resp.StatusCode == http.StatusCreated || resp.StatusCode == http.StatusOK, - "seedDoc %s/%s: status=%d body=%s", index, id, resp.StatusCode, body) -} - -// --- Templates on both clusters --------------------------------------------- - -func (f *ccsFixture) installTemplates(t *testing.T) { - t.Helper() - ctx := context.Background() - - t.Logf("templates: upserting messages_template on local") - require.NoError(t, f.localES.UpsertTemplate(ctx, "messages_template", messageTestTemplate()), - "upsert messages_template on local") - t.Logf("templates: upserting messages_template on remote") - require.NoError(t, f.remoteES.UpsertTemplate(ctx, 
"messages_template", messageTestTemplate()), - "upsert messages_template on remote") - // user-room is local-only per the search-service architecture. - t.Logf("templates: upserting user_room_template on local") - require.NoError(t, f.localES.UpsertTemplate(ctx, "user_room_template", userRoomTestTemplate()), - "upsert user_room_template on local") - t.Logf("templates: all upserted") -} - -// --- Test -------------------------------------------------------------------- - -// TestSearchService_SearchMessages_CCS_CrossCluster_Unrestricted verifies -// the core CCS promise: a user's search crosses from the local cluster -// (`messages-*`) to a remote cluster (`*:messages-*`) and the service -// returns the merged result set. Both rooms are unrestricted — they live in -// the user-room doc's `rooms[]` — and the terms-lookup clause handles them -// uniformly regardless of which site hosts the message. -func TestSearchService_SearchMessages_CCS_CrossCluster_Unrestricted(t *testing.T) { - f := setupCCSFixture(t) - f.installTemplates(t) - - // --- Seed -------------------------------------------------------------- - // - // Alice is a member of two unrestricted rooms: one lives on the local - // site, the other on the remote site. The user-room doc (local-only) - // lists BOTH in `rooms[]` — the sync-worker would normally populate - // this via INBOX events; here we seed directly. - const account = "alice" - const localRoomID = "room-local-1" - const remoteRoomID = "room-remote-1" - - now := time.Now().UTC() - createdAt := now.Add(-time.Hour) - monthIdx := "messages-" + createdAt.Format("2006-01") - - // user-room doc: unrestricted memberships in both rooms. 
- seedDoc(t, f.localURL, testUserRoomIndex, account, map[string]any{ - "userAccount": account, - "rooms": []string{localRoomID, remoteRoomID}, - "restrictedRooms": map[string]int64{}, - "roomTimestamps": map[string]int64{ - localRoomID: createdAt.UnixMilli(), - remoteRoomID: createdAt.UnixMilli(), - }, - "createdAt": createdAt.Format(time.RFC3339Nano), - "updatedAt": createdAt.Format(time.RFC3339Nano), - }) - - // Local message in local room. - seedDoc(t, f.localURL, monthIdx, "msg-local-1", map[string]any{ - "messageId": "msg-local-1", - "roomId": localRoomID, - "siteId": "site-local", - "userId": "user-bob", - "userAccount": "bob", - "content": "hello from local", - "createdAt": createdAt.Format(time.RFC3339Nano), - }) - - // Remote message in remote room. Same index pattern (`messages-*`) on - // the remote cluster — CCS resolves the `*:messages-*` segment on the - // local query. - seedDoc(t, f.remoteURL, monthIdx, "msg-remote-1", map[string]any{ - "messageId": "msg-remote-1", - "roomId": remoteRoomID, - "siteId": "site-remote", - "userId": "user-carol", - "userAccount": "carol", - "content": "hello from remote", - "createdAt": createdAt.Format(time.RFC3339Nano), - }) - - // --- Search via NATS --------------------------------------------------- - // - // Round-trips through the real natsrouter: the handler reads - // restrictedRooms from Valkey (miss → ES prefetch → Valkey SET), then - // builds the CCS query against `messages-*,*:messages-*` and parses - // the merged response. - req := model.SearchMessagesRequest{Query: "hello"} - reqData, err := json.Marshal(req) - require.NoError(t, err) - - // Generous timeout: first request is Valkey miss → ES prefetch of - // user-room doc → CCS fanout → response parse. Tight timeouts mask - // real latency bugs in integration. 
- msg, err := f.clientNATS.Request(subject.SearchMessages(account), reqData, 30*time.Second) - require.NoError(t, err, "NATS request failed") - - t.Logf("response: %s", msg.Data) - - var resp model.SearchMessagesResponse - require.NoError(t, json.Unmarshal(msg.Data, &resp), "decode response: %s", msg.Data) - - assert.EqualValues(t, 2, resp.Total, "expected both local + remote hits; got body=%s", msg.Data) - require.Len(t, resp.Messages, 2, "expected 2 hits; got body=%s", msg.Data) - - gotRooms := map[string]string{} - for _, hit := range resp.Messages { - gotRooms[hit.RoomID] = hit.SiteID - } - assert.Equal(t, "site-local", gotRooms[localRoomID], "local message should be present") - assert.Equal(t, "site-remote", gotRooms[remoteRoomID], "remote message should be present via CCS") -} - -// TestSearchService_SearchMessages_CCS_CrossCluster_Restricted verifies -// the restricted-room access-control clauses fire correctly across the -// CCS boundary. Alice is a member of one UNRESTRICTED local room and one -// RESTRICTED remote room with historySharedSince (HSS) set to a specific -// cutoff. The user-room doc (local-only) routes the remote room into -// `restrictedRooms{rid: hssMillis}`. -// -// Seed on the remote cluster covers every branch the query builder -// encodes for restricted rooms: -// -// - pre-HSS parent → MUST NOT match (Clause A: createdAt < hss) -// - post-HSS parent → MUST match (Clause A) -// - post-HSS thread reply, tshow=true → MUST match (Clause B1: outer gate passes + tshow=true fires B1, even though parent is pre-HSS) -// - post-HSS thread reply, tshow=false → MUST NOT match (Clause B fails: outer gate passes but inner OR fails — tshow=false AND parent < hss so B2 also fails) -// -// Plus one unrestricted local parent to prove the two paths interact -// cleanly on the same search. Total expected hits: 3 (local + post-HSS -// remote parent + post-HSS remote reply with tshow=true). 
-func TestSearchService_SearchMessages_CCS_CrossCluster_Restricted(t *testing.T) { - f := setupCCSFixture(t) - f.installTemplates(t) - - const account = "alice" - const localRoomID = "room-local-unrestricted" - const remoteRoomID = "room-remote-restricted" - - // Temporal setup: - // - hss is the user's join-time bound for the restricted remote room. - // - preHSS is 3 hours before hss (so pre-HSS messages are clearly - // older than the gate). - // - postHSS is 1 hour after hss. - // All well within the default 1-year `recent_window` so none of them - // get filtered out by the global createdAt range filter. - now := time.Now().UTC() - hss := now.Add(-2 * time.Hour) - preHSS := hss.Add(-3 * time.Hour) - postHSS := hss.Add(time.Hour) - monthIdxFor := func(ts time.Time) string { return "messages-" + ts.Format("2006-01") } - - // user-room doc: local room unrestricted, remote room restricted with hss. - t.Logf("seed: upserting user-room doc for %s (restricted %s since %s)", account, remoteRoomID, hss.Format(time.RFC3339)) - seedDoc(t, f.localURL, testUserRoomIndex, account, map[string]any{ - "userAccount": account, - "rooms": []string{localRoomID}, - "restrictedRooms": map[string]int64{ - remoteRoomID: hss.UnixMilli(), - }, - "roomTimestamps": map[string]int64{ - localRoomID: now.UnixMilli(), - remoteRoomID: now.UnixMilli(), - }, - "createdAt": now.Format(time.RFC3339Nano), - "updatedAt": now.Format(time.RFC3339Nano), - }) - - // --- LOCAL unrestricted room ---------------------------------------- - // One plain message that should always match via the terms-lookup - // branch (no HSS involved). 
- t.Logf("seed: local unrestricted message in %s", localRoomID) - seedDoc(t, f.localURL, monthIdxFor(postHSS), "msg-local-1", map[string]any{ - "messageId": "msg-local-1", - "roomId": localRoomID, - "siteId": "site-local", - "userId": "user-bob", - "userAccount": "bob", - "content": "hello from local", - "createdAt": postHSS.Format(time.RFC3339Nano), - }) - - // --- REMOTE restricted room ----------------------------------------- - // Four messages, each exercising one branch of the restricted-room - // clauses. Pre-HSS parent lives at `msg-remote-pre-parent`; its - // thread replies reference it via threadParentMessageId + - // threadParentMessageCreatedAt=preHSS. - t.Logf("seed: remote pre-HSS parent (MUST NOT match)") - seedDoc(t, f.remoteURL, monthIdxFor(preHSS), "msg-remote-pre-parent", map[string]any{ - "messageId": "msg-remote-pre-parent", - "roomId": remoteRoomID, - "siteId": "site-remote", - "userId": "user-carol", - "userAccount": "carol", - "content": "hello pre-hss parent", - "createdAt": preHSS.Format(time.RFC3339Nano), - }) - - t.Logf("seed: remote post-HSS parent (Clause A match)") - seedDoc(t, f.remoteURL, monthIdxFor(postHSS), "msg-remote-post-parent", map[string]any{ - "messageId": "msg-remote-post-parent", - "roomId": remoteRoomID, - "siteId": "site-remote", - "userId": "user-carol", - "userAccount": "carol", - "content": "hello post-hss parent", - "createdAt": postHSS.Format(time.RFC3339Nano), - }) - - // Post-HSS reply to a pre-HSS parent, tshow=true → Clause B1 matches. - // The reply's own createdAt satisfies Clause B's outer gate - // (createdAt >= hss); tshow=true then fires B1 regardless of the - // parent's age. If the outer gate weren't there, a pre-HSS tshow=true - // reply would leak history the user never had access to. 
- t.Logf("seed: remote post-HSS reply with tshow=true, pre-HSS parent (Clause B1 match)") - seedDoc(t, f.remoteURL, monthIdxFor(postHSS), "msg-remote-reply-tshow", map[string]any{ - "messageId": "msg-remote-reply-tshow", - "roomId": remoteRoomID, - "siteId": "site-remote", - "userId": "user-carol", - "userAccount": "carol", - "content": "hello tshow reply", - "createdAt": postHSS.Add(time.Minute).Format(time.RFC3339Nano), - "threadParentMessageId": "msg-remote-pre-parent", - "threadParentMessageCreatedAt": preHSS.Format(time.RFC3339Nano), - "tshow": true, - }) - - // Post-HSS reply to a pre-HSS parent, tshow=false → Clause B rejects. - // Outer gate passes (reply createdAt >= hss) but the inner OR fails: - // tshow=false blocks B1 and the parent's pre-HSS createdAt blocks B2. - t.Logf("seed: remote post-HSS reply without tshow, pre-HSS parent (MUST NOT match)") - seedDoc(t, f.remoteURL, monthIdxFor(postHSS), "msg-remote-reply-plain", map[string]any{ - "messageId": "msg-remote-reply-plain", - "roomId": remoteRoomID, - "siteId": "site-remote", - "userId": "user-carol", - "userAccount": "carol", - "content": "hello plain reply", - "createdAt": postHSS.Add(2 * time.Minute).Format(time.RFC3339Nano), - "threadParentMessageId": "msg-remote-pre-parent", - "threadParentMessageCreatedAt": preHSS.Format(time.RFC3339Nano), - }) - - // --- Search --------------------------------------------------------- - reqData, err := json.Marshal(model.SearchMessagesRequest{Query: "hello"}) - require.NoError(t, err) - - msg, err := f.clientNATS.Request(subject.SearchMessages(account), reqData, 30*time.Second) - require.NoError(t, err, "NATS request failed") - t.Logf("response: %s", msg.Data) - - var resp model.SearchMessagesResponse - require.NoError(t, json.Unmarshal(msg.Data, &resp), "decode response: %s", msg.Data) - - got := map[string]bool{} - for _, hit := range resp.Messages { - got[hit.MessageID] = true - } - - // Expected matches: - assert.True(t, got["msg-local-1"], "local 
unrestricted message must match via terms-lookup") - assert.True(t, got["msg-remote-post-parent"], "post-HSS remote parent must match via Clause A (CCS)") - assert.True(t, got["msg-remote-reply-tshow"], "post-HSS remote reply with tshow=true must match via Clause B1 (CCS)") - - // Expected exclusions: - assert.False(t, got["msg-remote-pre-parent"], "pre-HSS remote parent must be excluded by Clause A gate") - assert.False(t, got["msg-remote-reply-plain"], "post-HSS remote reply without tshow + pre-HSS parent must be excluded (outer gate passes; B1 and B2 both fail)") - - assert.EqualValues(t, 3, resp.Total, "expected exactly 3 hits; got body=%s", msg.Data) - require.Len(t, resp.Messages, 3, "expected 3 hits; got body=%s", msg.Data) -} - -// --- search.apps integration ------------------------------------------------ - -// setupAppsFixture starts an isolated Mongo container (via pkg/testutil) and -// a single search-service router bound to that DB. ES/Valkey are not used by -// search.apps, so we wire fakes (the existing `fakeStore` / `fakeCache` -// satisfy the interfaces but never get called on the apps path). -type appsFixture struct { - clientNATS *nats.Conn - mongoDB *mongo.Database -} - -func setupAppsFixture(t *testing.T) *appsFixture { - t.Helper() - ctx := context.Background() - - mongoDB := testutil.MongoDB(t, "search_service_test") - - // Start NATS (reuse the existing NATS container helper). 
- natsContainer, err := natsmod.Run(ctx, testimages.NATS, - testcontainers.WithWaitStrategy(wait.ForLog("Server is ready").WithStartupTimeout(60*time.Second)), - ) - require.NoError(t, err) - t.Cleanup(func() { _ = natsContainer.Terminate(ctx) }) - - natsURL, err := natsContainer.ConnectionString(ctx) - require.NoError(t, err) - - serverNATS, err := natsutil.Connect(natsURL, "") - require.NoError(t, err) - t.Cleanup(func() { _ = serverNATS.Drain() }) - - clientNATS, err := nats.Connect(natsURL) - require.NoError(t, err) - t.Cleanup(func() { clientNATS.Close() }) - - // Wire the handler with a real mongoStore and stub ES/cache. - mongoStore := newMongoStore(mongoDB) - store := &fakeStore{} - cache := newFakeCache() - h := newHandler(store, mongoStore, nil, cache, handlerConfig{ - DocCounts: 25, - MaxDocCounts: 100, - RestrictedRoomsCacheTTL: 5 * time.Minute, - RecentWindow: 365 * 24 * time.Hour, - RequestTimeout: 5 * time.Second, - SpotlightReadPattern: "spotlight-*", - }) - - router := natsrouter.New(serverNATS, "search-service-test") - router.Use(natsrouter.RequestID()) - h.Register(router) - // Flush ensures subscriptions are registered on the server before the - // fixture returns. Without this, fast tests that fire a request - // immediately can hit "no responders available" while subscriptions - // are still propagating. natsutil.Connect returns an otelnats.Conn - // wrapper that doesn't expose Flush; reach through to the underlying - // *nats.Conn. - require.NoError(t, serverNATS.NatsConn().Flush()) - t.Cleanup(func() { - _ = router.Shutdown(context.Background()) - }) - - return &appsFixture{clientNATS: clientNATS, mongoDB: mongoDB} -} - -func TestIntegration_SearchApps_PrototypePipeline(t *testing.T) { - f := setupAppsFixture(t) - ctx := context.Background() - - // Seed 3 apps in Mongo. The prototype pipeline matches by `name` regex - // (case-insensitive) and applies $limit; the full $lookup access-guard - // pipeline is implemented in a follow-up. 
- _, err := f.mongoDB.Collection("apps").InsertMany(ctx, []any{ - map[string]any{"_id": "a1", "name": "Weather Alpha", "assistant": map[string]any{"enabled": true, "name": "weather.bot"}}, - map[string]any{"_id": "a2", "name": "Weatherly", "assistant": map[string]any{"enabled": false, "name": "weatherly.bot"}}, - map[string]any{"_id": "a3", "name": "Calendar"}, - }) - require.NoError(t, err) - - reqBytes, err := json.Marshal(model.SearchAppsRequest{Query: "weather"}) - require.NoError(t, err) - - msg, err := f.clientNATS.Request(subject.SearchApps("alice"), reqBytes, 5*time.Second) - require.NoError(t, err) - - var resp model.SearchAppsResponse - require.NoError(t, json.Unmarshal(msg.Data, &resp)) - - require.Len(t, resp.Apps, 2, "two apps match the 'weather' regex") - names := []string{resp.Apps[0].Name, resp.Apps[1].Name} - assert.Contains(t, names, "Weather Alpha") - assert.Contains(t, names, "Weatherly") -} - -func TestIntegration_SearchApps_AssistantEnabledFilter(t *testing.T) { - f := setupAppsFixture(t) - ctx := context.Background() - - _, err := f.mongoDB.Collection("apps").InsertMany(ctx, []any{ - map[string]any{"_id": "a1", "name": "Weather Alpha", "assistant": map[string]any{"enabled": true, "name": "weather.bot"}}, - map[string]any{"_id": "a2", "name": "Weatherly", "assistant": map[string]any{"enabled": false, "name": "weatherly.bot"}}, - }) - require.NoError(t, err) - - enabled := true - reqBytes, err := json.Marshal(model.SearchAppsRequest{ - Query: "weather", - AssistantEnabled: &enabled, - }) - require.NoError(t, err) - - msg, err := f.clientNATS.Request(subject.SearchApps("alice"), reqBytes, 5*time.Second) - require.NoError(t, err) - - var resp model.SearchAppsResponse - require.NoError(t, json.Unmarshal(msg.Data, &resp)) - - require.Len(t, resp.Apps, 1) - assert.Equal(t, "Weather Alpha", resp.Apps[0].Name) -} - -func TestIntegration_SearchApps_EmptyQueryReturnsBadRequest(t *testing.T) { - f := setupAppsFixture(t) - - reqBytes, err := 
json.Marshal(model.SearchAppsRequest{Query: ""}) - require.NoError(t, err) - - msg, err := f.clientNATS.Request(subject.SearchApps("alice"), reqBytes, 5*time.Second) - require.NoError(t, err) - - var envelope model.ErrorResponse - require.NoError(t, json.Unmarshal(msg.Data, &envelope)) - require.NotEmpty(t, envelope.Error) - assert.Equal(t, natsrouter.CodeBadRequest, envelope.Code) -} - -// --- search.users integration ------------------------------------------------ - -// usersFixture is a minimal fixture for the search.users path: NATS for the -// request/reply layer, and an httptest.Server standing in for the third-party -// HR endpoint. No Mongo or ES containers are needed. -type usersFixture struct { - clientNATS *nats.Conn - thirdParty *httptest.Server // controls the stub response -} - -func setupUsersFixture(t *testing.T, thirdPartyHandler http.Handler) *usersFixture { - t.Helper() - - // Start the stub third-party server. - stub := httptest.NewServer(thirdPartyHandler) - t.Cleanup(stub.Close) - - // NATS. - natsURL := startNATS(t) - serverNC, err := natsutil.Connect(natsURL, "") - require.NoError(t, err, "connect nats (server side)") - t.Cleanup(func() { _ = serverNC.Drain() }) - - clientNC, err := nats.Connect(natsURL) - require.NoError(t, err, "connect nats (client side)") - t.Cleanup(func() { clientNC.Close() }) - - // Wire the handler with a real httpUsersClient pointing at the stub. - usersRC := restyutil.New(stub.URL, restyutil.WithTimeout(5*time.Second)) - usersClient := newHTTPUsersClient(usersRC, "") - - h := newHandler(nil, nil, usersClient, newFakeCache(), handlerConfig{ - DocCounts: 25, - MaxDocCounts: 100, - RequestTimeout: 5 * time.Second, - }) - - router := natsrouter.New(serverNC, "search-service-test") - router.Use(natsrouter.RequestID()) - h.Register(router) - // Flush — see setupAppsFixture for the rationale. 
- require.NoError(t, serverNC.NatsConn().Flush()) - t.Cleanup(func() { _ = router.Shutdown(context.Background()) }) - - return &usersFixture{clientNATS: clientNC, thirdParty: stub} -} - -func TestIntegration_SearchUsers_Happy(t *testing.T) { - // Stub returns two users matching the query. - stubResp := `[{"account":"alice","engName":"Alice Wang"},{"account":"alice2","engName":"Alice Chen"}]` - - f := setupUsersFixture(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusOK) - _, _ = w.Write([]byte(stubResp)) - })) - - reqBytes, err := json.Marshal(model.SearchUsersRequest{Query: "alice"}) - require.NoError(t, err) - - msg, err := f.clientNATS.Request(subject.SearchUsers("alice"), reqBytes, 5*time.Second) - require.NoError(t, err) - - var users []model.SearchUser - require.NoError(t, json.Unmarshal(msg.Data, &users)) - - require.Len(t, users, 2) - assert.Equal(t, "alice", users[0].Account) - assert.Equal(t, "Alice Wang", users[0].EngName) -} - -func TestIntegration_SearchUsers_EmptyQueryReturnsBadRequest(t *testing.T) { - // Stub should never be called for a bad-request scenario. - f := setupUsersFixture(t, http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - t.Error("third-party stub should not be called for empty query") - w.WriteHeader(http.StatusInternalServerError) - })) - - reqBytes, err := json.Marshal(model.SearchUsersRequest{Query: ""}) - require.NoError(t, err) - - msg, err := f.clientNATS.Request(subject.SearchUsers("alice"), reqBytes, 5*time.Second) - require.NoError(t, err) - - var envelope model.ErrorResponse - require.NoError(t, json.Unmarshal(msg.Data, &envelope)) - require.NotEmpty(t, envelope.Error) - assert.Equal(t, natsrouter.CodeBadRequest, envelope.Code) -} - -func TestIntegration_SearchUsers_ThirdPartyErrorReturnsInternal(t *testing.T) { - // Stub returns a 503 to simulate a backend outage. 
- f := setupUsersFixture(t, http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - w.WriteHeader(http.StatusServiceUnavailable) - })) - - reqBytes, err := json.Marshal(model.SearchUsersRequest{Query: "alice"}) - require.NoError(t, err) - - msg, err := f.clientNATS.Request(subject.SearchUsers("alice"), reqBytes, 5*time.Second) - require.NoError(t, err) - - var envelope model.ErrorResponse - require.NoError(t, json.Unmarshal(msg.Data, &envelope)) - require.NotEmpty(t, envelope.Error) - assert.Equal(t, natsrouter.CodeInternal, envelope.Code, - "non-2xx from third-party must surface as internal error, not raw status") - // Raw third-party details must not leak to the caller. - assert.NotContains(t, envelope.Error, "503", "status code from third-party must not leak") -} - -// --- search.rooms integration ---------------------------------------- - -// roomsFixture wires a real ES container (for the spotlight index) and -// NATS. search.rooms is served directly from the spotlight index, so no -// Mongo is involved. -type roomsFixture struct { - clientNATS *nats.Conn - esURL string -} - -// setupRoomsFixture stands up ES (spotlight index) and NATS. It registers -// t.Cleanup for all containers and returns a ready fixture. -func setupRoomsFixture(t *testing.T) *roomsFixture { - t.Helper() - ctx := context.Background() - - // Single ES node — no CCS needed; spotlight is always local. 
- container, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{ - ContainerRequest: testcontainers.ContainerRequest{ - Image: testimages.Elasticsearch, - ExposedPorts: []string{"9200/tcp"}, - Env: map[string]string{ - "discovery.type": "single-node", - "xpack.security.enabled": "false", - "ES_JAVA_OPTS": "-Xms512m -Xmx512m", - "cluster.routing.allocation.disk.threshold_enabled": "false", - }, - WaitingFor: wait.ForAll( - wait.ForHTTP("/").WithPort("9200/tcp").WithStartupTimeout(120*time.Second), - wait.ForHTTP("/_cluster/health?wait_for_status=yellow&timeout=60s"). - WithPort("9200/tcp"). - WithStartupTimeout(120*time.Second), - ), - }, - Started: true, - }) - require.NoError(t, err, "start elasticsearch for subs fixture") - t.Cleanup(func() { _ = container.Terminate(ctx) }) - - host, err := container.Host(ctx) - require.NoError(t, err) - port, err := container.MappedPort(ctx, "9200") - require.NoError(t, err) - esURL := fmt.Sprintf("http://%s:%s", host, port.Port()) - - spotlightIndex := "spotlight-subs-test" - putTestSpotlightIndex(t, esURL, spotlightIndex) - - natsURL := startNATS(t) - serverNC, err := natsutil.Connect(natsURL, "") - require.NoError(t, err, "connect nats (server side)") - t.Cleanup(func() { _ = serverNC.Drain() }) - - clientNC, err := nats.Connect(natsURL) - require.NoError(t, err, "connect nats (client side)") - t.Cleanup(func() { clientNC.Close() }) - - engine, err := searchengine.New(ctx, searchengine.Config{Backend: "elasticsearch", URL: esURL}) - require.NoError(t, err, "build searchengine for subs fixture") - - esStore := newESStore(engine, testUserRoomIndex) - cache := newValkeyCache(newSubsValkeyClient(t)) - h := newHandler(esStore, nil, nil, cache, handlerConfig{ - DocCounts: 25, - MaxDocCounts: 100, - RestrictedRoomsCacheTTL: 5 * time.Minute, - RecentWindow: 365 * 24 * time.Hour, - RequestTimeout: 5 * time.Second, - SpotlightReadPattern: spotlightIndex, - }) - - router := natsrouter.New(serverNC, 
"search-service-test-subs") - router.Use(natsrouter.RequestID()) - h.Register(router) - // Flush — see setupAppsFixture for the rationale. - require.NoError(t, serverNC.NatsConn().Flush()) - t.Cleanup(func() { _ = router.Shutdown(context.Background()) }) - - return &roomsFixture{clientNATS: clientNC, esURL: esURL} -} - -func newSubsValkeyClient(t *testing.T) valkeyutil.Client { - t.Helper() - return valkeyutil.WrapClusterClient(testutil.StartValkeyCluster(t)) -} - -// putTestSpotlightIndex creates a minimal spotlight index in ES with the -// fields needed by the subscription search query. -func putTestSpotlightIndex(t *testing.T, esURL, index string) { - t.Helper() - body := map[string]any{ - "settings": map[string]any{ - "number_of_shards": 1, - "number_of_replicas": 0, - "refresh_interval": "1s", - }, - "mappings": map[string]any{ - "dynamic": false, - "properties": map[string]any{ - "roomId": map[string]any{"type": "keyword"}, - "roomName": map[string]any{ - "type": "search_as_you_type", - }, - "roomType": map[string]any{"type": "keyword"}, - "userAccount": map[string]any{"type": "keyword"}, - "siteId": map[string]any{"type": "keyword"}, - "joinedAt": map[string]any{"type": "date"}, - }, - }, - } - data, _ := json.Marshal(body) - req, err := http.NewRequest(http.MethodPut, esURL+"/"+index, bytes.NewReader(data)) - require.NoError(t, err) - req.Header.Set("Content-Type", "application/json") - resp, err := testHTTPClient.Do(req) - require.NoError(t, err) - defer resp.Body.Close() - b, _ := io.ReadAll(resp.Body) - require.True(t, resp.StatusCode == http.StatusOK || resp.StatusCode == http.StatusCreated, - "create spotlight index: status=%d body=%s", resp.StatusCode, b) -} - -func TestIntegration_SearchRooms_HappyPath(t *testing.T) { - f := setupRoomsFixture(t) - - const account = "alice" - now := time.Now().UTC() - - // Seed spotlight docs for two rooms alice is in. 
- seedDoc(t, f.esURL, "spotlight-subs-test", "spot-r1", map[string]any{ - "roomId": "r1", - "roomName": "engineering-announcements", - "roomType": "channel", - "userAccount": account, - "siteId": "site-local", - "joinedAt": now.Add(-48 * time.Hour).Format(time.RFC3339), - }) - seedDoc(t, f.esURL, "spotlight-subs-test", "spot-r2", map[string]any{ - "roomId": "r2", - "roomName": "engineering-random", - "roomType": "channel", - "userAccount": account, - "siteId": "site-local", - "joinedAt": now.Add(-24 * time.Hour).Format(time.RFC3339), - }) - // A matching room owned by a different account. With the Mongo - // hydration removed, the spotlight userAccount term filter is the - // sole access boundary — this must not leak into alice's results. - seedDoc(t, f.esURL, "spotlight-subs-test", "spot-r3", map[string]any{ - "roomId": "r3", - "roomName": "engineering-secret", - "roomType": "channel", - "userAccount": "mallory", - "siteId": "site-local", - "joinedAt": now.Add(-12 * time.Hour).Format(time.RFC3339), - }) - - reqBytes, err := json.Marshal(model.SearchRoomsRequest{Query: "engineering"}) - require.NoError(t, err) - - msg, err := f.clientNATS.Request(subject.SearchRooms(account), reqBytes, 10*time.Second) - require.NoError(t, err) - - var resp model.SearchRoomsResponse - require.NoError(t, json.Unmarshal(msg.Data, &resp)) - - require.Len(t, resp.Rooms, 2, "both rooms matching 'engineering' must be returned") - byID := map[string]model.SearchRoom{} - for _, r := range resp.Rooms { - byID[r.RoomID] = r - } - assert.Equal(t, model.SearchRoom{RoomID: "r1", Name: "engineering-announcements", RoomType: "channel", SiteID: "site-local"}, byID["r1"]) - assert.Equal(t, model.SearchRoom{RoomID: "r2", Name: "engineering-random", RoomType: "channel", SiteID: "site-local"}, byID["r2"]) - _, leaked := byID["r3"] - assert.False(t, leaked, "rooms owned by another account must not leak") -} - -func TestIntegration_SearchRooms_RoomTypeChannelFilter(t *testing.T) { - f := 
setupRoomsFixture(t) - - const account = "bob" - now := time.Now().UTC() - - seedDoc(t, f.esURL, "spotlight-subs-test", "spot-b-r1", map[string]any{ - "roomId": "b-r1", - "roomName": "bob-alice", - "roomType": "dm", - "userAccount": account, - "siteId": "site-local", - "joinedAt": now.Add(-1 * time.Hour).Format(time.RFC3339), - }) - seedDoc(t, f.esURL, "spotlight-subs-test", "spot-b-r2", map[string]any{ - "roomId": "b-r2", - "roomName": "bob-channel", - "roomType": "channel", - "userAccount": account, - "siteId": "site-local", - "joinedAt": now.Add(-2 * time.Hour).Format(time.RFC3339), - }) - - reqBytes, err := json.Marshal(model.SearchRoomsRequest{Query: "bob", RoomType: "channel"}) - require.NoError(t, err) - - msg, err := f.clientNATS.Request(subject.SearchRooms(account), reqBytes, 10*time.Second) - require.NoError(t, err) - - var resp model.SearchRoomsResponse - require.NoError(t, json.Unmarshal(msg.Data, &resp)) - - require.Len(t, resp.Rooms, 1) - assert.Equal(t, model.SearchRoom{RoomID: "b-r2", Name: "bob-channel", RoomType: "channel", SiteID: "site-local"}, resp.Rooms[0], - "only the channel room must match roomType=channel filter") -} - -func TestIntegration_SearchRooms_EmptyQueryReturnsBadRequest(t *testing.T) { - f := setupRoomsFixture(t) - - reqBytes, err := json.Marshal(model.SearchRoomsRequest{Query: ""}) - require.NoError(t, err) - - msg, err := f.clientNATS.Request(subject.SearchRooms("alice"), reqBytes, 5*time.Second) - require.NoError(t, err) - - var envelope model.ErrorResponse - require.NoError(t, json.Unmarshal(msg.Data, &envelope)) - require.NotEmpty(t, envelope.Error) - assert.Equal(t, natsrouter.CodeBadRequest, envelope.Code) -} - -func TestIntegration_SearchRooms_RoomTypeAppReturnsBadRequest(t *testing.T) { - f := setupRoomsFixture(t) - - reqBytes, err := json.Marshal(model.SearchRoomsRequest{Query: "x", RoomType: "app"}) - require.NoError(t, err) - - msg, err := f.clientNATS.Request(subject.SearchRooms("alice"), reqBytes, 5*time.Second) - 
require.NoError(t, err) - - var envelope model.ErrorResponse - require.NoError(t, json.Unmarshal(msg.Data, &envelope)) - require.NotEmpty(t, envelope.Error) - assert.Equal(t, natsrouter.CodeBadRequest, envelope.Code) - assert.Contains(t, envelope.Error, "invalid roomType") -} - -// --- search.messages v2 integration ----------------------------------------- - -// messagesV2Fixture stubs ES with a fake HTTP server (httptest). The -// messages path is pure ES — no Mongo round-trip — so no Mongo fixture -// is wired. -type messagesV2Fixture struct { - clientNATS *nats.Conn -} - -func setupMessagesV2Fixture(t *testing.T) *messagesV2Fixture { - t.Helper() - ctx := context.Background() - - // Stub ES: always return a canned response containing one hit. - esStub := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - // Drain the body so the HTTP/1.1 connection stays open. - _, _ = io.Copy(io.Discard, r.Body) - // The Elastic Go client performs a "product check" handshake on - // connect and rejects any server that doesn't advertise itself - // as Elasticsearch via this header. Set it on every response so - // the stub passes the check regardless of which endpoint is hit. - w.Header().Set("X-Elastic-Product", "Elasticsearch") - w.Header().Set("Content-Type", "application/json") - _, _ = w.Write([]byte(`{"hits":{"total":{"value":1},"hits":[{"_source":{` + - `"messageId":"m1","roomId":"r1","siteId":"site-a","userId":"u1",` + - `"userAccount":"alice","content":"hello","createdAt":"2026-04-01T12:00:00Z"}}]}}`)) - })) - t.Cleanup(esStub.Close) - - // Valkey stub — use the fakeCache wired in-process via handler injection. 
- fakeValkey := newFakeCache() - fakeValkey.store["alice"] = map[string]int64{} // empty restricted map, cache hit - - // NATS - natsURL := startNATS(t) - - serverNATS, err := natsutil.Connect(natsURL, "") - require.NoError(t, err) - t.Cleanup(func() { _ = serverNATS.Drain() }) - - clientNATS, err := nats.Connect(natsURL) - require.NoError(t, err) - t.Cleanup(func() { clientNATS.Close() }) - - // Wire search-service with the stub ES engine. No Mongo store needed - // for the messages path. - engine, err := searchengine.New(ctx, searchengine.Config{Backend: "elasticsearch", URL: esStub.URL}) - require.NoError(t, err) - esStore := newESStore(engine, testUserRoomIndex) - - h := newHandler(esStore, nil, nil, fakeValkey, handlerConfig{ - DocCounts: 25, - MaxDocCounts: 100, - RestrictedRoomsCacheTTL: 5 * time.Minute, - RecentWindow: 365 * 24 * time.Hour, - RequestTimeout: 5 * time.Second, - UserRoomIndex: testUserRoomIndex, - SpotlightReadPattern: "spotlight-*", - }) - - router := natsrouter.New(serverNATS, "search-service-test-v2") - router.Use(natsrouter.RequestID()) - h.Register(router) - // Flush — see setupAppsFixture for the rationale. 
- require.NoError(t, serverNATS.NatsConn().Flush()) - t.Cleanup(func() { _ = router.Shutdown(context.Background()) }) - - return &messagesV2Fixture{clientNATS: clientNATS} -} - -func TestIntegration_SearchMessages_V2_HitProjection(t *testing.T) { - f := setupMessagesV2Fixture(t) - - reqBytes, err := json.Marshal(model.SearchMessagesRequest{Query: "hello"}) - require.NoError(t, err) - - msg, err := f.clientNATS.Request(subject.SearchMessages("alice"), reqBytes, 5*time.Second) - require.NoError(t, err) - - var resp model.SearchMessagesResponse - require.NoError(t, json.Unmarshal(msg.Data, &resp)) - - require.Len(t, resp.Messages, 1) - assert.EqualValues(t, 1, resp.Total) - - got := resp.Messages[0] - assert.Equal(t, "m1", got.MessageID) - assert.Equal(t, "r1", got.RoomID) - assert.Equal(t, "site-a", got.SiteID) - assert.Equal(t, "alice", got.UserAccount) - assert.Equal(t, "hello", got.Content) -} - -func TestIntegration_SearchMessages_V2_EmptyQueryReturnsBadRequest(t *testing.T) { - f := setupMessagesV2Fixture(t) - - reqBytes, err := json.Marshal(model.SearchMessagesRequest{Query: ""}) - require.NoError(t, err) - - msg, err := f.clientNATS.Request(subject.SearchMessages("alice"), reqBytes, 5*time.Second) - require.NoError(t, err) - - var envelope model.ErrorResponse - require.NoError(t, json.Unmarshal(msg.Data, &envelope)) - assert.Equal(t, natsrouter.CodeBadRequest, envelope.Code) -} diff --git a/search-service/integration_users_test.go b/search-service/integration_users_test.go new file mode 100644 index 000000000..2b26c24de --- /dev/null +++ b/search-service/integration_users_test.go @@ -0,0 +1,104 @@ +//go:build integration + +package main + +// Integration tests for search.users (NATS + httptest stub for HR endpoint). 
+ +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/nats-io/nats.go" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/hmchangw/chat/pkg/model" + "github.com/hmchangw/chat/pkg/natsrouter" + "github.com/hmchangw/chat/pkg/restyutil" + "github.com/hmchangw/chat/pkg/subject" +) + +type usersFixture struct { + clientNATS *nats.Conn + thirdParty *httptest.Server +} + +func setupUsersFixture(t *testing.T, thirdPartyHandler http.Handler) *usersFixture { + t.Helper() + stub := httptest.NewServer(thirdPartyHandler) + t.Cleanup(stub.Close) + + usersRC := restyutil.New(stub.URL, restyutil.WithTimeout(5*time.Second)) + h := newHandler(nil, nil, newHTTPUsersClient(usersRC, ""), newFakeCache(), handlerConfig{ + DocCounts: 25, + MaxDocCounts: 100, + RequestTimeout: 5 * time.Second, + }) + clientNC := setupRouter(t, testQueueGroup, h.Register) + return &usersFixture{clientNATS: clientNC, thirdParty: stub} +} + +func TestIntegration_SearchUsers_Happy(t *testing.T) { + stubResp := `[{"account":"alice","engName":"Alice Wang"},{"account":"alice2","engName":"Alice Chen"}]` + + f := setupUsersFixture(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(stubResp)) + })) + + reqBytes, err := json.Marshal(model.SearchUsersRequest{Query: "alice"}) + require.NoError(t, err) + + msg, err := f.clientNATS.Request(subject.SearchUsers("alice"), reqBytes, 5*time.Second) + require.NoError(t, err) + + var users []model.SearchUser + require.NoError(t, json.Unmarshal(msg.Data, &users)) + + require.Len(t, users, 2) + assert.Equal(t, "alice", users[0].Account) + assert.Equal(t, "Alice Wang", users[0].EngName) +} + +func TestIntegration_SearchUsers_EmptyQueryReturnsBadRequest(t *testing.T) { + // Stub should never be called for a bad-request scenario. 
+ f := setupUsersFixture(t, http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + t.Error("third-party stub should not be called for empty query") + w.WriteHeader(http.StatusInternalServerError) + })) + + reqBytes, err := json.Marshal(model.SearchUsersRequest{Query: ""}) + require.NoError(t, err) + + msg, err := f.clientNATS.Request(subject.SearchUsers("alice"), reqBytes, 5*time.Second) + require.NoError(t, err) + + var envelope model.ErrorResponse + require.NoError(t, json.Unmarshal(msg.Data, &envelope)) + require.NotEmpty(t, envelope.Error) + assert.Equal(t, natsrouter.CodeBadRequest, envelope.Code) +} + +func TestIntegration_SearchUsers_ThirdPartyErrorReturnsInternal(t *testing.T) { + f := setupUsersFixture(t, http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusServiceUnavailable) + })) + + reqBytes, err := json.Marshal(model.SearchUsersRequest{Query: "alice"}) + require.NoError(t, err) + + msg, err := f.clientNATS.Request(subject.SearchUsers("alice"), reqBytes, 5*time.Second) + require.NoError(t, err) + + var envelope model.ErrorResponse + require.NoError(t, json.Unmarshal(msg.Data, &envelope)) + require.NotEmpty(t, envelope.Error) + assert.Equal(t, natsrouter.CodeInternal, envelope.Code, + "non-2xx from third-party must surface as internal error, not raw status") + // Raw third-party details must not leak to the caller. + assert.NotContains(t, envelope.Error, "503", "status code from third-party must not leak") +} diff --git a/search-service/setup_shared_test.go b/search-service/setup_shared_test.go new file mode 100644 index 000000000..2a86d34dc --- /dev/null +++ b/search-service/setup_shared_test.go @@ -0,0 +1,87 @@ +//go:build integration + +package main + +// ES / NATS / Valkey / Mongo come from pkg/testutil. CCS tests bring +// their own ES pair (integration_ccs_test.go). 
+ +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "testing" + "time" + + "github.com/nats-io/nats.go" + "github.com/stretchr/testify/require" + + "github.com/hmchangw/chat/pkg/natsrouter" + "github.com/hmchangw/chat/pkg/natsutil" + "github.com/hmchangw/chat/pkg/testutil" +) + +const testUserRoomIndex = "user-room" + +// NATS queue groups. Each search-service router gets its own so a slow +// drain after one test can't deliver to a sibling test's handler. +const ( + testQueueGroup = "search-service-test" // apps, users, CCS + testQueueGroupSubs = "search-service-test-subs" // rooms + testQueueGroupV2 = "search-service-test-v2" // messages v2 +) + +// Bounded HTTP client for ES control-plane calls. +var testHTTPClient = &http.Client{Timeout: 10 * time.Second} + +func seedDoc(t *testing.T, esURL, index, id string, doc any) { + t.Helper() + data, err := json.Marshal(doc) + require.NoError(t, err) + url := fmt.Sprintf("%s/%s/_doc/%s?refresh=true", esURL, index, id) + req, err := http.NewRequest(http.MethodPut, url, bytes.NewReader(data)) + require.NoError(t, err) + req.Header.Set("Content-Type", "application/json") + resp, err := testHTTPClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + body, _ := io.ReadAll(resp.Body) + require.Truef(t, resp.StatusCode == http.StatusCreated || resp.StatusCode == http.StatusOK, + "seedDoc %s/%s: status=%d body=%s", index, id, resp.StatusCode, body) +} + +func TestMain(m *testing.M) { + testutil.RunTestsWithPrewarm(m, + testutil.EnsureElasticsearch, + testutil.EnsureNATS, + testutil.EnsureValkey, + testutil.EnsureMongo, + ) +} + +// setupRouter wires the NATS plumbing shared by every search-service +// fixture: server+client conns against the shared NATS, a router with the +// given queue group, RequestID middleware, register, flush, and cleanups. +// The Flush is required because otelnats wraps the conn — subscriptions +// don't reach the server otherwise before tests publish. 
+func setupRouter(t *testing.T, queueGroup string, register func(*natsrouter.Router)) *nats.Conn { + t.Helper() + natsURL := testutil.NATS(t) + serverNC, err := natsutil.Connect(natsURL, "") + require.NoError(t, err, "connect nats (server side)") + t.Cleanup(func() { _ = serverNC.Drain() }) + + clientNC, err := nats.Connect(natsURL) + require.NoError(t, err, "connect nats (client side)") + t.Cleanup(func() { clientNC.Close() }) + + router := natsrouter.New(serverNC, queueGroup) + router.Use(natsrouter.RequestID()) + register(router) + require.NoError(t, serverNC.NatsConn().Flush()) + t.Cleanup(func() { _ = router.Shutdown(context.Background()) }) + + return clientNC +} diff --git a/search-sync-worker/integration_test.go b/search-sync-worker/integration_test.go index 6abc1677f..ece2a40ee 100644 --- a/search-sync-worker/integration_test.go +++ b/search-sync-worker/integration_test.go @@ -11,6 +11,7 @@ import ( "net/http" "net/url" "os" + "sync" "testing" "time" @@ -18,139 +19,69 @@ import ( "github.com/nats-io/nats.go/jetstream" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/testcontainers/testcontainers-go" - "github.com/testcontainers/testcontainers-go/wait" "github.com/hmchangw/chat/pkg/model" "github.com/hmchangw/chat/pkg/searchengine" "github.com/hmchangw/chat/pkg/stream" "github.com/hmchangw/chat/pkg/subject" - "github.com/hmchangw/chat/pkg/testutil/testimages" + "github.com/hmchangw/chat/pkg/testutil" ) -// Package-level singletons — one Elasticsearch + one NATS JetStream container -// shared across all tests in this package. Tests isolate themselves via unique -// index / stream names (already the case in this suite). On VFS storage, -// spawning ES per-test is prohibitive — a 120s startup * 7 tests = 14min. +// Package-level NATS connection + JetStream client. Connected once in +// TestMain and shared by every test. The underlying NATS and ES +// containers come from pkg/testutil. 
var ( - testESURL string - testJS jetstream.JetStream - testNATSCon *nats.Conn + testJS jetstream.JetStream + testNATSCon *nats.Conn + testNATSConErr error + testNATSOnce sync.Once ) +// TestMain pre-warms shared containers in parallel; fails fast on error. +// Custom wrap (not testutil.RunTestsWithPrewarm) so we can close the +// lazy-init JetStream conn between m.Run and TerminateAll. func TestMain(m *testing.M) { - // Wrap the setup logic in an inner function so `defer` runs for every - // successfully-created resource before TestMain returns, regardless of - // which error branch we take. Keeps cleanup in one place instead of - // reinvented cascades at each error site. - os.Exit(runTestMain(m)) -} - -func runTestMain(m *testing.M) int { - ctx := context.Background() - - esContainer, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{ - ContainerRequest: testcontainers.ContainerRequest{ - Image: testimages.Elasticsearch, - ExposedPorts: []string{"9200/tcp"}, - Env: map[string]string{ - "discovery.type": "single-node", - "xpack.security.enabled": "false", - "cluster.routing.allocation.disk.threshold_enabled": "false", - "ES_JAVA_OPTS": "-Xms512m -Xmx512m", - }, - WaitingFor: wait.ForAll( - wait.ForHTTP("/").WithPort("9200/tcp").WithStartupTimeout(120*time.Second), - wait.ForHTTP("/_cluster/health?wait_for_status=yellow&timeout=60s"). - WithPort("9200/tcp"). 
- WithStartupTimeout(120*time.Second), - ), - }, - Started: true, - }) - if err != nil { - fmt.Fprintf(os.Stderr, "start elasticsearch: %v\n", err) - return 1 + if err := testutil.PrewarmFailFast(testutil.EnsureElasticsearch, testutil.EnsureNATS); err != nil { + fmt.Fprintf(os.Stderr, "prewarm shared containers: %v\n", err) + testutil.TerminateAll() + os.Exit(1) } - defer func() { - if err := esContainer.Terminate(ctx); err != nil { - fmt.Fprintf(os.Stderr, "terminate elasticsearch: %v\n", err) - } - }() - esHost, err := esContainer.Host(ctx) - if err != nil { - fmt.Fprintf(os.Stderr, "get es host: %v\n", err) - return 1 - } - esPort, err := esContainer.MappedPort(ctx, "9200") - if err != nil { - fmt.Fprintf(os.Stderr, "get es port: %v\n", err) - return 1 - } - testESURL = fmt.Sprintf("http://%s:%s", esHost, esPort.Port()) - - natsContainer, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{ - ContainerRequest: testcontainers.ContainerRequest{ - Image: testimages.NATS, - ExposedPorts: []string{"4222/tcp"}, - Cmd: []string{"--jetstream"}, - WaitingFor: wait.ForLog("Server is ready").WithStartupTimeout(30 * time.Second), - }, - Started: true, - }) - if err != nil { - fmt.Fprintf(os.Stderr, "start nats: %v\n", err) - return 1 + code := m.Run() + if testNATSCon != nil { + testNATSCon.Close() } - defer func() { - if err := natsContainer.Terminate(ctx); err != nil { - fmt.Fprintf(os.Stderr, "terminate nats: %v\n", err) - } - }() - natsHost, err := natsContainer.Host(ctx) - if err != nil { - fmt.Fprintf(os.Stderr, "get nats host: %v\n", err) - return 1 - } - natsPort, err := natsContainer.MappedPort(ctx, "4222") - if err != nil { - fmt.Fprintf(os.Stderr, "get nats port: %v\n", err) - return 1 - } - natsURL := fmt.Sprintf("nats://%s:%s", natsHost, natsPort.Port()) - nc, err := nats.Connect(natsURL) - if err != nil { - fmt.Fprintf(os.Stderr, "connect nats: %v\n", err) - return 1 - } - defer nc.Close() - js, err := jetstream.New(nc) - if err 
!= nil { - fmt.Fprintf(os.Stderr, "init jetstream: %v\n", err) - return 1 - } - testJS = js - testNATSCon = nc - - return m.Run() + testutil.TerminateAll() + os.Exit(code) } // setupElasticsearch returns the shared ES URL. Tests must use unique index // names to stay isolated — the existing suite does. func setupElasticsearch(t *testing.T) string { t.Helper() - if testESURL == "" { - t.Fatal("testESURL is empty — TestMain did not run") - } - return testESURL + return testutil.Elasticsearch(t) } // setupNATSJetStream returns the shared (JetStream, Conn). Tests must use // unique stream names to stay isolated — the existing suite does. func setupNATSJetStream(t *testing.T) (jetstream.JetStream, *nats.Conn) { t.Helper() - if testJS == nil || testNATSCon == nil { - t.Fatal("testJS/testNATSCon is nil — TestMain did not run") + testNATSOnce.Do(func() { + nc, err := nats.Connect(testutil.NATS(t)) + if err != nil { + testNATSConErr = fmt.Errorf("connect nats: %w", err) + return + } + js, err := jetstream.New(nc) + if err != nil { + nc.Close() + testNATSConErr = fmt.Errorf("init jetstream: %w", err) + return + } + testNATSCon = nc + testJS = js + }) + if testNATSConErr != nil { + t.Fatalf("nats jetstream setup: %v", testNATSConErr) } return testJS, testNATSCon } diff --git a/tools/loadgen/integration_test.go b/tools/loadgen/integration_test.go index b1a0f2948..2fdf23343 100644 --- a/tools/loadgen/integration_test.go +++ b/tools/loadgen/integration_test.go @@ -5,69 +5,27 @@ package main import ( "context" "encoding/json" - "fmt" "testing" "time" "github.com/nats-io/nats.go" "github.com/nats-io/nats.go/jetstream" "github.com/stretchr/testify/require" - "github.com/testcontainers/testcontainers-go" - "github.com/testcontainers/testcontainers-go/modules/mongodb" - "github.com/testcontainers/testcontainers-go/wait" "go.mongodb.org/mongo-driver/v2/bson" "github.com/hmchangw/chat/pkg/model" - "github.com/hmchangw/chat/pkg/mongoutil" "github.com/hmchangw/chat/pkg/stream" 
"github.com/hmchangw/chat/pkg/subject" - "github.com/hmchangw/chat/pkg/testutil/testimages" + "github.com/hmchangw/chat/pkg/testutil" ) -// setupNATS starts a JetStream-enabled NATS container via the generic -// testcontainers interface (no dedicated NATS module is required). -func setupNATS(t *testing.T) (string, func()) { - t.Helper() - ctx := context.Background() - c, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{ - ContainerRequest: testcontainers.ContainerRequest{ - Image: testimages.NATS, - Cmd: []string{"-js"}, - ExposedPorts: []string{"4222/tcp"}, - WaitingFor: wait.ForLog("Server is ready").WithStartupTimeout(30 * time.Second), - }, - Started: true, - }) - require.NoError(t, err) - host, err := c.Host(ctx) - require.NoError(t, err) - port, err := c.MappedPort(ctx, "4222") - require.NoError(t, err) - return fmt.Sprintf("nats://%s:%s", host, port.Port()), func() { _ = c.Terminate(ctx) } -} - -func setupMongo(t *testing.T) (string, func()) { - t.Helper() - ctx := context.Background() - c, err := mongodb.Run(ctx, testimages.Mongo) - require.NoError(t, err) - uri, err := c.ConnectionString(ctx) - require.NoError(t, err) - return uri, func() { _ = c.Terminate(ctx) } -} - // TestLoadgenSmallPreset_EndToEnd verifies the generator publishes messages, // a fake gatekeeper forwards them to MESSAGES_CANONICAL, two JetStream // consumers drain the stream, a fake broadcast-worker emits room events, // and MongoDB shows the seeded room data. func TestLoadgenSmallPreset_EndToEnd(t *testing.T) { ctx := context.Background() - natsURI, stopNATS := setupNATS(t) - defer stopNATS() - mongoURI, stopMongo := setupMongo(t) - defer stopMongo() - - nc, err := nats.Connect(natsURI) + nc, err := nats.Connect(testutil.NATS(t)) require.NoError(t, err) defer nc.Drain() @@ -95,11 +53,7 @@ func TestLoadgenSmallPreset_EndToEnd(t *testing.T) { defer cc.Stop() } - // Connect Mongo and seed fixtures. 
- client, err := mongoutil.Connect(ctx, mongoURI, "", "") - require.NoError(t, err) - defer mongoutil.Disconnect(ctx, client) - db := client.Database("chat") + db := testutil.MongoDB(t, "loadgen") preset, _ := BuiltinPreset("small") fixtures := BuildFixtures(&preset, 42, siteID) @@ -168,10 +122,8 @@ func TestLoadgenSmallPreset_EndToEnd(t *testing.T) { defer cancel() require.NoError(t, gen.Run(runCtx)) - // Allow trailing events to flow. time.Sleep(2 * time.Second) - // Assert the canonical stream drained. for _, durable := range []string{"message-worker", "broadcast-worker"} { cons, err := js.Consumer(ctx, canonical.Name, durable) require.NoError(t, err) @@ -180,9 +132,10 @@ func TestLoadgenSmallPreset_EndToEnd(t *testing.T) { require.Equal(t, uint64(0), info.NumPending, "durable %s still has pending", durable) } - // Assert seed data is visible in Mongo. var room model.Room err = db.Collection("rooms").FindOne(ctx, bson.M{"_id": fixtures.Rooms[0].ID}).Decode(&room) require.NoError(t, err) require.Equal(t, fixtures.Rooms[0].ID, room.ID) } + +func TestMain(m *testing.M) { testutil.RunTests(m) } diff --git a/tools/loadgen/main_test.go b/tools/loadgen/main_test.go index 959edf7b4..1c2196f67 100644 --- a/tools/loadgen/main_test.go +++ b/tools/loadgen/main_test.go @@ -234,7 +234,7 @@ func TestDispatch_MembersSustained_UnknownPreset(t *testing.T) { oldArgs := os.Args defer func() { os.Args = oldArgs }() os.Args = []string{"loadgen", "members-sustained", "--preset=nope"} - cfg := &config{NatsURL: "nats://localhost:1", MongoURI: "mongodb://localhost:1", ValkeyAddr: "localhost:1"} + cfg := &config{NatsURL: "nats://localhost:1", MongoURI: "mongodb://localhost:1", ValkeyAddrs: []string{"localhost:1"}} code := dispatch(context.Background(), cfg) assert.Equal(t, 2, code) } @@ -243,7 +243,7 @@ func TestDispatch_MembersSustained_RejectsBadShape(t *testing.T) { oldArgs := os.Args defer func() { os.Args = oldArgs }() os.Args = []string{"loadgen", "members-sustained", 
"--preset=members-small", "--shape=orgs"} - cfg := &config{NatsURL: "nats://localhost:1", MongoURI: "mongodb://localhost:1", ValkeyAddr: "localhost:1"} + cfg := &config{NatsURL: "nats://localhost:1", MongoURI: "mongodb://localhost:1", ValkeyAddrs: []string{"localhost:1"}} code := dispatch(context.Background(), cfg) assert.Equal(t, 2, code) } @@ -252,7 +252,7 @@ func TestDispatch_MembersCapacity_RequiresTargetSize(t *testing.T) { oldArgs := os.Args defer func() { os.Args = oldArgs }() os.Args = []string{"loadgen", "members-capacity", "--preset=members-capacity"} - cfg := &config{NatsURL: "nats://localhost:1", MongoURI: "mongodb://localhost:1", ValkeyAddr: "localhost:1"} + cfg := &config{NatsURL: "nats://localhost:1", MongoURI: "mongodb://localhost:1", ValkeyAddrs: []string{"localhost:1"}} code := dispatch(context.Background(), cfg) assert.Equal(t, 2, code) } diff --git a/tools/loadgen/members_integration_test.go b/tools/loadgen/members_integration_test.go index 848712ba0..7ceaad532 100644 --- a/tools/loadgen/members_integration_test.go +++ b/tools/loadgen/members_integration_test.go @@ -17,6 +17,7 @@ import ( "github.com/hmchangw/chat/pkg/model" "github.com/hmchangw/chat/pkg/stream" "github.com/hmchangw/chat/pkg/subject" + "github.com/hmchangw/chat/pkg/testutil" ) // TestMembersSustained_EndToEnd verifies the full members-sustained pipeline @@ -25,9 +26,7 @@ import ( // non-zero traffic. func TestMembersSustained_EndToEnd(t *testing.T) { ctx := context.Background() - natsURL, stopNATS := setupNATS(t) - defer stopNATS() - + natsURL := testutil.NATS(t) nc, err := nats.Connect(natsURL) require.NoError(t, err) defer nc.Drain() //nolint:errcheck