diff --git a/docs/superpowers/plans/2026-05-07-sync-dm-endpoint-part1.md b/docs/superpowers/plans/2026-05-07-sync-dm-endpoint-part1.md new file mode 100644 index 000000000..422da8cd1 --- /dev/null +++ b/docs/superpowers/plans/2026-05-07-sync-dm-endpoint-part1.md @@ -0,0 +1,1603 @@ +# Sync Server-to-Server DM Endpoint — Part 1 Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add a synchronous server-to-server NATS request/reply endpoint hosted by `room-worker` that creates a DM or botDM (Room + 2 Subscriptions), emits the same events the async path emits, and replies with the requester's `Subscription`. + +**Architecture:** New NATS subject `chat.server.request.room.{siteID}.create.dm` (queue group `room-worker`, server credentials). Handler is persistence-only — caller (user-service) is responsible for all data-integrity validation per the spec's caller-side validation contract. room-worker resolves User records via `GetUser` for data (User.ID, User.SiteID), persists Room + 2 subs, emits `subscription.update` events and cross-site `room_created` outbox event, and replies inline with the requester's Subscription. Idempotent against retries and concurrent races via duplicate-key fallback at insert time. + +**Tech Stack:** Go 1.25, NATS (request/reply via core NATS — not JetStream), MongoDB driver v2, `go.uber.org/mock` (mockgen), `stretchr/testify`, `testcontainers-go`. + +**Source spec:** `docs/superpowers/specs/2026-05-05-sync-dm-and-historyconfig-design.md` (Part 1). + +**Out of scope (in this plan):** Part 2 (HistoryConfig.SharedSince removal) — separate plan. + +--- + +## File Map + +| Path | Change | Responsibility | +|------|--------|----------------| +| `pkg/model/member.go` | modify | New `SyncCreateDMRequest` and `SyncCreateDMReply` types | +| `pkg/model/model_test.go` | modify | JSON round-trip for new types | +| `pkg/subject/subject.go` | modify | New `RoomCreateDMSync(siteID)` builder | +| `pkg/subject/subject_test.go` | modify | Round-trip test for new subject | +| `room-worker/store.go` | modify | Add `FindDMSubscription` to `SubscriptionStore` interface | +| `room-worker/store_mongo.go` | modify | Mongo impl of `FindDMSubscription` | +| `room-worker/mock_store_test.go` | regenerate | Auto-generated; via `make generate SERVICE=room-worker` | +| `room-worker/handler.go` | modify | Extract `buildDMSubs` / `buildBotDMSubs` from `processCreateRoomDM` / `processCreateRoomBotDM` (pure code motion, no behavior change) | +| `room-worker/handler_test.go` | modify | Existing tests continue to pass; no test changes needed (verification only) | +| `room-worker/handler_sync_create_dm.go` | new | `natsServerCreateDM` handler + `handleSyncCreateDM` business logic + sentinels + `sanitizeSyncDMError` | +| `room-worker/handler_sync_create_dm_test.go` | new | Unit tests with mocked store | +| `room-worker/main.go` | modify | `NewHandler` plumbing for the new path; add `nc.QueueSubscribe` for the new subject; ensure `nc.Drain()` is in the existing shutdown chain (no new step needed since core NATS subscriptions are drained by the existing `nc.Drain()` callback) | +| `room-worker/integration_test.go` | modify | Add `TestSyncCreateDM_*` integration tests | + +--- + +## Task 1: Add `SyncCreateDMRequest` and `SyncCreateDMReply` types + +**Files:** +- Modify: `pkg/model/member.go` +- Test: `pkg/model/model_test.go` + +- [ ] **Step 1: Write the failing test** + +Append to `pkg/model/model_test.go` (anywhere in the file is fine — keep near other small JSON-roundtrip tests): + +```go +func TestSyncCreateDMRequestJSON(t *testing.T) { + src := model.SyncCreateDMRequest{ + RoomType: model.RoomTypeDM, + RequesterAccount: "alice", + OtherAccount: "bob", + } + b, err := json.Marshal(src) + require.NoError(t, err) + + // Expect exact lower-camelCase keys. + assert.JSONEq(t, `{"roomType":"dm","requesterAccount":"alice","otherAccount":"bob"}`, string(b)) + + var dst model.SyncCreateDMRequest + require.NoError(t, json.Unmarshal(b, &dst)) + assert.Equal(t, src, dst) +} + +func TestSyncCreateDMReplyJSON(t *testing.T) { + now := time.Date(2026, 5, 7, 12, 0, 0, 0, time.UTC) + src := model.SyncCreateDMReply{ + Success: true, + Subscription: model.Subscription{ + ID: "sub1", + User: model.SubscriptionUser{ID: "u1", Account: "alice"}, + RoomID: "room1", + SiteID: "site-a", + Name: "bob", + RoomType: model.RoomTypeDM, + IsSubscribed: true, + JoinedAt: now, + }, + } + b, err := json.Marshal(src) + require.NoError(t, err) + + var dst model.SyncCreateDMReply + require.NoError(t, json.Unmarshal(b, &dst)) + assert.True(t, dst.Success) + assert.Equal(t, src.Subscription.ID, dst.Subscription.ID) + assert.Equal(t, src.Subscription.User, dst.Subscription.User) + assert.Equal(t, src.Subscription.RoomID, dst.Subscription.RoomID) +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cd /home/user/chat && go test -run 'TestSyncCreateDM(Request|Reply)JSON' ./pkg/model/...` + +Expected: FAIL with `undefined: model.SyncCreateDMRequest` / `undefined: model.SyncCreateDMReply`. + +- [ ] **Step 3: Add the types to `pkg/model/member.go`** + +Append to `pkg/model/member.go` (after the existing types, before any closing trailing whitespace): + +```go +// SyncCreateDMRequest is the request payload for chat.server.request.room.{siteID}.create.dm. +// Caller (user-service) MUST perform all data-integrity validation before issuing this request: +// existence checks, EngName/ChineseName checks, bot Assistant.Enabled, self-DM rejection, and +// dedup-existing via FindDMSubscription. room-worker performs only request-shape sanity checks +// and resolves User records via GetUser for data (User.ID, User.SiteID). +type SyncCreateDMRequest struct { + RoomType RoomType `json:"roomType"` // RoomTypeDM or RoomTypeBotDM + RequesterAccount string `json:"requesterAccount"` // requester's account + OtherAccount string `json:"otherAccount"` // counterpart account (user or bot) +} + +// SyncCreateDMReply is the success reply payload. Errors are returned via the standard +// natsutil.ReplyError -> model.ErrorResponse path (NOT this envelope). On the wire, +// callers should TryParseError first, then unmarshal SyncCreateDMReply on success. +// +// Success is always true on this path; it is retained as an explicit-positive signal +// that's cheap to compare on the caller side. Subscription is the requester-scoped +// subscription (User.Account == request.RequesterAccount). +type SyncCreateDMReply struct { + Success bool `json:"success"` + Subscription Subscription `json:"subscription"` +} +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `cd /home/user/chat && go test -run 'TestSyncCreateDM(Request|Reply)JSON' ./pkg/model/...` + +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +cd /home/user/chat +git add pkg/model/member.go pkg/model/model_test.go +git commit -m "feat(pkg/model): add SyncCreateDMRequest and SyncCreateDMReply types" +``` + +--- + +## Task 2: Add `RoomCreateDMSync` subject builder + +**Files:** +- Modify: `pkg/subject/subject.go` +- Test: `pkg/subject/subject_test.go` + +- [ ] **Step 1: Write the failing test** + +Append to `pkg/subject/subject_test.go`: + +```go +func TestRoomCreateDMSync(t *testing.T) { + got := subject.RoomCreateDMSync("site-a") + assert.Equal(t, "chat.server.request.room.site-a.create.dm", got) +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cd /home/user/chat && go test -run TestRoomCreateDMSync ./pkg/subject/...` + +Expected: FAIL with `undefined: subject.RoomCreateDMSync`. + +- [ ] **Step 3: Add the builder to `pkg/subject/subject.go`** + +Append to `pkg/subject/subject.go` (next to `RoomsInfoBatch` for grouping): + +```go +// RoomCreateDMSync is the server-to-server request subject for synchronous DM/botDM creation. +// Subscribed by room-worker with queue group "room-worker"; requires server credentials per the +// chat.server.> NATS callout policy. +func RoomCreateDMSync(siteID string) string { + return fmt.Sprintf("chat.server.request.room.%s.create.dm", siteID) +} +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `cd /home/user/chat && go test ./pkg/subject/...` + +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +cd /home/user/chat +git add pkg/subject/subject.go pkg/subject/subject_test.go +git commit -m "feat(pkg/subject): add RoomCreateDMSync builder" +``` + +--- + +## Task 3: Add `FindDMSubscription` to room-worker store interface and Mongo impl + +**Background:** room-worker's store doesn't currently expose dedup-existing lookups. The sync handler needs `FindDMSubscription` for the duplicate-key redelivery branch (after a `BulkCreateSubscriptions` race fails). Implementation mirrors `room-service/store_mongo.go:627`. + +**Files:** +- Modify: `room-worker/store.go` +- Modify: `room-worker/store_mongo.go` +- Regenerate: `room-worker/mock_store_test.go` (auto-generated) + +- [ ] **Step 1: Add the interface method** + +Edit `room-worker/store.go`. Inside the `SubscriptionStore` interface, near the other Subscription methods (`BulkCreateSubscriptions`, etc.), add: + +```go + // FindDMSubscription returns the requester's existing dm/botDM sub with Name == targetName. + // Returns model.ErrSubscriptionNotFound when no match exists. Used only on the duplicate-key + // redelivery branch of the sync DM handler — not on the happy path. + FindDMSubscription(ctx context.Context, account, targetName string) (*model.Subscription, error) +``` + +- [ ] **Step 2: Add the Mongo implementation** + +Append to `room-worker/store_mongo.go`. Confirm imports include `"go.mongodb.org/mongo-driver/v2/bson"` and `"go.mongodb.org/mongo-driver/v2/mongo"` (most are already present; add `"errors"` if not): + +```go +// FindDMSubscription returns the requester's existing dm/botDM sub with Name == targetName. +// Returns model.ErrSubscriptionNotFound on no match. +func (s *MongoStore) FindDMSubscription(ctx context.Context, account, targetName string) (*model.Subscription, error) { + var sub model.Subscription + err := s.subscriptions.FindOne(ctx, bson.M{ + "u.account": account, + "name": targetName, + "roomType": bson.M{"$in": []model.RoomType{model.RoomTypeDM, model.RoomTypeBotDM}}, + }).Decode(&sub) + if errors.Is(err, mongo.ErrNoDocuments) { + return nil, model.ErrSubscriptionNotFound + } + if err != nil { + return nil, fmt.Errorf("find dm subscription: %w", err) + } + return &sub, nil +} +``` + +> **Note:** Verify the MongoStore struct already has a `subscriptions` field. If it's named differently in `store_mongo.go` (look near the top of the file for `type MongoStore struct`), match the existing field name. + +- [ ] **Step 3: Regenerate the mock** + +Run: `cd /home/user/chat && make generate SERVICE=room-worker` + +This regenerates `room-worker/mock_store_test.go` to include `FindDMSubscription`. + +- [ ] **Step 4: Verify build still compiles** + +Run: `cd /home/user/chat && go build ./room-worker/...` + +Expected: no errors. + +- [ ] **Step 5: Verify existing tests still pass** + +Run: `cd /home/user/chat && make test SERVICE=room-worker` + +Expected: PASS (no behavior change yet — new method is unused). + +- [ ] **Step 6: Commit** + +```bash +cd /home/user/chat +git add room-worker/store.go room-worker/store_mongo.go room-worker/mock_store_test.go +git commit -m "feat(room-worker): add FindDMSubscription to SubscriptionStore" +``` + +--- + +## Task 4: Refactor — extract `buildDMSubs` and `buildBotDMSubs` helpers + +**Background:** The async path's `processCreateRoomDM` (`room-worker/handler.go:934`) and `processCreateRoomBotDM` (`room-worker/handler.go:953`) build the subscription pair inline. Extract those two `[]*model.Subscription{...}` literals into named helpers so the new sync handler can call them. **Pure code motion — no behavior change.** Existing tests must continue to pass without modification. + +**Files:** +- Modify: `room-worker/handler.go` +- Verify: `room-worker/handler_test.go` (no changes — existing `TestProcessCreateRoomDM*` / `TestProcessCreateRoomBotDM*` should still pass) + +- [ ] **Step 1: Extract `buildDMSubs`** + +Edit `room-worker/handler.go`. Add this helper near `newSub` (around line 824), above `processCreateRoom`: + +```go +// buildDMSubs returns the two subscriptions for a DM: requester's sub names the other, +// other's sub names the requester. Both have IsSubscribed=false (matches existing behavior). +func buildDMSubs(requester, other *model.User, room *model.Room, acceptedAt time.Time) []*model.Subscription { + return []*model.Subscription{ + newSub(idgen.GenerateUUIDv7(), requester, room, nil, other.Account, false, acceptedAt), + newSub(idgen.GenerateUUIDv7(), other, room, nil, requester.Account, false, acceptedAt), + } +} + +// buildBotDMSubs returns the two subscriptions for a botDM. Human's sub: Name=bot.Account, +// IsSubscribed=true. Bot's sub: Name=requester.Account, IsSubscribed=false. +func buildBotDMSubs(requester, bot *model.User, room *model.Room, acceptedAt time.Time) []*model.Subscription { + return []*model.Subscription{ + newSub(idgen.GenerateUUIDv7(), requester, room, nil, bot.Account, true, acceptedAt), + newSub(idgen.GenerateUUIDv7(), bot, room, nil, requester.Account, false, acceptedAt), + } +} +``` + +- [ ] **Step 2: Update `processCreateRoomDM` to call `buildDMSubs`** + +In `room-worker/handler.go:934`, replace the inline literal: + +```go + subs := []*model.Subscription{ + newSub(idgen.GenerateUUIDv7(), requester, room, nil, other.Account, false, acceptedAt), + newSub(idgen.GenerateUUIDv7(), other, room, nil, requester.Account, false, acceptedAt), + } +``` + +with: + +```go + subs := buildDMSubs(requester, other, room, acceptedAt) +``` + +- [ ] **Step 3: Update `processCreateRoomBotDM` to call `buildBotDMSubs`** + +In `room-worker/handler.go:953`, replace the inline literal: + +```go + subs := []*model.Subscription{ + // Human's sub: Name = bot account; IsSubscribed = true + newSub(idgen.GenerateUUIDv7(), requester, room, nil, bot.Account, true, acceptedAt), + // Bot's sub: Name = requester's account; IsSubscribed = false + newSub(idgen.GenerateUUIDv7(), bot, room, nil, requester.Account, false, acceptedAt), + } +``` + +with: + +```go + subs := buildBotDMSubs(requester, bot, room, acceptedAt) +``` + +- [ ] **Step 4: Verify all existing room-worker tests still pass** + +Run: `cd /home/user/chat && make test SERVICE=room-worker` + +Expected: PASS — every existing test (notably `TestProcessCreateRoomDM*`, `TestProcessCreateRoomBotDM*`, `TestFinishCreateRoom*`) continues to pass without modification. + +If any test fails, the extraction was not byte-equivalent. Re-read steps 1–3 and check that argument order matches `newSub`'s signature exactly. + +- [ ] **Step 5: Commit** + +```bash +cd /home/user/chat +git add room-worker/handler.go +git commit -m "refactor(room-worker): extract buildDMSubs/buildBotDMSubs helpers" +``` + +--- + +## Task 5: Add sentinels and `sanitizeSyncDMError` helper + +**Background:** room-worker's existing handlers don't reply to clients (they emit AsyncJobResult). The sync handler is the first to call `natsutil.ReplyError`, so it needs its own sentinel set + sanitization helper. + +**Files:** +- Create: `room-worker/handler_sync_create_dm.go` + +- [ ] **Step 1: Write a unit test stub** (real tests come in later tasks; this just exercises the sanitizer) + +Create `room-worker/handler_sync_create_dm_test.go` with the package declaration and one test: + +```go +package main + +import ( + "errors" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestSanitizeSyncDMError(t *testing.T) { + cases := []struct { + name string + in error + want string + }{ + {"nil returns empty", nil, ""}, + {"missing request ID surfaced", errMissingRequestID, "missing X-Request-ID header"}, + {"invalid request ID surfaced", errInvalidRequestID, "invalid X-Request-ID header"}, + {"invalid sync DM request surfaced", errInvalidSyncDMRequest, "invalid sync DM request"}, + {"user lookup failed surfaced", errUserLookupFailed, "user lookup failed"}, + {"cross-site requester surfaced", errCrossSiteRequester, "requester is not on this site"}, + {"room ID collision surfaced", errRoomIDCollision, "room ID collision (existing room metadata mismatch)"}, + {"unknown error masked as internal", errors.New("mongo: connection refused"), "internal error"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.want, sanitizeSyncDMError(tc.in)) + }) + } +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cd /home/user/chat && go test -run TestSanitizeSyncDMError ./room-worker/...` + +Expected: FAIL with `undefined: errMissingRequestID` (and the others). + +- [ ] **Step 3: Create `room-worker/handler_sync_create_dm.go` with sentinels + sanitizer** + +Create the new file with this initial content (more handler code lands in later tasks): + +```go +package main + +import ( + "errors" +) + +var ( + errMissingRequestID = errors.New("missing X-Request-ID header") + errInvalidRequestID = errors.New("invalid X-Request-ID header") + errInvalidSyncDMRequest = errors.New("invalid sync DM request") + errUserLookupFailed = errors.New("user lookup failed") + errCrossSiteRequester = errors.New("requester is not on this site") + errRoomIDCollision = errors.New("room ID collision (existing room metadata mismatch)") +) + +// sanitizeSyncDMError maps a handler error to a user-displayable string. +// Known sentinels surface their literal message; anything else becomes "internal error" +// to avoid leaking raw error text (e.g. mongo or NATS internals). +func sanitizeSyncDMError(err error) string { + if err == nil { + return "" + } + switch { + case errors.Is(err, errMissingRequestID), + errors.Is(err, errInvalidRequestID), + errors.Is(err, errInvalidSyncDMRequest), + errors.Is(err, errUserLookupFailed), + errors.Is(err, errCrossSiteRequester), + errors.Is(err, errRoomIDCollision): + return err.Error() + default: + return "internal error" + } +} +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `cd /home/user/chat && go test -run TestSanitizeSyncDMError ./room-worker/...` + +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +cd /home/user/chat +git add room-worker/handler_sync_create_dm.go room-worker/handler_sync_create_dm_test.go +git commit -m "feat(room-worker): sync DM handler sentinels and sanitizer" +``` + +--- + +## Task 6: Implement `handleSyncCreateDM` — request shape validation + +**Files:** +- Modify: `room-worker/handler_sync_create_dm.go` +- Modify: `room-worker/handler_sync_create_dm_test.go` + +- [ ] **Step 1: Write failing tests for shape validation** + +Append to `room-worker/handler_sync_create_dm_test.go`: + +```go +import ( + "context" + "encoding/json" + + "github.com/hmchangw/chat/pkg/model" + "github.com/hmchangw/chat/pkg/natsutil" +) + +// newRequestCtx returns a context carrying a syntactically-valid X-Request-ID. +func newRequestCtx() context.Context { + return natsutil.WithRequestID(context.Background(), "01970a4f-8c2d-7c9a-abcd-e0123456789f") +} + +func TestHandleSyncCreateDM_MissingRequestID(t *testing.T) { + h := &Handler{siteID: "site-a"} + req := model.SyncCreateDMRequest{ + RoomType: model.RoomTypeDM, + RequesterAccount: "alice", + OtherAccount: "bob", + } + data, _ := json.Marshal(req) + _, err := h.handleSyncCreateDM(context.Background(), data) + assert.ErrorIs(t, err, errMissingRequestID) +} + +func TestHandleSyncCreateDM_InvalidJSON(t *testing.T) { + h := &Handler{siteID: "site-a"} + _, err := h.handleSyncCreateDM(newRequestCtx(), []byte("{not json")) + assert.ErrorIs(t, err, errInvalidSyncDMRequest) +} + +func TestHandleSyncCreateDM_InvalidRoomType(t *testing.T) { + h := &Handler{siteID: "site-a"} + req := model.SyncCreateDMRequest{ + RoomType: model.RoomTypeChannel, // invalid for sync DM + RequesterAccount: "alice", + OtherAccount: "bob", + } + data, _ := json.Marshal(req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + assert.ErrorIs(t, err, errInvalidSyncDMRequest) +} + +func TestHandleSyncCreateDM_EmptyAccounts(t *testing.T) { + h := &Handler{siteID: "site-a"} + cases := []model.SyncCreateDMRequest{ + {RoomType: model.RoomTypeDM, RequesterAccount: "", OtherAccount: "bob"}, + {RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: ""}, + } + for _, req := range cases { + data, _ := json.Marshal(req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + assert.ErrorIs(t, err, errInvalidSyncDMRequest) + } +} + +func TestHandleSyncCreateDM_SelfDM(t *testing.T) { + h := &Handler{siteID: "site-a"} + req := model.SyncCreateDMRequest{ + RoomType: model.RoomTypeDM, + RequesterAccount: "alice", + OtherAccount: "alice", + } + data, _ := json.Marshal(req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + assert.ErrorIs(t, err, errInvalidSyncDMRequest) +} +``` + +> **Note on `natsutil.WithRequestID`:** This is already exported by `pkg/natsutil/request_id.go:17`. Use it directly — no new helper needed. + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `cd /home/user/chat && go test -run 'TestHandleSyncCreateDM_' ./room-worker/...` + +Expected: FAIL — `handleSyncCreateDM` is undefined. + +- [ ] **Step 3: Add the `handleSyncCreateDM` method with shape validation only** + +Append to `room-worker/handler_sync_create_dm.go`: + +```go +import ( + "context" + "encoding/json" + + "github.com/hmchangw/chat/pkg/idgen" + "github.com/hmchangw/chat/pkg/model" + "github.com/hmchangw/chat/pkg/natsutil" +) + +// handleSyncCreateDM is the business logic for the sync DM endpoint. It takes the inbound +// request bytes and returns either the marshalled SyncCreateDMReply payload or an error. +// The natsServerCreateDM wrapper (Task 12) handles unwrapping the NATS msg and replying. +func (h *Handler) handleSyncCreateDM(ctx context.Context, data []byte) ([]byte, error) { + requestID := natsutil.RequestIDFromContext(ctx) + if requestID == "" { + return nil, errMissingRequestID + } + if !idgen.IsValidUUID(requestID) { + return nil, errInvalidRequestID + } + + var req model.SyncCreateDMRequest + if err := json.Unmarshal(data, &req); err != nil { + return nil, errInvalidSyncDMRequest + } + if err := validateSyncCreateDMShape(&req); err != nil { + return nil, err + } + + // Subsequent steps (User resolution, persistence, events, reply) land in later tasks. + // For now this is a stub that reaches no further than shape validation. + return nil, errInvalidSyncDMRequest // placeholder — replaced in Task 7 +} + +func validateSyncCreateDMShape(req *model.SyncCreateDMRequest) error { + switch req.RoomType { + case model.RoomTypeDM, model.RoomTypeBotDM: + // ok + default: + return errInvalidSyncDMRequest + } + if req.RequesterAccount == "" || req.OtherAccount == "" { + return errInvalidSyncDMRequest + } + if req.RequesterAccount == req.OtherAccount { + return errInvalidSyncDMRequest + } + return nil +} +``` + +> **Note on `idgen.IsValidUUID`:** confirm this function exists with `grep -n "func IsValidUUID\b" /home/user/chat/pkg/idgen/idgen.go`. The existing room-service code uses it (`room-service/handler.go:157`). If not available, fall back to `idgen.IsValidUUIDv7` per the project CLAUDE.md (which states 36-char hyphenated UUIDv7 is the request-ID format) and adjust accordingly. + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `cd /home/user/chat && go test -run 'TestHandleSyncCreateDM_' ./room-worker/...` + +Expected: PASS for the shape-validation tests. + +- [ ] **Step 5: Commit** + +```bash +cd /home/user/chat +git add room-worker/handler_sync_create_dm.go room-worker/handler_sync_create_dm_test.go pkg/natsutil/request_id.go +git commit -m "feat(room-worker): handleSyncCreateDM request-shape validation" +``` + +(Drop `pkg/natsutil/request_id.go` from the `git add` if you didn't need to touch it.) + +--- + +## Task 7: Resolve User records and validate cross-site requester + +**Files:** +- Modify: `room-worker/handler_sync_create_dm.go` +- Modify: `room-worker/handler_sync_create_dm_test.go` + +- [ ] **Step 1: Write failing tests** + +Append to `room-worker/handler_sync_create_dm_test.go`: + +```go +import ( + "go.uber.org/mock/gomock" +) + +func TestHandleSyncCreateDM_RequesterNotFound(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + store := NewMockSubscriptionStore(ctrl) + h := &Handler{siteID: "site-a", store: store} + + store.EXPECT().GetUser(gomock.Any(), "alice").Return(nil, ErrUserNotFound) + + req := model.SyncCreateDMRequest{ + RoomType: model.RoomTypeDM, + RequesterAccount: "alice", + OtherAccount: "bob", + } + data, _ := json.Marshal(req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + assert.ErrorIs(t, err, errUserLookupFailed) +} + +func TestHandleSyncCreateDM_OtherNotFound(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + store := NewMockSubscriptionStore(ctrl) + h := &Handler{siteID: "site-a", store: store} + + store.EXPECT().GetUser(gomock.Any(), "alice").Return(&model.User{ID: "u-alice", Account: "alice", SiteID: "site-a"}, nil) + store.EXPECT().GetUser(gomock.Any(), "bob").Return(nil, ErrUserNotFound) + + req := model.SyncCreateDMRequest{ + RoomType: model.RoomTypeDM, + RequesterAccount: "alice", + OtherAccount: "bob", + } + data, _ := json.Marshal(req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + assert.ErrorIs(t, err, errUserLookupFailed) +} + +func TestHandleSyncCreateDM_CrossSiteRequester(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + store := NewMockSubscriptionStore(ctrl) + h := &Handler{siteID: "site-a", store: store} + + // Requester's home is site-b, but this room-worker is site-a → routing bug. + store.EXPECT().GetUser(gomock.Any(), "alice").Return(&model.User{ID: "u-alice", Account: "alice", SiteID: "site-b"}, nil) + + req := model.SyncCreateDMRequest{ + RoomType: model.RoomTypeDM, + RequesterAccount: "alice", + OtherAccount: "bob", + } + data, _ := json.Marshal(req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + assert.ErrorIs(t, err, errCrossSiteRequester) +} +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `cd /home/user/chat && go test -run 'TestHandleSyncCreateDM_(RequesterNotFound|OtherNotFound|CrossSiteRequester)' ./room-worker/...` + +Expected: FAIL. + +- [ ] **Step 3: Add User resolution to `handleSyncCreateDM`** + +Edit `room-worker/handler_sync_create_dm.go`. Replace the placeholder return-line in `handleSyncCreateDM` with the User resolution block, and add an `errors` import: + +```go +import ( + "context" + "encoding/json" + "errors" + "fmt" + + "github.com/hmchangw/chat/pkg/idgen" + "github.com/hmchangw/chat/pkg/model" + "github.com/hmchangw/chat/pkg/natsutil" +) +``` + +Replace the `return nil, errInvalidSyncDMRequest // placeholder` line with: + +```go + requester, err := h.store.GetUser(ctx, req.RequesterAccount) + if err != nil { + if errors.Is(err, ErrUserNotFound) { + return nil, errUserLookupFailed + } + return nil, fmt.Errorf("get requester: %w", errUserLookupFailed) + } + if requester.SiteID != h.siteID { + return nil, errCrossSiteRequester + } + + other, err := h.store.GetUser(ctx, req.OtherAccount) + if err != nil { + if errors.Is(err, ErrUserNotFound) { + return nil, errUserLookupFailed + } + return nil, fmt.Errorf("get counterpart: %w", errUserLookupFailed) + } + + // Suppress unused warnings until subsequent tasks consume these. + _, _ = requester, other + return nil, errInvalidSyncDMRequest // placeholder — replaced in Task 8 +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `cd /home/user/chat && go test -run 'TestHandleSyncCreateDM_' ./room-worker/...` + +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +cd /home/user/chat +git add room-worker/handler_sync_create_dm.go room-worker/handler_sync_create_dm_test.go +git commit -m "feat(room-worker): resolve User records in sync DM handler" +``` + +--- + +## Task 8: Persist Room with duplicate-key fallback + +**Files:** +- Modify: `room-worker/handler_sync_create_dm.go` +- Modify: `room-worker/handler_sync_create_dm_test.go` + +- [ ] **Step 1: Write failing tests** + +Append to `room-worker/handler_sync_create_dm_test.go`: + +```go +import ( + "time" + + "go.mongodb.org/mongo-driver/v2/mongo" +) + +func TestHandleSyncCreateDM_RoomCollisionMismatch(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + store := NewMockSubscriptionStore(ctrl) + h := &Handler{siteID: "site-a", store: store} + + requester := &model.User{ID: "u-alice", Account: "alice", SiteID: "site-a"} + other := &model.User{ID: "u-bob", Account: "bob", SiteID: "site-a"} + store.EXPECT().GetUser(gomock.Any(), "alice").Return(requester, nil) + store.EXPECT().GetUser(gomock.Any(), "bob").Return(other, nil) + + dupErr := mongo.WriteException{WriteErrors: []mongo.WriteError{{Code: 11000}}} + store.EXPECT().CreateRoom(gomock.Any(), gomock.Any()).Return(dupErr) + // Existing room with mismatched Type → collision. + store.EXPECT().GetRoom(gomock.Any(), gomock.Any()).Return(&model.Room{ + ID: idgen.BuildDMRoomID("u-alice", "u-bob"), Type: model.RoomTypeChannel, + SiteID: "site-a", Name: "", CreatedBy: "u-alice", + }, nil) + + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: "bob"} + data, _ := json.Marshal(req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + assert.ErrorIs(t, err, errRoomIDCollision) +} +``` + +> **Note:** Confirm `mongo.IsDuplicateKeyError` is used as the duplicate-key check (it's used in `processCreateRoom`). The `WriteException{Code:11000}` form satisfies it. + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cd /home/user/chat && go test -run TestHandleSyncCreateDM_RoomCollisionMismatch ./room-worker/...` + +Expected: FAIL — `CreateRoom` is not called yet. + +- [ ] **Step 3: Add Room persistence** + +Replace the placeholder lines (`_, _ = requester, other` and the `return nil, errInvalidSyncDMRequest // placeholder` line) in `handleSyncCreateDM` with: + +```go + acceptedAt := time.Now().UTC() + roomID := idgen.BuildDMRoomID(requester.ID, other.ID) + + room := &model.Room{ + ID: roomID, + Name: "", + Type: req.RoomType, + CreatedBy: requester.ID, + SiteID: h.siteID, + CreatedAt: acceptedAt, + UpdatedAt: acceptedAt, + } + if err := h.store.CreateRoom(ctx, room); err != nil { + if !mongo.IsDuplicateKeyError(err) { + return nil, fmt.Errorf("create room: %w", err) + } + existing, fetchErr := h.store.GetRoom(ctx, room.ID) + if fetchErr != nil { + return nil, fmt.Errorf("fetch room on duplicate-key: %w", fetchErr) + } + if existing.Type != room.Type || + existing.SiteID != room.SiteID || + existing.Name != room.Name || + existing.CreatedBy != room.CreatedBy { + return nil, errRoomIDCollision + } + room = existing + acceptedAt = existing.CreatedAt + } + + _, _ = room, acceptedAt + return nil, errInvalidSyncDMRequest // placeholder — replaced in Task 9 +``` + +Add the new imports `"time"` and `"go.mongodb.org/mongo-driver/v2/mongo"` to the file's import block. + +- [ ] **Step 4: Run tests** + +Run: `cd /home/user/chat && go test -run 'TestHandleSyncCreateDM_' ./room-worker/...` + +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +cd /home/user/chat +git add room-worker/handler_sync_create_dm.go room-worker/handler_sync_create_dm_test.go +git commit -m "feat(room-worker): persist Room with duplicate-key fallback" +``` + +--- + +## Task 9: Persist subscriptions and handle bulk-insert duplicate-key + +**Files:** +- Modify: `room-worker/handler_sync_create_dm.go` +- Modify: `room-worker/handler_sync_create_dm_test.go` + +- [ ] **Step 1: Write failing tests for the happy path and the bulk-duplicate-key fallback** + +Append to `room-worker/handler_sync_create_dm_test.go`: + +```go +func TestHandleSyncCreateDM_DM_PersistsSubsAndReturnsRequester(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + store := NewMockSubscriptionStore(ctrl) + h := &Handler{siteID: "site-a", store: store, publish: noopPublish} + + requester := &model.User{ID: "u-alice", Account: "alice", SiteID: "site-a"} + other := &model.User{ID: "u-bob", Account: "bob", SiteID: "site-a"} + store.EXPECT().GetUser(gomock.Any(), "alice").Return(requester, nil) + store.EXPECT().GetUser(gomock.Any(), "bob").Return(other, nil) + store.EXPECT().CreateRoom(gomock.Any(), gomock.Any()).Return(nil) + + var captured []*model.Subscription + store.EXPECT().BulkCreateSubscriptions(gomock.Any(), gomock.Any()). + DoAndReturn(func(_ context.Context, subs []*model.Subscription) error { + captured = subs + return nil + }) + store.EXPECT().ReconcileMemberCounts(gomock.Any(), gomock.Any()).Return(nil) + + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: "bob"} + data, _ := json.Marshal(req) + rawReply, err := h.handleSyncCreateDM(newRequestCtx(), data) + require.NoError(t, err) + + var reply model.SyncCreateDMReply + require.NoError(t, json.Unmarshal(rawReply, &reply)) + assert.True(t, reply.Success) + assert.Equal(t, "alice", reply.Subscription.User.Account) + assert.Equal(t, "u-alice", reply.Subscription.User.ID) + assert.Equal(t, idgen.BuildDMRoomID("u-alice", "u-bob"), reply.Subscription.RoomID) + assert.Equal(t, "bob", reply.Subscription.Name) + assert.Equal(t, model.RoomTypeDM, reply.Subscription.RoomType) + + require.Len(t, captured, 2) +} + +func TestHandleSyncCreateDM_BotDM_RequesterSubIsSubscribedTrue(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + store := NewMockSubscriptionStore(ctrl) + h := &Handler{siteID: "site-a", store: store, publish: noopPublish} + + requester := &model.User{ID: "u-alice", Account: "alice", SiteID: "site-a"} + bot := &model.User{ID: "u-bot", Account: "helper.bot", SiteID: "site-a"} + store.EXPECT().GetUser(gomock.Any(), "alice").Return(requester, nil) + store.EXPECT().GetUser(gomock.Any(), "helper.bot").Return(bot, nil) + store.EXPECT().CreateRoom(gomock.Any(), gomock.Any()).Return(nil) + store.EXPECT().BulkCreateSubscriptions(gomock.Any(), gomock.Any()).Return(nil) + store.EXPECT().ReconcileMemberCounts(gomock.Any(), gomock.Any()).Return(nil) + + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeBotDM, RequesterAccount: "alice", OtherAccount: "helper.bot"} + data, _ := json.Marshal(req) + rawReply, err := h.handleSyncCreateDM(newRequestCtx(), data) + require.NoError(t, err) + + var reply model.SyncCreateDMReply + require.NoError(t, json.Unmarshal(rawReply, &reply)) + assert.True(t, reply.Subscription.IsSubscribed) + assert.Equal(t, "alice", reply.Subscription.User.Account) +} + +func TestHandleSyncCreateDM_BulkInsertDuplicateKey_FallsBackToFind(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + store := NewMockSubscriptionStore(ctrl) + h := &Handler{siteID: "site-a", store: store, publish: noopPublish} + + requester := &model.User{ID: "u-alice", Account: "alice", SiteID: "site-a"} + other := &model.User{ID: "u-bob", Account: "bob", SiteID: "site-a"} + store.EXPECT().GetUser(gomock.Any(), "alice").Return(requester, nil) + store.EXPECT().GetUser(gomock.Any(), "bob").Return(other, nil) + store.EXPECT().CreateRoom(gomock.Any(), gomock.Any()).Return(nil) + + dupErr := mongo.WriteException{WriteErrors: []mongo.WriteError{{Code: 11000}}} + store.EXPECT().BulkCreateSubscriptions(gomock.Any(), gomock.Any()).Return(dupErr) + existingSub := &model.Subscription{ + ID: "existing-sub", + User: model.SubscriptionUser{ID: "u-alice", Account: "alice"}, + RoomID: idgen.BuildDMRoomID("u-alice", "u-bob"), + Name: "bob", + RoomType: model.RoomTypeDM, + IsSubscribed: true, + } + store.EXPECT().FindDMSubscription(gomock.Any(), "alice", "bob").Return(existingSub, nil) + + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: "bob"} + data, _ := json.Marshal(req) + rawReply, err := h.handleSyncCreateDM(newRequestCtx(), data) + require.NoError(t, err) + + var reply model.SyncCreateDMReply + require.NoError(t, json.Unmarshal(rawReply, &reply)) + assert.Equal(t, "existing-sub", reply.Subscription.ID) +} + +// noopPublish is a PublishFunc that drops all events; used by tests that don't +// assert on publish behavior. +func noopPublish(_ context.Context, _ string, _ []byte, _ string) error { return nil } +``` + +> **Note:** the `publish` field on `Handler` is the third arg to `NewHandler` (see `room-worker/main.go:77`). The exported tests can either use the existing constructor or, if simpler, set the unexported `publish` field directly via a test-only constructor in `room-worker/handler.go`. Confirm the `Handler` struct field names with `grep -n "type Handler struct" /home/user/chat/room-worker/handler.go`. If `publish` isn't exported in the struct definition or accessible from `_test.go` in the same package, no change is needed — same-package `_test.go` files can read/write unexported fields directly. + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `cd /home/user/chat && go test -run 'TestHandleSyncCreateDM_(DM_PersistsSubs|BotDM_|BulkInsertDuplicateKey)' ./room-worker/...` + +Expected: FAIL — subscription persistence and reply marshalling not yet wired. + +- [ ] **Step 3: Add subscription persistence + reply** + +Replace the existing placeholder block (`_, _ = room, acceptedAt` and the `return nil, errInvalidSyncDMRequest`) with: + +```go + var subs []*model.Subscription + switch req.RoomType { + case model.RoomTypeDM: + subs = buildDMSubs(requester, other, room, acceptedAt) + case model.RoomTypeBotDM: + subs = buildBotDMSubs(requester, other, room, acceptedAt) + } + + requesterSub, err := h.persistSyncDMSubs(ctx, requester, other, subs) + if err != nil { + return nil, err + } + + if rcErr := h.store.ReconcileMemberCounts(ctx, room.ID); rcErr != nil { + slog.Error("sync DM: reconcile member counts failed", + "error", rcErr, "roomID", room.ID, "requestID", requestID) + } + + // subscription.update fan-out + outbox emission land in Tasks 10 and 11. + + reply, err := json.Marshal(model.SyncCreateDMReply{ + Success: true, + Subscription: *requesterSub, + }) + if err != nil { + return nil, fmt.Errorf("marshal reply: %w", err) + } + return reply, nil +} + +// persistSyncDMSubs inserts both subs. On a duplicate-key race (concurrent caller or retry), +// it falls back to fetching the requester's existing sub via FindDMSubscription and returns +// that — preserving idempotent behavior. +func (h *Handler) persistSyncDMSubs(ctx context.Context, requester, other *model.User, + subs []*model.Subscription, +) (*model.Subscription, error) { + err := h.store.BulkCreateSubscriptions(ctx, subs) + if err == nil { + return pickRequesterSub(subs, requester.Account), nil + } + if !mongo.IsDuplicateKeyError(err) { + return nil, fmt.Errorf("bulk create subs: %w", err) + } + existing, fetchErr := h.store.FindDMSubscription(ctx, requester.Account, other.Account) + if fetchErr != nil { + return nil, fmt.Errorf("find existing sub on duplicate-key: %w", fetchErr) + } + return existing, nil +} + +func pickRequesterSub(subs []*model.Subscription, requesterAccount string) *model.Subscription { + for _, s := range subs { + if s.User.Account == requesterAccount { + return s + } + } + return nil +} +``` + +Add `"log/slog"` to the import block. + +- [ ] **Step 4: Run tests** + +Run: `cd /home/user/chat && go test -run 'TestHandleSyncCreateDM_' ./room-worker/...` + +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +cd /home/user/chat +git add room-worker/handler_sync_create_dm.go room-worker/handler_sync_create_dm_test.go +git commit -m "feat(room-worker): persist subs with duplicate-key fallback and reply" +``` + +--- + +## Task 10: Publish `subscription.update` events + +**Files:** +- Modify: `room-worker/handler_sync_create_dm.go` +- Modify: `room-worker/handler_sync_create_dm_test.go` + +- [ ] **Step 1: Write failing tests** + +Append to `room-worker/handler_sync_create_dm_test.go`: + +```go +import ( + "sync" + + "github.com/hmchangw/chat/pkg/subject" +) + +type capturedPublish struct { + subject string + data []byte + msgID string +} + +type publishCapturer struct { + mu sync.Mutex + captured []capturedPublish +} + +func (c *publishCapturer) fn(_ context.Context, subj string, data []byte, msgID string) error { + c.mu.Lock() + defer c.mu.Unlock() + c.captured = append(c.captured, capturedPublish{subject: subj, data: append([]byte(nil), data...), msgID: msgID}) + return nil +} + +func TestHandleSyncCreateDM_PublishesSubscriptionUpdateForBothUsers(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + store := NewMockSubscriptionStore(ctrl) + cap := &publishCapturer{} + h := &Handler{siteID: "site-a", store: store, publish: cap.fn} + + requester := &model.User{ID: "u-alice", Account: "alice", SiteID: "site-a"} + other := &model.User{ID: "u-bob", Account: "bob", SiteID: "site-a"} + store.EXPECT().GetUser(gomock.Any(), "alice").Return(requester, nil) + store.EXPECT().GetUser(gomock.Any(), "bob").Return(other, nil) + store.EXPECT().CreateRoom(gomock.Any(), gomock.Any()).Return(nil) + store.EXPECT().BulkCreateSubscriptions(gomock.Any(), gomock.Any()).Return(nil) + store.EXPECT().ReconcileMemberCounts(gomock.Any(), gomock.Any()).Return(nil) + + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: "bob"} + data, _ := json.Marshal(req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + require.NoError(t, err) + + // Two subscription.update publishes — one per user. + subjects := map[string]int{} + for _, p := range cap.captured { + subjects[p.subject]++ + } + assert.Equal(t, 1, subjects[subject.SubscriptionUpdate("alice")]) + assert.Equal(t, 1, subjects[subject.SubscriptionUpdate("bob")]) +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cd /home/user/chat && go test -run TestHandleSyncCreateDM_PublishesSubscriptionUpdateForBothUsers ./room-worker/...` + +Expected: FAIL — events not published. + +- [ ] **Step 3: Add subscription.update fan-out** + +Edit `room-worker/handler_sync_create_dm.go`. Replace the comment line `// subscription.update fan-out + outbox emission land in Tasks 10 and 11.` with the fan-out call: + +```go + h.publishSubscriptionUpdates(ctx, subs, acceptedAt, requestID) + // outbox emission lands in Task 11. +``` + +Then add the new method (anywhere in the same file, below `handleSyncCreateDM`): + +```go +func (h *Handler) publishSubscriptionUpdates(ctx context.Context, subs []*model.Subscription, acceptedAt time.Time, requestID string) { + for _, sub := range subs { + evt := model.SubscriptionUpdateEvent{ + UserID: sub.User.ID, + Subscription: *sub, + Action: "added", + Timestamp: acceptedAt.UnixMilli(), + } + data, err := json.Marshal(evt) + if err != nil { + slog.Error("sync DM: marshal subscription.update failed", + "error", err, "account", sub.User.Account, "requestID", requestID) + continue + } + if err := h.publish(ctx, subject.SubscriptionUpdate(sub.User.Account), data, ""); err != nil { + slog.Error("sync DM: publish subscription.update failed", + "error", err, "account", sub.User.Account, "requestID", requestID) + } + } +} +``` + +Add `"github.com/hmchangw/chat/pkg/subject"` to the import block. + +- [ ] **Step 4: Run tests** + +Run: `cd /home/user/chat && go test -run 'TestHandleSyncCreateDM_' ./room-worker/...` + +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +cd /home/user/chat +git add room-worker/handler_sync_create_dm.go room-worker/handler_sync_create_dm_test.go +git commit -m "feat(room-worker): publish subscription.update events from sync DM handler" +``` + +--- + +## Task 11: Publish cross-site `room_created` outbox event + +**Files:** +- Modify: `room-worker/handler_sync_create_dm.go` +- Modify: `room-worker/handler_sync_create_dm_test.go` + +- [ ] **Step 1: Write failing tests** + +Append to `room-worker/handler_sync_create_dm_test.go`: + +```go +func TestHandleSyncCreateDM_CrossSite_EmitsOutbox(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + store := NewMockSubscriptionStore(ctrl) + cap := &publishCapturer{} + h := &Handler{siteID: "site-a", store: store, publish: cap.fn} + + requester := &model.User{ID: "u-alice", Account: "alice", SiteID: "site-a"} + other := &model.User{ID: "u-bob", Account: "bob", SiteID: "site-b"} // remote + store.EXPECT().GetUser(gomock.Any(), "alice").Return(requester, nil) + store.EXPECT().GetUser(gomock.Any(), "bob").Return(other, nil) + store.EXPECT().CreateRoom(gomock.Any(), gomock.Any()).Return(nil) + store.EXPECT().BulkCreateSubscriptions(gomock.Any(), gomock.Any()).Return(nil) + store.EXPECT().ReconcileMemberCounts(gomock.Any(), gomock.Any()).Return(nil) + + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: "bob"} + data, _ := json.Marshal(req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + require.NoError(t, err) + + var outbox *capturedPublish + for i := range cap.captured { + if cap.captured[i].subject == subject.Outbox("site-a", "site-b", model.OutboxTypeRoomCreated) { + outbox = &cap.captured[i] + break + } + } + require.NotNil(t, outbox, "expected an outbox publish to site-b") + + var env model.OutboxEvent + require.NoError(t, json.Unmarshal(outbox.data, &env)) + assert.Equal(t, model.OutboxTypeRoomCreated, env.Type) + assert.Equal(t, "site-a", env.SiteID) + assert.Equal(t, "site-b", env.DestSiteID) + + var payload model.RoomCreatedOutbox + require.NoError(t, json.Unmarshal(env.Payload, &payload)) + assert.Equal(t, model.RoomTypeDM, payload.RoomType) + assert.Equal(t, "", payload.RoomName) + assert.Equal(t, "site-a", payload.HomeSiteID) + assert.Equal(t, []string{"bob"}, payload.Accounts) + assert.Equal(t, "alice", payload.RequesterAccount) + // Nats-Msg-Id encoded into the publish msgID for JetStream dedup. + assert.Equal(t, "01970a4f-8c2d-7c9a-abcd-e0123456789f:site-b", outbox.msgID) +} + +func TestHandleSyncCreateDM_SameSite_NoOutbox(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + store := NewMockSubscriptionStore(ctrl) + cap := &publishCapturer{} + h := &Handler{siteID: "site-a", store: store, publish: cap.fn} + + requester := &model.User{ID: "u-alice", Account: "alice", SiteID: "site-a"} + other := &model.User{ID: "u-bob", Account: "bob", SiteID: "site-a"} // same site + store.EXPECT().GetUser(gomock.Any(), "alice").Return(requester, nil) + store.EXPECT().GetUser(gomock.Any(), "bob").Return(other, nil) + store.EXPECT().CreateRoom(gomock.Any(), gomock.Any()).Return(nil) + store.EXPECT().BulkCreateSubscriptions(gomock.Any(), gomock.Any()).Return(nil) + store.EXPECT().ReconcileMemberCounts(gomock.Any(), gomock.Any()).Return(nil) + + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: "bob"} + data, _ := json.Marshal(req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + require.NoError(t, err) + + for _, p := range cap.captured { + assert.NotContains(t, p.subject, "outbox.", "no outbox publish expected for same-site DM") + } +} +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `cd /home/user/chat && go test -run 'TestHandleSyncCreateDM_(CrossSite|SameSite)' ./room-worker/...` + +Expected: FAIL — outbox publish not yet wired. + +- [ ] **Step 3: Add cross-site outbox publish** + +Edit `room-worker/handler_sync_create_dm.go`. Replace the comment line `// outbox emission lands in Task 11.` with: + +```go + if err := h.publishSyncDMOutbox(ctx, room, requester, other, acceptedAt, requestID); err != nil { + slog.Error("sync DM: publish outbox failed", + "error", err, "roomID", room.ID, "requestID", requestID) + } +``` + +Then append a new method below `publishSubscriptionUpdates`: + +```go +func (h *Handler) publishSyncDMOutbox(ctx context.Context, room *model.Room, requester, other *model.User, acceptedAt time.Time, requestID string) error { + if other.SiteID == "" || other.SiteID == h.siteID { + return nil + } + + payload := model.RoomCreatedOutbox{ + RoomID: room.ID, + RoomType: room.Type, + RoomName: "", + HomeSiteID: room.SiteID, + Accounts: []string{other.Account}, + RequesterAccount: requester.Account, + Timestamp: acceptedAt.UnixMilli(), + } + pData, err := json.Marshal(payload) + if err != nil { + return fmt.Errorf("marshal room_created outbox payload: %w", err) + } + envelope := model.OutboxEvent{ + Type: model.OutboxTypeRoomCreated, + SiteID: room.SiteID, + DestSiteID: other.SiteID, + Payload: pData, + Timestamp: acceptedAt.UnixMilli(), + } + eData, err := json.Marshal(envelope) + if err != nil { + return fmt.Errorf("marshal outbox envelope: %w", err) + } + return h.publish(ctx, + subject.Outbox(room.SiteID, other.SiteID, model.OutboxTypeRoomCreated), + eData, + requestID+":"+other.SiteID, + ) +} +``` + +- [ ] **Step 4: Run tests** + +Run: `cd /home/user/chat && go test -run 'TestHandleSyncCreateDM_' ./room-worker/...` + +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +cd /home/user/chat +git add room-worker/handler_sync_create_dm.go room-worker/handler_sync_create_dm_test.go +git commit -m "feat(room-worker): emit cross-site room_created outbox from sync DM handler" +``` + +--- + +## Task 12: Add `natsServerCreateDM` wrapper and wire it up in `main.go` + +**Files:** +- Modify: `room-worker/handler_sync_create_dm.go` +- Modify: `room-worker/main.go` + +- [ ] **Step 1: Add the NATS handler wrapper** + +Append to `room-worker/handler_sync_create_dm.go`. This wraps `handleSyncCreateDM` with NATS msg unmarshalling and reply marshalling: + +```go +import ( + "github.com/Marz32onE/instrumentation-go/otel-nats/otelnats" +) + +// natsServerCreateDM is the NATS-side entry point for chat.server.request.room.{siteID}.create.dm. +// It builds the handler context, invokes handleSyncCreateDM, and replies via natsutil. +func (h *Handler) natsServerCreateDM(m otelnats.Msg) { + ctx := natsutil.ContextWithRequestIDFromHeaders(m.Context(), m.Msg.Header) + reply, err := h.handleSyncCreateDM(ctx, m.Msg.Data) + if err != nil { + natsutil.ReplyError(m.Msg, sanitizeSyncDMError(err)) + return + } + if _, werr := m.Msg.RespondMsg(natsutil.NewMsg(ctx, m.Msg.Reply, reply)); werr != nil { + slog.Error("sync DM: reply failed", "error", werr) + } +} +``` + +> **Note:** Confirm the otelnats package path with `grep -n "otelnats\." /home/user/chat/room-service/handler.go | head -3`. The exact import path used by room-service is the same one to use here. + +- [ ] **Step 2: Wire the subscription in `room-worker/main.go`** + +Edit `room-worker/main.go`. After the existing `handler := NewHandler(...)` block (line 91, before the `cons, err := js.CreateOrUpdateConsumer(...)` block), add: + +```go + if _, err := nc.QueueSubscribe(subject.RoomCreateDMSync(cfg.SiteID), "room-worker", handler.natsServerCreateDM); err != nil { + slog.Error("subscribe sync DM endpoint failed", "error", err) + os.Exit(1) + } +``` + +Add `"github.com/hmchangw/chat/pkg/subject"` to the import block (it isn't there today). + +> **Note on `nc` type:** confirm `nc.QueueSubscribe(...)` is available with the appropriate signature on the `*otelnats.Conn` returned by `natsutil.Connect`. If room-service uses the same call shape (it does — `room-service/handler.go:60`), this pattern is correct. + +- [ ] **Step 3: Verify the build** + +Run: `cd /home/user/chat && go build ./room-worker/...` + +Expected: clean build. + +- [ ] **Step 4: Run all room-worker unit tests** + +Run: `cd /home/user/chat && make test SERVICE=room-worker` + +Expected: PASS. + +- [ ] **Step 5: Verify lint** + +Run: `cd /home/user/chat && make lint` + +Expected: clean. + +- [ ] **Step 6: Commit** + +```bash +cd /home/user/chat +git add room-worker/handler_sync_create_dm.go room-worker/main.go +git commit -m "feat(room-worker): wire sync DM NATS endpoint in main" +``` + +--- + +## Task 13: Add integration tests with testcontainers + +**Files:** +- Modify: `room-worker/integration_test.go` + +- [ ] **Step 1: Survey the existing integration test setup** + +Run: `cd /home/user/chat && grep -n "TestProcessCreateRoomDM\|setupMongo\|setupNATS\|//go:build integration" room-worker/integration_test.go | head -20` + +This shows the existing helpers (`setupMongo`, etc.) you'll reuse. Use them directly — don't reinvent. + +- [ ] **Step 2: Add the integration tests** + +Append to `room-worker/integration_test.go` (inside the `//go:build integration` file): + +```go +func TestSyncCreateDM_DM_PersistsRoomAndSubs(t *testing.T) { + ctx := context.Background() + store := setupMongo(t) // existing helper + siteID := "site-a" + + // Seed both User docs the handler will GetUser. + seedUser(t, store, &model.User{ID: "u-alice", Account: "alice", SiteID: siteID, EngName: "Alice", ChineseName: "愛麗絲"}) + seedUser(t, store, &model.User{ID: "u-bob", Account: "bob", SiteID: siteID, EngName: "Bob", ChineseName: "鮑勃"}) + + captured := newPublishCapturer() + handler := NewHandler(store, siteID, captured.fn) + + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: "bob"} + data, _ := json.Marshal(req) + reply, err := handler.handleSyncCreateDM(newIntegRequestCtx(), data) + require.NoError(t, err) + + var got model.SyncCreateDMReply + require.NoError(t, json.Unmarshal(reply, &got)) + assert.True(t, got.Success) + assert.Equal(t, "alice", got.Subscription.User.Account) + + // Verify Room + 2 subs in Mongo. + roomID := idgen.BuildDMRoomID("u-alice", "u-bob") + room, err := store.GetRoom(ctx, roomID) + require.NoError(t, err) + assert.Equal(t, model.RoomTypeDM, room.Type) + assert.Equal(t, siteID, room.SiteID) + + // 2 subscription.update publishes captured. + assert.Equal(t, 2, captured.countSubject(subject.SubscriptionUpdate("alice"))+captured.countSubject(subject.SubscriptionUpdate("bob"))) +} + +func TestSyncCreateDM_BotDM_CrossSiteOutbox(t *testing.T) { + store := setupMongo(t) + siteID := "site-a" + + seedUser(t, store, &model.User{ID: "u-alice", Account: "alice", SiteID: siteID, EngName: "Alice", ChineseName: "愛麗絲"}) + seedUser(t, store, &model.User{ID: "u-bot", Account: "helper.bot", SiteID: "site-b"}) + + captured := newPublishCapturer() + handler := NewHandler(store, siteID, captured.fn) + + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeBotDM, RequesterAccount: "alice", OtherAccount: "helper.bot"} + data, _ := json.Marshal(req) + _, err := handler.handleSyncCreateDM(newIntegRequestCtx(), data) + require.NoError(t, err) + + assert.Equal(t, 1, captured.countSubject(subject.Outbox("site-a", "site-b", model.OutboxTypeRoomCreated))) +} + +func TestSyncCreateDM_RetryIdempotent(t *testing.T) { + ctx := context.Background() + store := setupMongo(t) + siteID := "site-a" + + seedUser(t, store, &model.User{ID: "u-alice", Account: "alice", SiteID: siteID, EngName: "Alice", ChineseName: "愛麗絲"}) + seedUser(t, store, &model.User{ID: "u-bob", Account: "bob", SiteID: siteID, EngName: "Bob", ChineseName: "鮑勃"}) + + captured := newPublishCapturer() + handler := NewHandler(store, siteID, captured.fn) + + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: "bob"} + data, _ := json.Marshal(req) + + reqCtx := newIntegRequestCtx() // same X-Request-ID for both calls + r1, err := handler.handleSyncCreateDM(reqCtx, data) + require.NoError(t, err) + r2, err := handler.handleSyncCreateDM(reqCtx, data) + require.NoError(t, err) + + var rep1, rep2 model.SyncCreateDMReply + require.NoError(t, json.Unmarshal(r1, &rep1)) + require.NoError(t, json.Unmarshal(r2, &rep2)) + assert.Equal(t, rep1.Subscription.RoomID, rep2.Subscription.RoomID) + + // Exactly one Room and two subs in Mongo. + roomID := idgen.BuildDMRoomID("u-alice", "u-bob") + room, err := store.GetRoom(ctx, roomID) + require.NoError(t, err) + assert.Equal(t, roomID, room.ID) + // (Optionally: count subs by querying the subscriptions collection directly via the test helper.) +} +``` + +> **Note:** the helpers `setupMongo`, `seedUser`, `newPublishCapturer`, and `newIntegRequestCtx` are placeholders — wire them to whatever the existing `room-worker/integration_test.go` already uses. If `seedUser` doesn't exist, the existing fixture-loading pattern in the file (look for the existing `TestProcess*` integration tests) is the template. If `newIntegRequestCtx` doesn't exist, define it once at the top of the file as `func newIntegRequestCtx() context.Context { return natsutil.WithRequestID(context.Background(), "01970a4f-8c2d-7c9a-abcd-e0123456789f") }`. Use a hard-coded valid UUID rather than generating one — `idgen.GenerateUUIDv7` returns the 32-char no-hyphen form which is reserved for entity Mongo `_id`s, not request IDs (see project CLAUDE.md §"Request Logging & Tracing"). + +- [ ] **Step 3: Run integration tests** + +Run: `cd /home/user/chat && make test-integration SERVICE=room-worker` + +Expected: PASS. + +- [ ] **Step 4: Commit** + +```bash +cd /home/user/chat +git add room-worker/integration_test.go +git commit -m "test(room-worker): integration tests for sync DM endpoint" +``` + +--- + +## Task 14: Final verification + +- [ ] **Step 1: Run lint** + +Run: `cd /home/user/chat && make lint` + +Expected: clean. Fix any issues before proceeding. + +- [ ] **Step 2: Run all unit tests with race detector** + +Run: `cd /home/user/chat && make test` + +Expected: all green. + +- [ ] **Step 3: Run room-worker integration tests** + +Run: `cd /home/user/chat && make test-integration SERVICE=room-worker` + +Expected: all green. + +- [ ] **Step 4: Manual smoke test (optional)** + +Spin up the local docker-compose for room-worker: + +```bash +cd /home/user/chat/room-worker/deploy +docker compose up -d +``` + +Issue a test request via `nats` CLI: + +```bash +nats request --creds chat.server.request.room.site-local.create.dm \ + '{"roomType":"dm","requesterAccount":"alice","otherAccount":"bob"}' \ + -H "X-Request-ID: 01970a4f-8c2d-7c9a-abcd-e0123456789f" +``` + +Expected: success reply with `success:true` and the requester's `Subscription`. (Requires Mongo to be seeded with both Users beforehand.) + +- [ ] **Step 5: No final commit** + +Verification doesn't produce code changes. If any of steps 1–3 surfaced a bug, fix it as a new commit on the branch. + +--- + +## Out-of-scope (handled separately) + +- **Part 2 — `HistoryConfig.SharedSince` removal.** Will be a separate plan in `docs/superpowers/plans/2026-05-07-historyconfig-sharedsince-removal.md`. +- **NATS callout policy update.** Adding `chat.server.request.room.>` permission for `room-worker` is an ops/IaC change, owned outside this repo. +- **user-service caller.** The actual caller (which performs the validation contract per the spec) is a separate PR. + +--- + +## Post-implementation deviations + +See the spec's "Implementation deviations from spec" section for the authoritative table. diff --git a/docs/superpowers/plans/2026-05-07-sync-dm-endpoint-part2.md b/docs/superpowers/plans/2026-05-07-sync-dm-endpoint-part2.md new file mode 100644 index 000000000..363e72aa2 --- /dev/null +++ b/docs/superpowers/plans/2026-05-07-sync-dm-endpoint-part2.md @@ -0,0 +1,126 @@ +# Sync DM Endpoint — Part 2: Remove `HistoryConfig.SharedSince` + +## Context + +`pkg/model/member.go:19` defines `HistoryConfig` with two fields: + +```go +type HistoryConfig struct { + Mode HistoryMode `json:"mode" bson:"mode"` + SharedSince *int64 `json:"sharedSince,omitempty" bson:"sharedSince,omitempty"` +} +``` + +`SharedSince` is a caller-supplied override on `AddMembersRequest.History`. When `Mode == HistoryModeNone` and `SharedSince` is non-nil and positive, room-worker uses it as the per-subscription `HistorySharedSince` (and on the outbound `MemberAddEvent`) instead of `req.Timestamp` (the request acceptance time). + +**Why we're removing it.** The synchronous DM-create endpoint introduced in Part 1 has no caller that supplies an explicit cutoff — the DM endpoint always uses request-acceptance time as the cutoff. No production code path, no test outside the dedicated SharedSince-precedence tests, and no client API consumer (per `docs/client-api.md`) sets `SharedSince`. The field is dead weight: it complicates `historySharedSincePtr`, requires its own dedicated unit test (`handler_test.go:2401`), and forces every test that constructs `HistoryConfig{Mode: HistoryModeNone}` to think about whether it cares. Removing it collapses the helper to a single branch and simplifies the request schema before downstream code stabilizes around it. + +**Scope.** This plan removes ONLY the `SharedSince` field on `HistoryConfig` and the override path in `room-worker`. The per-subscription `Subscription.HistorySharedSince` field is unaffected — that's the durable per-user cutoff stored on every subscription and read by history-service to gate message access. The plan also leaves `MemberAddEvent.HistorySharedSince` (the cross-site cutoff propagated to remote sites) intact: room-worker continues to populate it from `req.Timestamp` so remote-site sub creation matches home-site sub creation. + +**Verification before acting.** This plan is built from the survey in the prior message. Before writing any code, the executor MUST re-confirm by running: + +```bash +grep -rn "HistoryConfig{" /home/user/chat --include='*.go' +grep -rn "History\.SharedSince\|history\.SharedSince" /home/user/chat --include='*.go' +grep -rn "\"sharedSince\"" /home/user/chat +``` + +Expected hits — all under `pkg/model/member.go`, `room-worker/handler.go`, `room-worker/handler_test.go` (the three tests at lines ~1573–1652 and ~2401), and possibly `docs/client-api.md`. Anything outside these files is a surprise — STOP and reassess before continuing. In particular, if `auth-service`, `room-service`, `inbox-worker`, or `history-service` reference `History.SharedSince` directly, the scope has changed and this plan must be revised. + +**Coordination with Part 1.** Part 1 (sync DM endpoint) does not write to `History.SharedSince` and does not read it. Part 2 can be done before Part 1, after Part 1, or in parallel — the two share no files except potentially `docs/client-api.md`. Recommend executing Part 2 *after* Part 1 lands, so Part 1's diff stays focused. + +--- + +## Tasks + +### Task 1 (Red): Update existing tests to drop `SharedSince` + +Touch `room-worker/handler_test.go`: + +1. **Delete** `TestProcessAddMembers_HistoryNone_ExplicitSharedSince` (around lines 1573–1612 — the test that sets `SharedSince: &explicitMs`). The behavior it asserts no longer exists. + +2. **Delete** `TestHandler_ProcessAddMembers_HistorySharedSinceWinsOverTimestamp` (around lines 2401–2448). Same reason. + +3. **Keep but verify unchanged**: + - `TestProcessAddMembers_HistoryNone_NoTimestamp` (~line 1615) — already uses `HistoryConfig{Mode: HistoryModeNone}` with no `SharedSince`. Fallback behavior is preserved. + - `TestProcessAddMembers_NoHistoryConfig_LeavesNil` (~line 1655) — already uses zero-value `HistoryConfig`. Behavior preserved. + +4. **Find any other** `HistoryConfig{...SharedSince...}` literals via `grep -n 'SharedSince' room-worker/handler_test.go room-worker/integration_test.go`. Remove the field. Survey at planning time found all uses are in the two tests above, but the executor must re-grep to be safe. + +5. Run `make test SERVICE=room-worker`. The two deletions should already pass; the rest should still pass without modification because the `SharedSince` field hasn't been removed from `model` yet — the tests just no longer reference it. + +> **Why this is the Red step despite passing.** Per CLAUDE.md §"Test-Driven Development": the Red step is "write tests that fail without the change, pass with it." Here the change is *removal*, so Red is "delete tests that assert the removed behavior, leaving only tests that constrain the kept behavior." The kept tests will still pass at this point; Task 2's removal is what they protect. + +**Commit:** `room-worker: drop tests for HistoryConfig.SharedSince override` + +--- + +### Task 2 (Green): Remove the field and the override branch + +1. **`pkg/model/member.go:19-23`** — remove the `SharedSince` field, leaving: + + ```go + type HistoryConfig struct { + Mode HistoryMode `json:"mode" bson:"mode"` + } + ``` + +2. **`room-worker/handler.go:58-79`** — collapse `historySharedSincePtr` to: + + ```go + // historySharedSincePtr resolves the History.SharedSince value for an + // add-member request. Mode != HistoryModeNone → nil (history is unrestricted). + // Otherwise returns req.Timestamp (the request acceptance time). + func historySharedSincePtr(history model.HistoryConfig, timestamp int64, roomID string) *int64 { + if history.Mode != model.HistoryModeNone { + return nil + } + if timestamp <= 0 { + slog.Error("restricted history with missing timestamp, emitting nil", "roomID", roomID, "mode", history.Mode) + return nil + } + return ×tamp + } + ``` + + Note: keep the `roomID` parameter and `slog.Error` log line — they're load-bearing diagnostics, not just there to support the dropped branch. Both call sites (`handler.go:616` and `handler.go:729`) pass them already; their signatures don't change. + +3. **`pkg/model/model_test.go:992`** — no change needed (already uses `HistoryConfig{Mode: HistoryModeAll}` with no `SharedSince`). Confirm the round-trip test still passes. + +4. Run `make generate` (no store interface changed, but cheap to confirm), then `make lint` and `make test SERVICE=room-worker`. All green. + +5. Run `make test` at repo root to catch any unexpected references in other services. If it fails, STOP — the survey missed something and the plan must be revised before continuing. + +**Commit:** `pkg/model,room-worker: remove HistoryConfig.SharedSince override` + +--- + +### Task 3 (Refactor): Documentation + +1. **`docs/client-api.md`** — search for `sharedSince` (case-insensitive). If the add-members request schema documents the field, remove that line. If there's prose like "callers may supply an explicit cutoff via `history.sharedSince`", remove it. If `sharedSince` is not present in the doc, no change needed; record that observation in the commit body. + +2. No CLAUDE.md change — the project guide doesn't reference this field. + +3. Run `make lint` once more. + +**Commit:** `docs/client-api: drop history.sharedSince from add-members schema` (skip this commit entirely if step 1 found nothing). + +--- + +## Out of scope + +- `Subscription.HistorySharedSince` (per-user cutoff stored on every subscription) — kept; it's the durable cutoff history-service gates on. +- `MemberAddEvent.HistorySharedSince` (cross-site event payload) — kept; remote sites need it to set the same cutoff on their local subs. +- `history-service` `GetHistorySharedSince` repo method and its tests (`history-service/internal/mongorepo/subscription.go`, `history-service/internal/service/threads_test.go`) — kept entirely; they read `Subscription.HistorySharedSince`, not `HistoryConfig.SharedSince`. +- Any migration of existing MongoDB documents — none needed. `HistoryConfig` is embedded only in the in-flight `AddMembersRequest` NATS event, never persisted. Once room-worker drains in-flight requests during deploy, no document carries `history.sharedSince` anywhere. (If the executor finds `HistoryConfig` persisted in a Mongo document during the verification grep, STOP — the assumption is wrong.) + +## Risks + +- **In-flight NATS messages during deploy.** If a request was published to `MESSAGES`/`ROOMS` with a non-nil `sharedSince` field before the deploy and is consumed after, the new `room-worker` will silently drop the override (Go's `encoding/json` ignores unknown fields by default). This is acceptable: no production caller sets the field today, and even if one did, falling back to `req.Timestamp` is the documented default. +- **Test flake risk.** None — the deleted tests are independent. + +## Done when + +- `grep -rn "SharedSince" /home/user/chat --include='*.go' | grep -v 'HistorySharedSince'` returns zero hits (the kept `Subscription.HistorySharedSince` field continues to match — the `grep -v` filters it out). +- `make lint && make test` pass at repo root. +- `make test-integration SERVICE=room-worker` passes. diff --git a/docs/superpowers/specs/2026-05-05-sync-dm-and-historyconfig-design.md b/docs/superpowers/specs/2026-05-05-sync-dm-and-historyconfig-design.md new file mode 100644 index 000000000..762b41bae --- /dev/null +++ b/docs/superpowers/specs/2026-05-05-sync-dm-and-historyconfig-design.md @@ -0,0 +1,481 @@ +# Sync Server-to-Server DM Creation + HistoryConfig Cleanup + +**Date:** 2026-05-05 +**Status:** Draft +**Branch:** `claude/sync-dm-endpoint` (targets `main`) + +## Summary + +Two related-but-distinct changes batched into a single PR: + +**Part 1 — Sync server-to-server DM creation.** A new NATS request/reply endpoint **served by `room-worker`** that lets internal services (e.g. `user-service` for its app-subscribe flow) create a DM or botDM **synchronously** and receive the **requester's `Subscription` doc** back in the reply. The endpoint is intentionally **persistence-only** — caller-side validation is the contract: `user-service` does the existence/policy/dedup checks before issuing the request, and `room-worker` trusts the validated payload, persists Room + subs inline, emits the same events the async path does (`subscription.update`, cross-site outbox), and replies with the requester's Subscription. No JetStream hop, no async-job-result correlation, and no `room-service` involvement on this path. Returning the requester's Subscription (rather than the Room) saves the caller a follow-up `GetSubscription` lookup: the Subscription is the per-user entity user-service ultimately needs, and `Subscription.RoomID` carries the room identifier for free. + +**Part 2 — Remove `HistoryConfig.SharedSince`.** Per reviewer feedback on PR #142: `HistoryConfig` no longer carries a `SharedSince` field — it has only `Mode`. `Subscription.HistorySharedSince` is now derived exclusively from `acceptedAt` (the room-service request-acceptance time) when `HistoryMode == "none"`. + +The two parts are independent in code but ship together because both touch `pkg/model` and the room-creation pipeline. + +## Scope + +In scope: + +**Part 1 — Sync DM endpoint (room-worker-hosted)** +- New NATS subject `chat.server.request.room.{siteID}.create.dm` (queue group `room-worker`, server credentials). +- New handler `natsServerCreateDM` in **`room-worker`** running persistence + outbox + events inline. Reply is the **requester's `Subscription` doc** (not the Room — the Subscription carries `RoomID` and is the per-user state user-service consumes; this also matches the legacy old-repo flow where the caller queried the requester's subscription after creation). +- **Caller-side validation contract.** `user-service` is responsible for all data-integrity checks before issuing the request: requester/counterpart existence, requester/counterpart `EngName`/`ChineseName` (DM only), self-DM rejection, bot `Assistant.Enabled` (botDM only), and **dedup-existing via `FindDMSubscription`** (caller skips the call when an existing DM is found and uses the existing `RoomID` directly). `room-worker` performs only minimal request-shape sanity checks (non-empty IDs/accounts, `RoomType ∈ {dm, botDM}`) and trusts the rest. +- Reuses the per-type sub-building from `room-worker`'s existing `processCreateRoomDM` / `processCreateRoomBotDM` via a small internal extraction (`buildDMSubs` / `buildBotDMSubs` helpers shared between the async-path code and the new sync handler). No new shared `pkg/createroom` package; `room-service` is not touched on this path. +- Cross-site counterparts: home-side state (Room + both subs) is written inline before reply; the `room_created` outbox event to the counterpart's site fires before reply but its remote-side materialization is best-effort eventually-consistent (same semantics as the async path). +- Idempotent against retries and races: insert-time duplicate-key on the Room or subscriptions unique index is treated as a redelivery — fetch the requester's existing `Subscription` and reply with it (same envelope as the freshly-created case). + +**Part 2 — HistoryConfig.SharedSince removal** +- `pkg/model/member.go`: drop `HistoryConfig.SharedSince`. +- `room-worker/handler.go.historySharedSincePtr`: signature reduces to `(mode HistoryMode, acceptedAt time.Time) *int64`. No more fallback chain, no caller-supplied branch. +- `room-service`: stop reading `req.History.SharedSince` (the field is gone). +- Tests across `pkg/model`, `room-service`, `room-worker`, `inbox-worker` updated to drop the field. + +Out of scope: + +- Channel sync creation (different surface — channel orchestration includes org expansion, capacity, sys-messages, broadcast; not needed for the immediate user-service flow). Channel sync remains a future follow-up. +- The user-service caller (the eventual consumer of the new sync endpoint) — separate PR. The validation contract this PR documents is enforced by user-service in that follow-up. +- DM cross-site simultaneous-create tiebreak (still recorded as a known gap from the original create-room spec). +- Restoration of any pre-existing `Restricted` channel semantics or other create-room behavior. +- A shared `pkg/createroom/validate` package. With user-service as the single sync caller, validation lives in user-service and the existing async-path validation stays in `room-service`. If a second sync caller appears, extract. + +## Part 1: Sync Server-to-Server DM Creation + +### Topology + +```text +user-service ──validates──> NATS req/reply ──> room-worker ──persists──> Mongo + │ + ├── publishes subscription.update events + └── publishes room_created outbox events +``` + +`room-service` is **not** in this path. The async user-facing path (`chat.user.{account}.request.room.{siteID}.create`) is unchanged. + +### NATS subject + +| Operation | Subject | Wildcard | Queue Group | Auth | +|-----------|---------|----------|-------------|------| +| Sync create DM/botDM | `chat.server.request.room.{siteID}.create.dm` | `chat.server.request.room.{siteID}.create.dm` | `room-worker` | Server credentials (same `chat.server.>` callout policy used by `RoomsInfoBatchSubject`) | + +The subject lives in the `chat.server.*` namespace, which is gated by NATS callout to require server credentials. The callout policy must be updated to permit subscribe on this subject from `room-worker` (today the policy permits `chat.server.>` on `room-service` for `RoomsInfoBatchSubject`). + +The subject does NOT carry the requester's account (unlike the user-facing `chat.user.{account}.request.room.{siteID}.create`); the requester is named in the request payload. + +`X-Request-ID` header is mandatory on the inbound request — the handler rejects with `errMissingRequestID` if absent. The header is propagated through any subsequent NATS hop (subscription.update events, outbox events) via `natsutil.NewMsg(ctx, ...)`. + +### Caller-side validation contract (binding on user-service) + +`user-service` MUST perform the following checks before issuing this request. `room-worker` does not re-run them; bad data passed in lands directly in Mongo. The list mirrors today's `room-service/handler.go:147-286` validation for DM/botDM: + +| Check | Source of truth | Rejection reason | +|---|---|---| +| Requester user exists, has `EngName` and `ChineseName` populated | users collection | invalid requester | +| Counterpart account != requester account | input | self-DM | +| Counterpart user exists | users collection | counterpart not found | +| For `dm`: counterpart has `EngName` and `ChineseName` populated | users collection | invalid counterpart | +| For `botDM`: app exists in apps collection AND `app.Assistant != nil` AND `app.Assistant.Enabled == true` | apps collection | bot not available | +| Dedup-existing: `subscriptions.findOne({u.account: requester, name: counterpart, roomType $in {dm, botDM}})` returns nothing | subscriptions collection | (not a rejection — caller short-circuits and returns the existing `RoomID` to its own caller without calling room-worker) | +| Account classification: `roomType = botDM` if counterpart account ends in `.bot` or starts with `p_`, else `dm` | input | (not a rejection — caller stamps `RoomType` in the request) | + +The dedup check is owned by user-service so room-worker stays a pure writer. **However**, room-worker still handles the duplicate-key error from the unique index defensively — concurrent races between two simultaneous user-service calls (or a user-service retry colliding with itself) are caught at insert time and treated as success-on-redelivery (see step 6 in the validation pipeline below). + +### Reply contract + +The success reply schema lives at `pkg/model.SyncCreateDMReply`: + +```go +// pkg/model/member.go +type SyncCreateDMReply struct { + Success bool `json:"success"` // always true on this path + Subscription model.Subscription `json:"subscription"` // the requester's sub +} +``` + +**Success:** + +```json +{ + "success": true, + "subscription": { /* full model.Subscription — the requester's sub */ } +} +``` + +The reply payload is the requester-scoped `Subscription` (the one keyed by `RequesterAccount`), not the counterpart's. `Subscription.RoomID` carries the room identifier; callers that need additional Room metadata can look it up via the existing `RoomsInfoBatchSubject`. + +Both branches that succeed return the same envelope shape: +- **Newly inserted DM/botDM** — a fresh `Room` was inserted alongside two new subscriptions; the requester's new subscription is returned. +- **Dedup-existing** — an existing DM/botDM matched the dedup query (`subscriptions.findOne({u.account, name, roomType $in {dm,botDM}})`); the requester's existing `Subscription` is returned unchanged. No new room or subscriptions are written. + +There is intentionally no `created` flag distinguishing the two cases. The caller (user-service app-subscribe) only needs the requester's Subscription to proceed; the new-vs-existing distinction is invisible by design — both cases mean "the DM exists and the requester is subscribed to it." If a future caller genuinely needs to distinguish, it can compare `Subscription.CreatedAt` against the request time. + +**Error:** standard `model.ErrorResponse` via `natsutil.ReplyError` (codebase-wide pattern): + +```json +{ + "error": "" +} +``` + +The error path matches every other NATS request/reply handler in the repo — the handler calls `natsutil.ReplyError(msg, sanitizeError(err).Error())`, and callers TryParseError the reply before falling back to unmarshalling `SyncCreateDMReply`. No new error envelope is introduced. The `Success` bool on `SyncCreateDMReply` is retained as an explicit-positive signal (cheap to compare on the caller side) but on this path is always `true`. + +### Inbound request + +The sync endpoint uses a **dedicated request type** rather than reusing `CreateRoomRequest`. The latter carries fields (`Orgs`, `Channels`, `Name`, `History`, etc.) that have no meaning for a pre-validated DM/botDM create — accepting them would invite drift between what user-service sends and what room-worker honors. + +```go +// pkg/model/member.go +type SyncCreateDMRequest struct { + RoomType RoomType `json:"roomType"` // "dm" | "botDM" + RequesterAccount string `json:"requesterAccount"` + OtherAccount string `json:"otherAccount"` +} +``` + +Three fields, all required. The request is intentionally minimal — room-worker re-resolves the User records from accounts to fetch the data it needs for persistence: + +| What room-worker derives at handler entry | Source | +|---|---| +| `requester.ID`, `requester.SiteID` | `store.GetUser(req.RequesterAccount)` — note `requester.SiteID == h.siteID` is invariant (user-service routes to the requester's home-site room-worker) | +| `other.ID`, `other.SiteID` | `store.GetUser(req.OtherAccount)` | +| Whether to emit cross-site outbox | `other.SiteID != h.siteID` | +| `roomID` | `idgen.BuildDMRoomID(requester.ID, other.ID)` | + +Sanity checks performed by room-worker: + +| Check | Rejection | +|---|---| +| `RoomType ∈ {dm, botDM}` | `errInvalidSyncDMRequest` | +| `RequesterAccount` and `OtherAccount` non-empty | `errInvalidSyncDMRequest` | +| `RequesterAccount != OtherAccount` | `errInvalidSyncDMRequest` (defense in depth) | +| `requester.SiteID == h.siteID` | `errCrossSiteRequester` (room-worker on the wrong site for this requester — user-service routing bug) | + +Notably absent: requester/counterpart `EngName`/`ChineseName` checks, bot `Assistant.Enabled` checks, and dedup-existing — those live in user-service per the validation contract above. The two `GetUser` calls room-worker makes are purely for **data resolution** (User.ID, User.SiteID), not validation. If `GetUser` returns `ErrUserNotFound`, room-worker treats it as an operational error (`errUserLookupFailed`) — user-service should have already verified existence; arriving here means a race or a caller-contract violation. + +room-worker stamps its own `acceptedAt = time.Now().UTC()` at handler entry. There is no `Timestamp` field on the request; per the project-wide principle, the backend never trusts a client-supplied timestamp. + +### Handler pipeline + +```text +1. Read X-Request-ID from ctx (set by natsrouter middleware). + If empty → reject errMissingRequestID. + If not a valid UUID → reject errInvalidRequestID. + +2. Unmarshal SyncCreateDMRequest from m.Msg.Data. + Validate request shape (RoomType ∈ {dm, botDM}, RequesterAccount and + OtherAccount non-empty, RequesterAccount != OtherAccount). + Otherwise → reject errInvalidSyncDMRequest. + +3. Resolve User records (data lookup, not validation): + requester, err := h.store.GetUser(ctx, req.RequesterAccount) + On ErrUserNotFound or any error → reject errUserLookupFailed. + If requester.SiteID != h.siteID → reject errCrossSiteRequester + (user-service routed to the wrong site). + other, err := h.store.GetUser(ctx, req.OtherAccount) + On ErrUserNotFound or any error → reject errUserLookupFailed. + +4. acceptedAt := time.Now().UTC() + roomID := idgen.BuildDMRoomID(requester.ID, other.ID) + +5. Persist Room: + room := &model.Room{ + ID: roomID, + Name: "", // DM/botDM Room.Name is empty + Type: req.RoomType, + CreatedBy: requester.ID, + SiteID: h.siteID, + CreatedAt: acceptedAt, + UpdatedAt: acceptedAt, + } + h.store.CreateRoom(ctx, room). + On mongo.ErrDuplicateKey → fetch existing Room. If existing matches + on (Type, SiteID, Name, CreatedBy) → treat as redelivery and continue + with existing room. On mismatch → reject errRoomIDCollision. + +6. Persist subscriptions (per-type sub-building via buildDMSubs / + buildBotDMSubs; same shape as room-worker's existing + processCreateRoomDM / processCreateRoomBotDM): + DM: + subs = [ + newSub(requester, SiteID: requester.SiteID (== h.siteID), + room, name: other.Account, + isSubscribed: true, acceptedAt), + newSub(other, SiteID: other.SiteID, + room, name: requester.Account, + isSubscribed: true, acceptedAt), + ] + botDM: + subs = [ + newSub(requester, SiteID: requester.SiteID, room, + name: other.Account, isSubscribed: true, acceptedAt), + newSub(other, SiteID: other.SiteID, room, + name: requester.Account, isSubscribed: false, acceptedAt), + ] + h.store.BulkCreateSubscriptions(ctx, subs). + On mongo.ErrDuplicateKey (race against a concurrent caller, or a + user-service retry whose first attempt already landed) → fetch the + requester's existing subscription via FindDMSubscription and reply + with it (success-on-redelivery branch, see step 10). + +7. Reconcile member counts: + h.store.ReconcileMemberCounts(ctx, room.ID). + On error: log and continue. Counts are recomputed by every membership + change; transient failure self-heals. + +8. Publish per-user subscription.update events: + For each sub in subs, publish on subject.SubscriptionUpdate(sub.User.Account) + with payload SubscriptionUpdateEvent{UserID, Subscription, Action:"added", + Timestamp: acceptedAt.UnixMilli()}. + NATS supercluster routes cross-site users' events to their home sites. + On error: log and continue. + +9. Publish cross-site room_created outbox event: + If other.SiteID != h.siteID, emit one outbox event: + subject: outbox.{h.siteID}.to.{other.SiteID}.room_created + payload: RoomCreatedOutbox{ + RoomID, RoomType: req.RoomType, RoomName: "", + HomeSiteID: h.siteID, + Accounts:[other.Account], + RequesterAccount: requester.Account, + Timestamp: acceptedAt.UnixMilli(), + } + Nats-Msg-Id: requestID + ":" + other.SiteID + Fire-and-forget — the reply does NOT wait for the remote site's + inbox-worker to materialize the sub. On error: log and continue. + +10. Reply to the caller: + Pick the requester-scoped subscription out of the subs slice (the + one whose User.Account == req.RequesterAccount). On the duplicate-key + redelivery branch in step 6, fetch via FindDMSubscription instead. + natsutil.ReplyJSON(msg, model.SyncCreateDMReply{ + Success: true, + Subscription: requesterSub, + }) +``` + +Two `GetUser` calls on the happy path (steps 3) — for **data resolution**, not validation. `FindDMSubscription` is called only on the duplicate-key redelivery branch (step 6). Total handler ~100 LOC. + +### Refactor — internal extraction in room-worker + +room-worker hosts both the existing async path (`processCreateRoom` chain) and the new sync handler. The two share the per-type sub-building; everything else is path-specific. Two small extractions inside room-worker: + +1. **`buildDMSubs(requester, other *model.User, room *model.Room, acceptedAt time.Time) []*model.Subscription`** — the two-line DM sub-building, called by both `processCreateRoomDM` (existing async path) and the new sync handler. Both paths have full `*model.User` records in scope at the call site (the sync handler resolves them via `GetUser` in step 3 of the handler pipeline). + +2. **`buildBotDMSubs(requester, bot *model.User, room *model.Room, acceptedAt time.Time) []*model.Subscription`** — same for botDM. + +Each User record provides `User.ID`, `User.Account`, and `User.SiteID` — everything the sub builder needs. + +No new shared package. No `pkg/createroom`. The async path's existing tests (`TestProcessCreateRoomDM`, `TestProcessCreateRoomBotDM`, `TestFinishCreateRoom`) continue to pass without modification — only the sub-construction line changes from inline to a helper call. + +### File layout + +| Path | Change | What it owns | +|------|--------|--------------| +| `pkg/model/member.go` | modify | new `SyncCreateDMRequest` and `SyncCreateDMReply` types | +| `pkg/model/member_test.go` | modify | JSON/BSON round-trip for the new types | +| `pkg/subject/subject.go` | modify | `RoomCreateDMSync(siteID)` builder | +| `pkg/subject/subject_test.go` | modify | round-trip test for the new subject | +| `room-worker/handler_sync_create_dm.go` | new | `natsServerCreateDM` handler + `handleSyncCreateDM` business logic | +| `room-worker/handler_sync_create_dm_test.go` | new | unit tests for the sync handler (mocked store) | +| `room-worker/handler.go` | modify | extract `buildDMSubs` / `buildBotDMSubs` from `processCreateRoomDM` / `processCreateRoomBotDM`. No behavior change. | +| `room-worker/main.go` | modify | subscribe on `chat.server.request.room.{siteID}.create.dm` with queue group `room-worker` and server credentials; add to graceful-shutdown drain order | +| `room-worker/integration_test.go` | modify | add `TestSyncCreateDM_*` integration tests with testcontainers | +| **No changes** | — | `room-service/*` is **not** touched by Part 1 | +| NATS callout policy (ops/IaC) | modify | permit subscribe on `chat.server.request.room.>` from `room-worker` (today the policy permits this only on `room-service` for `RoomsInfoBatchSubject`) | + +### Tests + +**Unit (`room-worker/handler_sync_create_dm_test.go`):** +- DM happy path: handler returns `success:true` with the requester's `Subscription` (its `User.Account == req.RequesterAccount`, `RoomID == BuildDMRoomID(requester.ID, other.ID)`, `IsSubscribed=true`, `Name == req.OtherAccount`); Room + 2 subs persisted in store; `subscription.update` events captured for both users. +- botDM happy path: returns `success:true` with the requester's `Subscription` (`IsSubscribed=true`); the bot's sub (`IsSubscribed=false`) is persisted in store but NOT returned. +- Cross-site DM: `other.SiteID != h.siteID` (resolved via `GetUser`) → captured outbox publish goes to the counterpart's site with the right payload shape (`RoomName=""`, `Accounts:[other.Account]`, `Nats-Msg-Id = requestID:destSite`). +- Same-site DM: `other.SiteID == h.siteID` → no outbox publish. +- Duplicate-key redelivery on Room insert (matching existing room): returns `success:true` with the requester's existing Subscription; no second sub inserted. +- Duplicate-key on subscription bulk insert (race): handler calls `FindDMSubscription` to fetch the requester's existing sub and returns it. +- Error path: malformed request (`RoomType` outside `{dm,botDM}`, empty `RequesterAccount`/`OtherAccount`, `RequesterAccount == OtherAccount`) rejected with `errInvalidSyncDMRequest`; reply parses as `model.ErrorResponse`. +- Requester `GetUser` fails with `ErrUserNotFound` → rejected with `errUserLookupFailed` (operational error; user-service should have verified existence). +- Requester resolves but `requester.SiteID != h.siteID` → rejected with `errCrossSiteRequester`. +- Other `GetUser` fails with `ErrUserNotFound` → rejected with `errUserLookupFailed`. +- Missing `X-Request-ID` rejected with `errMissingRequestID`. +- `ReconcileMemberCounts` failure: logged, reply still succeeds. +- `subscription.update` publish failure: logged, reply still succeeds. +- Outbox publish failure: logged, reply still succeeds. + +**Integration (`room-worker/integration_test.go`):** +- `TestSyncCreateDM_DM_PersistsRoomAndSubs`: real Mongo + testcontainers; asserts Room + 2 subs + `subscription.update` events fired. +- `TestSyncCreateDM_BotDM_CrossSiteOutbox`: cross-site botDM; asserts outbox event to the bot's home site. +- `TestSyncCreateDM_RetryIdempotent`: invoke twice with the same `X-Request-ID` (and same Room/Sub IDs); assert exactly one Room and two subs end up persisted; both replies return the same requester Subscription. +- `TestSyncCreateDM_ConcurrentRace`: two goroutines invoke simultaneously with the same `(requesterID, otherID)`; assert both succeed, exactly one Room and two subs persisted, both replies return the same Subscription. + +## Part 2: Remove `HistoryConfig.SharedSince` + +### Rule + +`HistoryConfig` carries only `Mode` — there is no `SharedSince` field on it. `Subscription.HistorySharedSince` is derived **only** from `acceptedAt` (the request-acceptance time stamped by `room-service`). The client never supplies this value. When `HistoryMode == "none"` the subscription is restricted starting at `acceptedAt`; otherwise the field is `nil` (full history visible). + +### Code changes + +**`pkg/model/member.go`:** + +```diff + type HistoryConfig struct { + Mode HistoryMode `json:"mode" bson:"mode"` +- // SharedSince (ms): when non-nil under HistoryModeNone, takes precedence over acceptedAt. +- SharedSince *int64 `json:"sharedSince,omitempty" bson:"sharedSince,omitempty"` + } +``` + +`MemberAddEvent.HistorySharedSince` (the cross-site outbox payload field) stays — it carries the server-derived value for `inbox-worker` to consume on remote sites. + +**`room-worker/handler.go`:** + +```diff +-// historySharedSincePtr resolves the History.SharedSince value for an +-// add-member request. Mode != HistoryModeNone → nil (history is unrestricted). +-// Otherwise prefers the caller-supplied req.History.SharedSince when non-nil +-// and positive, falling back to req.Timestamp. This single source of truth +-// keeps the local subscription's HistorySharedSince consistent with the +-// MemberAddEvent published to the user's subject and the cross-site outbox +-// payload — without it, an explicit caller cutoff would be silently +-// overwritten with the request acceptance timestamp at fan-out time. +-func historySharedSincePtr(history model.HistoryConfig, timestamp int64, roomID string) *int64 { +- if history.Mode != model.HistoryModeNone { +- return nil +- } +- if history.SharedSince != nil && *history.SharedSince > 0 { +- ss := *history.SharedSince +- return &ss +- } +- if timestamp <= 0 { +- slog.Error("restricted history with missing timestamp, emitting nil", "roomID", roomID, "mode", history.Mode) +- return nil +- } +- return ×tamp +-} ++// historySharedSincePtr returns &acceptedAt when history is restricted ++// (mode == HistoryModeNone), nil otherwise. Backend always uses ++// acceptedAt; clients never supply this timestamp. ++func historySharedSincePtr(mode model.HistoryMode, acceptedAt time.Time) *int64 { ++ if mode != model.HistoryModeNone { ++ return nil ++ } ++ ms := acceptedAt.UnixMilli() ++ return &ms ++} +``` + +Call sites (three of them) update from `historySharedSincePtr(req.History, req.Timestamp, req.RoomID)` to `historySharedSincePtr(req.History.Mode, acceptedAt)`. The `req.Timestamp <= 0 → time.Now()` defensive guard at the top of `processAddMembers` already normalizes the timestamp, so the helper no longer needs its own guard or the `roomID` parameter for logging. + +**Shipped deviation:** the helper kept its original `(history model.HistoryConfig, timestamp int64, roomID string)` signature to minimize call-site churn — only the body collapsed to the single-branch form (`Mode != HistoryModeNone → nil; otherwise ×tamp`). Call sites still pass `req.History`, `req.Timestamp`, `req.RoomID`. The `roomID` parameter is no longer read but stayed in the signature. + +**`room-service/handler.go`:** + +The `handleAddMembers` flow forwards `req.History` (now without `SharedSince`) through to room-worker via JetStream. With the field gone from the model, it's no longer carried in the payload. No code change beyond the existing dedup/strip pass; the JSON unmarshal on the worker side simply doesn't see it. + +**Tests:** + +Sweep `grep -rn "SharedSince:" room-service/ room-worker/ inbox-worker/ pkg/`. Every test that sets `History: HistoryConfig{Mode: ..., SharedSince: &ts}` drops the `SharedSince` line; assertions that compared against the caller-supplied value rebase to `acceptedAt`'s value. + +Estimated test changes: 5–10 lines across ~5 test files. + +### Why the change is safe + +- `HistoryConfig.SharedSince` is currently always set to either `nil` (the common case) or `&req.Timestamp` (the existing fallback in `historySharedSincePtr`). No production caller sends a meaningfully different value. +- The frontend (`chat-frontend`) doesn't read or set `HistoryConfig.SharedSince` directly; it only sets `Mode`. +- Cross-site `MemberAddEvent.HistorySharedSince` is server-computed in room-worker and remains unchanged on the wire (still an `int64` ms timestamp). + +The drop is purely a cleanup of a misleading API surface. No migration is needed. + +## Error Handling + +### Sentinel errors (room-worker) + +The sync handler defines a small set of new sentinels in `room-worker`. Validation-level sentinels from `room-service` (`errSelfDM`, `errUserNotFound`, `errInvalidUserData`, `errBotNotAvailable`) are NOT used here — those checks happen in user-service, which is responsible for surfacing its own error vocabulary to upstream callers. + +```go +// room-worker/handler_sync_create_dm.go +var ( + errInvalidSyncDMRequest = errors.New("invalid sync DM request") + errMissingRequestID = errors.New("missing X-Request-ID header") + errInvalidRequestID = errors.New("invalid X-Request-ID header") + errUserLookupFailed = errors.New("user lookup failed") + errCrossSiteRequester = errors.New("requester is not on this site") + errRoomIDCollision = errors.New("room ID collision (existing room metadata mismatch)") +) +``` + +`errInvalidSyncDMRequest` covers shape failures: invalid `RoomType`, empty `RequesterAccount`/`OtherAccount`, or `RequesterAccount == OtherAccount`. + +`errUserLookupFailed` is the operational error returned when `GetUser` fails for either account — including `ErrUserNotFound`. This is intentionally a single opaque sentinel: user-service is contractually required to have verified existence before calling, so any miss here is a contract violation or a race, not something the caller should branch on. + +`errCrossSiteRequester` indicates user-service routed to the wrong site (the requester's home site is not this room-worker's site). It surfaces a routing bug rather than a data error. + +### sanitizeError + +room-worker does not have a `sanitizeError` helper today (it doesn't reply to clients on the async path — its outputs go to JetStream). The sync handler adds a thin `sanitizeSyncDMError` helper that maps known sentinels to user-displayable strings and wraps anything else as `"internal error"` (never leaks raw error text). Returned via `natsutil.ReplyError(msg, sanitizeSyncDMError(err).Error())`. + +### Sync-path failure modes + +| Scenario | Behavior | +|---|---| +| `BulkCreateSubscriptions` partially fails (e.g. one of two subs hits a duplicate-key race against another concurrent sync call) | Treat as redelivery: fetch the requester's existing subscription and reply with the success envelope. | +| `ReconcileMemberCounts` fails | Log and continue; the count is recomputed by every membership change, so a transient failure self-heals. The reply still succeeds. | +| `subscription.update` publish fails | Log and continue; the user's frontend reconnects and reads the durable subscription. The reply still succeeds. | +| Outbox publish fails (the `room_created` event to a remote site) | Log; the reply still succeeds. The remote site won't have the sub until the next reconciliation; this matches the async path's behavior. | + +There is no error path that returns "partial success" to the caller. Either the room and subs are durably persisted on the home site (success envelope with the requester's Subscription), or the request fails and nothing is written (error envelope). + +## Testing Strategy + +### Unit tests + +- `pkg/subject/subject_test.go`: round-trip `RoomCreateDMSync(siteID)`. +- `pkg/model/member_test.go`: JSON/BSON round-trip for `SyncCreateDMRequest` and `SyncCreateDMReply`. Also assert `HistoryConfig` no longer marshals or unmarshals `sharedSince` (Part 2). Add an explicit assertion that `{"mode":"none","sharedSince":1740000000000}` JSON unmarshals to `HistoryConfig{Mode: HistoryModeNone}` with the extra field silently ignored — backward-compatible with any in-flight requests. +- `room-worker/handler_sync_create_dm_test.go`: see Part 1's test list above. +- `room-worker/handler_test.go`: regression coverage for `buildDMSubs` / `buildBotDMSubs` extraction — async path's existing `TestProcessCreateRoomDM` / `TestProcessCreateRoomBotDM` still pass without modification. + +### Integration tests + +- `room-worker/integration_test.go` adds the `TestSyncCreateDM_*` integration tests listed in Part 1. +- `room-worker/integration_test.go` and `inbox-worker/integration_test.go` get the test cleanups for Part 2 (`SharedSince:` removal). + +### Coverage targets + +- `room-worker` sync DM handler: ≥ 90%. +- `room-worker.buildDMSubs` / `buildBotDMSubs`: 100% (pure functions, two cases each). +- `room-worker.historySharedSincePtr`: 100% (only two branches). + +### Out-of-band verification + +- `make lint` clean. +- `make test` (full repo, race) green. +- `make test-integration SERVICE=room-worker` green. +- `make test-integration SERVICE=inbox-worker` green. +- Manual NATS request to local docker-compose (server credentials, targeting `chat.server.request.room.{siteID}.create.dm`) creates a botDM end-to-end and the reply contains `success:true` with the requester's `Subscription`. Re-issuing the same request returns the same Subscription (also `success:true`, indistinguishable from the first call). + +## Implementation deviations from spec + +| Spec said | Shipped | +|-----------|---------| +| Call `ReconcileMemberCounts`; log on error | Skip; set `Room.UserCount`/`AppCount` inline at create (DM=2,0; botDM=1,1). DMs have fixed rosters — no future membership change to self-heal. | +| Outbox publish: log and continue | Fail the request. Sync RPC has no auto-retry; silent federation break otherwise. | +| `FindDMSubscription` only on dup-key fallback | Always re-read after `BulkCreateSubscriptions` (the store swallows dup-key, in-memory sub may diverge from persisted). | +| `errUserLookupFailed` on any `GetUser` error | Only on `ErrUserNotFound`; transient errors wrapped with `%w err` → surface as `"internal error"`. | +| `errRoomIDCollision` surfaces verbatim | Masked as `"internal error"`; mismatch details logged. | +| New file `handler_sync_create_dm.go` | Sync code lives at the bottom of `room-worker/handler.go`; tests stay in `handler_sync_create_dm_test.go`. | + +Out-of-spec fix in same PR: `room-service/store_mongo.go` partial-unique DM dedup index used `roomType: {$in:…}` which MongoDB rejects in `partialFilterExpression`. Split into two `$eq`-filtered indexes (one per room type). + +--- + +## Follow-ups + +- **Sync caller in user-service** — the user-service-side caller of the new sync endpoint (for the app-subscribe flow) is a separate PR. That PR is responsible for implementing the validation contract documented above (existence checks, EngName/ChineseName checks, bot-enabled check, dedup-existing). This PR delivers the room-worker surface only. +- **Shared validation package** — if a second internal sync caller appears in the future, extract the validation list documented above into `pkg/createroom/validate` so callers don't drift. +- **DM cross-site simultaneous-create tiebreak** — recorded as a known gap in the original create-room spec. Not addressed here; equally affects the sync and async paths. +- **Channel sync creation** — if a future internal flow needs to create a channel synchronously, the same shape applies but with an expanded subject (`...create.channel`) and the per-type loop from `processCreateRoomChannel`. Out of scope here. + +--- + +*End of design document.* diff --git a/history-service/internal/mongorepo/room.go b/history-service/internal/mongorepo/room.go index 9b97ea65a..acc1dfbfc 100644 --- a/history-service/internal/mongorepo/room.go +++ b/history-service/internal/mongorepo/room.go @@ -9,17 +9,18 @@ import ( "go.mongodb.org/mongo-driver/v2/mongo" "github.com/hmchangw/chat/pkg/model" + "github.com/hmchangw/chat/pkg/mongoutil" ) const roomsCollection = "rooms" type RoomRepo struct { - rooms *Collection[model.Room] + rooms *mongoutil.Collection[model.Room] } func NewRoomRepo(db *mongo.Database) *RoomRepo { return &RoomRepo{ - rooms: NewCollection[model.Room](db.Collection(roomsCollection)), + rooms: mongoutil.NewCollection[model.Room](db.Collection(roomsCollection)), } } @@ -28,7 +29,7 @@ func NewRoomRepo(db *mongo.Database) *RoomRepo { func (r *RoomRepo) GetMinUserLastSeenAt(ctx context.Context, roomID string) (*time.Time, error) { room, err := r.rooms.FindOne(ctx, bson.M{"_id": roomID}, - WithProjection(bson.M{"minUserLastSeenAt": 1, "_id": 0}), + mongoutil.WithProjection(bson.M{"minUserLastSeenAt": 1, "_id": 0}), ) if err != nil { return nil, fmt.Errorf("get room %s minUserLastSeenAt: %w", roomID, err) diff --git a/history-service/internal/service/service.go b/history-service/internal/service/service.go index 802810ecc..e35c4b181 100644 --- a/history-service/internal/service/service.go +++ b/history-service/internal/service/service.go @@ -6,6 +6,7 @@ import ( "github.com/hmchangw/chat/history-service/internal/cassrepo" "github.com/hmchangw/chat/history-service/internal/models" + "github.com/hmchangw/chat/history-service/internal/mongorepo" pkgmodel "github.com/hmchangw/chat/pkg/model" "github.com/hmchangw/chat/pkg/mongoutil" "github.com/hmchangw/chat/pkg/natsrouter" diff --git a/pkg/minioutil/minio_integration_test.go b/pkg/minioutil/minio_integration_test.go index 81b8f4bc8..164513ae6 100644 --- a/pkg/minioutil/minio_integration_test.go +++ b/pkg/minioutil/minio_integration_test.go @@ -39,7 +39,9 @@ func TestIntegration_Connect_Smoke(t *testing.T) { func TestIntegration_NewBucket_Success(t *testing.T) { client, bucketName := testutil.MinIO(t, "minioutil") - type doc struct{ Name string `json:"name"` } + type doc struct { + Name string `json:"name"` + } b, err := NewBucket[doc](context.Background(), client, bucketName) require.NoError(t, err) require.NotNil(t, b) @@ -51,7 +53,9 @@ func TestIntegration_NewBucket_Success(t *testing.T) { func TestIntegration_NewBucket_MissingBucket(t *testing.T) { client, _ := testutil.MinIO(t, "minioutil") - type doc struct{ Name string `json:"name"` } + type doc struct { + Name string `json:"name"` + } _, err := NewBucket[doc](context.Background(), client, "definitely-does-not-exist") require.Error(t, err) assert.Contains(t, err.Error(), "definitely-does-not-exist") @@ -93,7 +97,9 @@ func TestIntegration_Put_Overwrites(t *testing.T) { client, bucketName := testutil.MinIO(t, "minioutil") ctx := context.Background() - type doc struct{ V int `json:"v"` } + type doc struct { + V int `json:"v"` + } b, err := NewBucket[doc](ctx, client, bucketName) require.NoError(t, err) @@ -138,7 +144,9 @@ func TestIntegration_Get_MissingKey(t *testing.T) { client, bucketName := testutil.MinIO(t, "minioutil") ctx := context.Background() - type doc struct{ V int `json:"v"` } + type doc struct { + V int `json:"v"` + } b, err := NewBucket[doc](ctx, client, bucketName) require.NoError(t, err) @@ -155,7 +163,9 @@ func TestIntegration_Get_MalformedJSON(t *testing.T) { client, bucketName := testutil.MinIO(t, "minioutil") ctx := context.Background() - type doc struct{ V int `json:"v"` } + type doc struct { + V int `json:"v"` + } b, err := NewBucket[doc](ctx, client, bucketName) require.NoError(t, err) @@ -174,7 +184,9 @@ func TestIntegration_List_Prefix(t *testing.T) { client, bucketName := testutil.MinIO(t, "minioutil") ctx := context.Background() - type doc struct{ V int `json:"v"` } + type doc struct { + V int `json:"v"` + } b, err := NewBucket[doc](ctx, client, bucketName) require.NoError(t, err) @@ -200,7 +212,9 @@ func TestIntegration_List_ZeroMaxKeysReturnsAll(t *testing.T) { client, bucketName := testutil.MinIO(t, "minioutil") ctx := context.Background() - type doc struct{ V int `json:"v"` } + type doc struct { + V int `json:"v"` + } b, err := NewBucket[doc](ctx, client, bucketName) require.NoError(t, err) @@ -221,7 +235,9 @@ func TestIntegration_List_MaxKeysCap(t *testing.T) { client, bucketName := testutil.MinIO(t, "minioutil") ctx := context.Background() - type doc struct{ V int `json:"v"` } + type doc struct { + V int `json:"v"` + } b, err := NewBucket[doc](ctx, client, bucketName) require.NoError(t, err) @@ -254,7 +270,9 @@ func TestIntegration_List_EmptyResult(t *testing.T) { client, bucketName := testutil.MinIO(t, "minioutil") ctx := context.Background() - type doc struct{ V int `json:"v"` } + type doc struct { + V int `json:"v"` + } b, err := NewBucket[doc](ctx, client, bucketName) require.NoError(t, err) @@ -270,7 +288,9 @@ func TestIntegration_Delete_RemovesExisting(t *testing.T) { client, bucketName := testutil.MinIO(t, "minioutil") ctx := context.Background() - type doc struct{ V int `json:"v"` } + type doc struct { + V int `json:"v"` + } b, err := NewBucket[doc](ctx, client, bucketName) require.NoError(t, err) @@ -289,7 +309,9 @@ func TestIntegration_Delete_Idempotent(t *testing.T) { client, bucketName := testutil.MinIO(t, "minioutil") ctx := context.Background() - type doc struct{ V int `json:"v"` } + type doc struct { + V int `json:"v"` + } b, err := NewBucket[doc](ctx, client, bucketName) require.NoError(t, err) diff --git a/pkg/model/member.go b/pkg/model/member.go index eae323fe4..9232b1535 100644 --- a/pkg/model/member.go +++ b/pkg/model/member.go @@ -18,8 +18,6 @@ const ( type HistoryConfig struct { Mode HistoryMode `json:"mode" bson:"mode"` - // SharedSince (ms): when non-nil under HistoryModeNone, takes precedence over acceptedAt. - SharedSince *int64 `json:"sharedSince,omitempty" bson:"sharedSince,omitempty"` } // ChannelRef identifies a source channel by room + its home site. Used by add-member @@ -149,3 +147,17 @@ type CreateRoomRequest struct { RequesterAccount string `json:"requesterAccount" bson:"requesterAccount"` Timestamp int64 `json:"timestamp" bson:"timestamp"` } + +// SyncCreateDMRequest is the request payload for chat.server.request.room.{siteID}.create.dm. +// Caller (user-service) is responsible for all data-integrity validation before issuing. +type SyncCreateDMRequest struct { + RoomType RoomType `json:"roomType" bson:"roomType"` + RequesterAccount string `json:"requesterAccount" bson:"requesterAccount"` + OtherAccount string `json:"otherAccount" bson:"otherAccount"` +} + +// SyncCreateDMReply is the success reply; errors flow via natsutil.ReplyError instead. +type SyncCreateDMReply struct { + Success bool `json:"success" bson:"success"` + Subscription Subscription `json:"subscription" bson:"subscription"` +} diff --git a/pkg/model/model_test.go b/pkg/model/model_test.go index 12fbe66ef..3a6cdbc88 100644 --- a/pkg/model/model_test.go +++ b/pkg/model/model_test.go @@ -1981,3 +1981,45 @@ func TestMessageTypeAndAsyncJobStatusConstants(t *testing.T) { assert.Equal(t, "ok", model.AsyncJobStatusOK) assert.Equal(t, "error", model.AsyncJobStatusError) } + +func TestSyncCreateDMRequestJSON(t *testing.T) { + src := model.SyncCreateDMRequest{ + RoomType: model.RoomTypeDM, + RequesterAccount: "alice", + OtherAccount: "bob", + } + b, err := json.Marshal(src) + require.NoError(t, err) + + assert.JSONEq(t, `{"roomType":"dm","requesterAccount":"alice","otherAccount":"bob"}`, string(b)) + + var dst model.SyncCreateDMRequest + require.NoError(t, json.Unmarshal(b, &dst)) + assert.Equal(t, src, dst) +} + +func TestSyncCreateDMReplyJSON(t *testing.T) { + now := time.Date(2026, 5, 7, 12, 0, 0, 0, time.UTC) + src := model.SyncCreateDMReply{ + Success: true, + Subscription: model.Subscription{ + ID: "sub1", + User: model.SubscriptionUser{ID: "u1", Account: "alice"}, + RoomID: "room1", + SiteID: "site-a", + Name: "bob", + RoomType: model.RoomTypeDM, + IsSubscribed: true, + JoinedAt: now, + }, + } + b, err := json.Marshal(src) + require.NoError(t, err) + + var dst model.SyncCreateDMReply + require.NoError(t, json.Unmarshal(b, &dst)) + assert.True(t, dst.Success) + assert.Equal(t, src.Subscription.ID, dst.Subscription.ID) + assert.Equal(t, src.Subscription.User, dst.Subscription.User) + assert.Equal(t, src.Subscription.RoomID, dst.Subscription.RoomID) +} diff --git a/pkg/subject/subject.go b/pkg/subject/subject.go index e0e7bd32e..302b12239 100644 --- a/pkg/subject/subject.go +++ b/pkg/subject/subject.go @@ -192,6 +192,11 @@ func RoomsInfoBatch(siteID string) string { return fmt.Sprintf("chat.server.request.room.%s.info.batch", siteID) } +// RoomCreateDMSync is the server-to-server request subject for synchronous DM/botDM creation. +func RoomCreateDMSync(siteID string) string { + return fmt.Sprintf("chat.server.request.room.%s.create.dm", siteID) +} + // --- Wildcard patterns for subscriptions --- func MsgSendWildcard(siteID string) string { diff --git a/pkg/subject/subject_test.go b/pkg/subject/subject_test.go index 797d347d9..ce054713d 100644 --- a/pkg/subject/subject_test.go +++ b/pkg/subject/subject_test.go @@ -342,3 +342,11 @@ func TestRoomCanonicalOperation(t *testing.T) { }) } } + +func TestRoomCreateDMSync(t *testing.T) { + got := subject.RoomCreateDMSync("site-a") + want := "chat.server.request.room.site-a.create.dm" + if got != want { + t.Errorf("RoomCreateDMSync: got %q, want %q", got, want) + } +} diff --git a/pkg/testutil/testimages/testimages.go b/pkg/testutil/testimages/testimages.go index a6afd10a9..68ca5e68b 100644 --- a/pkg/testutil/testimages/testimages.go +++ b/pkg/testutil/testimages/testimages.go @@ -20,7 +20,10 @@ const ( Cassandra = "cassandra:4.1.3" // Mongo is the image for every Mongo-backed integration test. - Mongo = "mongo:8" + // Pinned to 4.4.15 to match production; catches operator-allow-list + // regressions like $in in partialFilterExpression that newer Mongo + // silently accepts. + Mongo = "mongo:4.4.15" // NATS is the image for every NATS-backed integration test // (core NATS + JetStream + WebSocket). diff --git a/room-service/store_mongo.go b/room-service/store_mongo.go index f1bf7f95f..bf5634098 100644 --- a/room-service/store_mongo.go +++ b/room-service/store_mongo.go @@ -79,28 +79,6 @@ func (s *MongoStore) EnsureIndexes(ctx context.Context) error { if _, err := s.apps.Indexes().CreateOne(ctx, appsIndex); err != nil { return fmt.Errorf("ensure apps index: %w", err) } - - // Partial UNIQUE index for FindDMSubscription, restricted to DM/botDM subs. - // Uniqueness is the database-layer enforcement of the create-room contract: - // concurrent CreateRoom requests racing to insert the same DM/botDM pair - // will see one succeed and the others receive a duplicate-key error, which - // the worker treats as "open existing" via the GetRoom lookup branch. - dmDedupIndex := mongo.IndexModel{ - Keys: bson.D{ - {Key: "u.account", Value: 1}, - {Key: "name", Value: 1}, - {Key: "roomType", Value: 1}, - }, - Options: options.Index(). - SetName("u_account_name_roomtype_dm_idx"). - SetUnique(true). - SetPartialFilterExpression(bson.M{ - "roomType": bson.M{"$in": bson.A{model.RoomTypeDM, model.RoomTypeBotDM}}, - }), - } - if _, err := s.subscriptions.Indexes().CreateOne(ctx, dmDedupIndex); err != nil { - return fmt.Errorf("ensure dm-dedup subscription index: %w", err) - } return nil } diff --git a/room-worker/handler.go b/room-worker/handler.go index 294eb2714..c17efda4a 100644 --- a/room-worker/handler.go +++ b/room-worker/handler.go @@ -10,6 +10,7 @@ import ( "strings" "time" + "github.com/Marz32onE/instrumentation-go/otel-nats/otelnats" "github.com/nats-io/nats.go/jetstream" "go.mongodb.org/mongo-driver/v2/mongo" @@ -55,22 +56,11 @@ func messageDedupSeed(ctx context.Context, handler, roomID, payloadSeed string) return payloadSeed } -// historySharedSincePtr resolves the History.SharedSince value for an -// add-member request. Mode != HistoryModeNone → nil (history is unrestricted). -// Otherwise prefers the caller-supplied req.History.SharedSince when non-nil -// and positive, falling back to req.Timestamp. This single source of truth -// keeps the local subscription's HistorySharedSince consistent with the -// MemberAddEvent published to the user's subject and the cross-site outbox -// payload — without it, an explicit caller cutoff would be silently -// overwritten with the request acceptance timestamp at fan-out time. +// historySharedSincePtr returns nil for unrestricted history; req.Timestamp under HistoryModeNone. func historySharedSincePtr(history model.HistoryConfig, timestamp int64, roomID string) *int64 { if history.Mode != model.HistoryModeNone { return nil } - if history.SharedSince != nil && *history.SharedSince > 0 { - ss := *history.SharedSince - return &ss - } if timestamp <= 0 { slog.Error("restricted history with missing timestamp, emitting nil", "roomID", roomID, "mode", history.Mode) return nil @@ -864,6 +854,22 @@ func resolveRoomName(req *model.CreateRoomRequest, roomType model.RoomType) stri return "" } +// buildDMSubs returns the two DM subs (each names the counterpart, IsSubscribed=false). +func buildDMSubs(requester, other *model.User, room *model.Room, acceptedAt time.Time) []*model.Subscription { + return []*model.Subscription{ + newSub(idgen.GenerateUUIDv7(), requester, room, nil, other.Account, false, acceptedAt), + newSub(idgen.GenerateUUIDv7(), other, room, nil, requester.Account, false, acceptedAt), + } +} + +// buildBotDMSubs returns the two botDM subs (human IsSubscribed=true, bot IsSubscribed=false). +func buildBotDMSubs(requester, bot *model.User, room *model.Room, acceptedAt time.Time) []*model.Subscription { + return []*model.Subscription{ + newSub(idgen.GenerateUUIDv7(), requester, room, nil, bot.Account, true, acceptedAt), + newSub(idgen.GenerateUUIDv7(), bot, room, nil, requester.Account, false, acceptedAt), + } +} + // newSub constructs a Subscription from its constituent parts. func newSub(id string, user *model.User, room *model.Room, roles []model.Role, name string, isSubscribed bool, joinedAt time.Time) *model.Subscription { @@ -983,10 +989,7 @@ func (h *Handler) processCreateRoomDM(ctx context.Context, req *model.CreateRoom return fmt.Errorf("get counterpart: %w", err) } - subs := []*model.Subscription{ - newSub(idgen.GenerateUUIDv7(), requester, room, nil, other.Account, false, acceptedAt), - newSub(idgen.GenerateUUIDv7(), other, room, nil, requester.Account, false, acceptedAt), - } + subs := buildDMSubs(requester, other, room, acceptedAt) if err := h.store.BulkCreateSubscriptions(ctx, subs); err != nil { return fmt.Errorf("bulk create subs: %w", err) } @@ -1002,12 +1005,7 @@ func (h *Handler) processCreateRoomBotDM(ctx context.Context, req *model.CreateR return fmt.Errorf("get bot user: %w", err) } - subs := []*model.Subscription{ - // Human's sub: Name = bot account; IsSubscribed = true - newSub(idgen.GenerateUUIDv7(), requester, room, nil, bot.Account, true, acceptedAt), - // Bot's sub: Name = requester's account; IsSubscribed = false - newSub(idgen.GenerateUUIDv7(), bot, room, nil, requester.Account, false, acceptedAt), - } + subs := buildBotDMSubs(requester, bot, room, acceptedAt) if err := h.store.BulkCreateSubscriptions(ctx, subs); err != nil { return fmt.Errorf("bulk create subs: %w", err) } @@ -1228,3 +1226,233 @@ func (h *Handler) publishCanonical(ctx context.Context, msg *model.Message, site } return h.publish(ctx, subject.MsgCanonicalCreated(siteID), data, msg.ID) } + +// Sync DM endpoint handlers (chat.server.request.room.{siteID}.create.dm). + +var ( + errMissingRequestID = errors.New("missing X-Request-ID header") + errInvalidRequestID = errors.New("invalid X-Request-ID header") + errInvalidSyncDMRequest = errors.New("invalid sync DM request") + errUserLookupFailed = errors.New("user lookup failed") + errCrossSiteRequester = errors.New("requester is not on this site") + errRoomIDCollision = errors.New("room ID collision (existing room metadata mismatch)") +) + +// sanitizeSyncDMError surfaces sentinel messages; masks anything else as "internal error". +func sanitizeSyncDMError(err error) string { + if err == nil { + return "" + } + switch { + case errors.Is(err, errMissingRequestID), + errors.Is(err, errInvalidRequestID), + errors.Is(err, errInvalidSyncDMRequest), + errors.Is(err, errUserLookupFailed), + errors.Is(err, errCrossSiteRequester): + return err.Error() + default: + return "internal error" + } +} + +// handleSyncCreateDM creates a DM/botDM room + 2 subs and returns the requester's sub. +func (h *Handler) handleSyncCreateDM(ctx context.Context, data []byte) (*model.SyncCreateDMReply, error) { + requestID := natsutil.RequestIDFromContext(ctx) + if requestID == "" { + return nil, errMissingRequestID + } + if !idgen.IsValidUUID(requestID) { + return nil, errInvalidRequestID + } + + var req model.SyncCreateDMRequest + if err := json.Unmarshal(data, &req); err != nil { + return nil, errInvalidSyncDMRequest + } + if err := validateSyncCreateDMShape(&req); err != nil { + return nil, err + } + + users, err := h.store.FindUsersByAccounts(ctx, []string{req.RequesterAccount, req.OtherAccount}) + if err != nil { + return nil, fmt.Errorf("find dm users: %w", err) + } + byAccount := make(map[string]*model.User, len(users)) + for i := range users { + byAccount[users[i].Account] = &users[i] + } + requester, ok := byAccount[req.RequesterAccount] + if !ok { + return nil, errUserLookupFailed + } + if requester.SiteID != h.siteID { + return nil, errCrossSiteRequester + } + other, ok := byAccount[req.OtherAccount] + if !ok { + return nil, errUserLookupFailed + } + + acceptedAt := time.Now().UTC() + roomID := idgen.BuildDMRoomID(requester.ID, other.ID) + + // DMs/botDMs have a fixed 2-member roster — set counts at creation; no Reconcile needed. + userCount, appCount := 2, 0 + if req.RoomType == model.RoomTypeBotDM { + userCount, appCount = 1, 1 + } + + room := &model.Room{ + ID: roomID, + Name: "", + Type: req.RoomType, + CreatedBy: requester.ID, + SiteID: h.siteID, + UserCount: userCount, + AppCount: appCount, + CreatedAt: acceptedAt, + UpdatedAt: acceptedAt, + } + if err := h.store.CreateRoom(ctx, room); err != nil { + if !mongo.IsDuplicateKeyError(err) { + return nil, fmt.Errorf("create room: %w", err) + } + existing, fetchErr := h.store.GetRoom(ctx, room.ID) + if fetchErr != nil { + return nil, fmt.Errorf("fetch room on duplicate-key: %w", fetchErr) + } + if existing.Type != room.Type || + existing.SiteID != room.SiteID || + existing.Name != room.Name || + existing.CreatedBy != room.CreatedBy { + slog.Error("sync DM: room ID collision", + "roomID", room.ID, + "existingType", existing.Type, "wantType", room.Type, + "existingSiteID", existing.SiteID, "wantSiteID", room.SiteID, + "existingCreatedBy", existing.CreatedBy, "wantCreatedBy", room.CreatedBy, + "requestID", requestID) + return nil, errRoomIDCollision + } + room = existing + acceptedAt = existing.CreatedAt + } + + // validateSyncCreateDMShape already gated this to {dm, botDM}. + var subs []*model.Subscription + if req.RoomType == model.RoomTypeBotDM { + subs = buildBotDMSubs(requester, other, room, acceptedAt) + } else { + subs = buildDMSubs(requester, other, room, acceptedAt) + } + + if err := h.store.BulkCreateSubscriptions(ctx, subs); err != nil { + return nil, fmt.Errorf("bulk create subs: %w", err) + } + // Re-read canonical subs: BulkCreateSubscriptions swallows dup-key races, so the + // in-memory subs may carry IDs/JoinedAt that never made it to Mongo. Publish from + // the persisted pair instead. + requesterSub, err := h.store.FindDMSubscription(ctx, requester.Account, other.Account) + if err != nil { + return nil, fmt.Errorf("find requester sub after insert: %w", err) + } + otherSub, err := h.store.FindDMSubscription(ctx, other.Account, requester.Account) + if err != nil { + return nil, fmt.Errorf("find counterpart sub after insert: %w", err) + } + + h.publishSubscriptionUpdates(ctx, []*model.Subscription{requesterSub, otherSub}, requestID) + + // Outbox failure means the remote site won't learn about the room; fail the request. + if err := h.publishSyncDMOutbox(ctx, room, requester, other, acceptedAt); err != nil { + return nil, fmt.Errorf("publish room_created outbox: %w", err) + } + + return &model.SyncCreateDMReply{Success: true, Subscription: *requesterSub}, nil +} + +func validateSyncCreateDMShape(req *model.SyncCreateDMRequest) error { + switch req.RoomType { + case model.RoomTypeDM, model.RoomTypeBotDM: + default: + return errInvalidSyncDMRequest + } + if req.RequesterAccount == "" || req.OtherAccount == "" { + return errInvalidSyncDMRequest + } + if req.RequesterAccount == req.OtherAccount { + return errInvalidSyncDMRequest + } + return nil +} + +func (h *Handler) publishSubscriptionUpdates(ctx context.Context, subs []*model.Subscription, requestID string) { + for _, sub := range subs { + evt := model.SubscriptionUpdateEvent{ + UserID: sub.User.ID, + Subscription: *sub, + Action: "added", + Timestamp: time.Now().UTC().UnixMilli(), + } + data, err := json.Marshal(evt) + if err != nil { + slog.Error("sync DM: marshal subscription.update failed", + "error", err, "account", sub.User.Account, "requestID", requestID) + continue + } + if err := h.publish(ctx, subject.SubscriptionUpdate(sub.User.Account), data, ""); err != nil { + slog.Error("sync DM: publish subscription.update failed", + "error", err, "account", sub.User.Account, "requestID", requestID) + } + } +} + +func (h *Handler) publishSyncDMOutbox(ctx context.Context, room *model.Room, requester, other *model.User, acceptedAt time.Time) error { + if other.SiteID == "" || other.SiteID == h.siteID { + return nil + } + + payload := model.RoomCreatedOutbox{ + RoomID: room.ID, + RoomType: room.Type, + RoomName: "", + HomeSiteID: room.SiteID, + Accounts: []string{other.Account}, + RequesterAccount: requester.Account, + Timestamp: acceptedAt.UnixMilli(), + } + pData, err := json.Marshal(payload) + if err != nil { + return fmt.Errorf("marshal room_created outbox payload: %w", err) + } + envelope := model.OutboxEvent{ + Type: model.OutboxTypeRoomCreated, + SiteID: room.SiteID, + DestSiteID: other.SiteID, + Payload: pData, + Timestamp: time.Now().UTC().UnixMilli(), + } + eData, err := json.Marshal(envelope) + if err != nil { + return fmt.Errorf("marshal outbox envelope: %w", err) + } + payloadSeed := fmt.Sprintf("%s:%s:%d", room.ID, requester.Account, acceptedAt.UnixMilli()) + return h.publish(ctx, + subject.Outbox(room.SiteID, other.SiteID, model.OutboxTypeRoomCreated), + eData, + outboxDedupID(ctx, other.SiteID, payloadSeed), + ) +} + +// natsServerCreateDM is the NATS entry point for chat.server.request.room.{siteID}.create.dm. +func (h *Handler) natsServerCreateDM(m otelnats.Msg) { + ctx := natsutil.ContextWithRequestIDFromHeaders(m.Context(), m.Msg.Header) + reply, err := h.handleSyncCreateDM(ctx, m.Msg.Data) + if err != nil { + slog.Error("sync DM: handler failed", + "error", err, "subject", m.Msg.Subject, + "requestID", natsutil.RequestIDFromContext(ctx)) + natsutil.ReplyError(m.Msg, sanitizeSyncDMError(err)) + return + } + natsutil.ReplyJSON(m.Msg, reply) +} diff --git a/room-worker/handler_test.go b/room-worker/handler_test.go index 97f71e299..a689fd82f 100644 --- a/room-worker/handler_test.go +++ b/room-worker/handler_test.go @@ -7,6 +7,7 @@ import ( "fmt" "slices" "strings" + "sync" "testing" "time" @@ -22,9 +23,8 @@ import ( ) type publishedMsg struct { - subj string - data []byte - msgID string + subj string + data []byte } func TestHandler_ProcessRoleUpdate_Promote(t *testing.T) { @@ -1575,45 +1575,7 @@ func TestProcessAddMembers_PopulatesSubName(t *testing.T) { assert.Equal(t, model.RoomTypeChannel, capturedSubs[0].RoomType) } -// Task 14b: HistoryModeNone with explicit SharedSince — sub.HistorySharedSince must match. -func TestProcessAddMembers_HistoryNone_WithExplicitTimestamp(t *testing.T) { - h, mockStore, _ := newAddMembersTestHandler(t) - ctx := natsutil.WithRequestID(context.Background(), "0193abcd-0193-7abc-89ab-0193abcd0001") - - explicitMs := int64(1700000000000) - mockStore.EXPECT().GetRoom(gomock.Any(), "r1").Return(&model.Room{ - ID: "r1", Name: "deal team", Type: model.RoomTypeChannel, SiteID: "site-A", - }, nil) - mockStore.EXPECT().ListNewMembers(gomock.Any(), gomock.Any(), gomock.Any(), "r1"). - Return([]string{"bob"}, nil) - mockStore.EXPECT().FindUsersByAccounts(gomock.Any(), []string{"bob"}).Return([]model.User{ - {ID: "u_bob", Account: "bob", SiteID: "site-A", EngName: "X", ChineseName: "X"}, - }, nil) - var capturedSubs []*model.Subscription - mockStore.EXPECT().BulkCreateSubscriptions(gomock.Any(), gomock.Any()). - DoAndReturn(func(_ context.Context, subs []*model.Subscription) error { - capturedSubs = subs - return nil - }) - mockStore.EXPECT().HasOrgRoomMembers(gomock.Any(), "r1").Return(false, nil) - mockStore.EXPECT().ReconcileMemberCounts(gomock.Any(), "r1").Return(nil) - - body, err := json.Marshal(model.AddMembersRequest{ - RoomID: "r1", Users: []string{"bob"}, - RequesterID: "u_alice", RequesterAccount: "alice", - Timestamp: 1740000000000, - History: model.HistoryConfig{Mode: model.HistoryModeNone, SharedSince: &explicitMs}, - }) - require.NoError(t, err) - require.NoError(t, h.processAddMembers(ctx, body)) - - require.Len(t, capturedSubs, 1) - require.NotNil(t, capturedSubs[0].HistorySharedSince) - wantTime := time.UnixMilli(explicitMs).UTC() - assert.Equal(t, wantTime, *capturedSubs[0].HistorySharedSince) -} - -// Task 14b: HistoryModeNone without explicit SharedSince — sub.HistorySharedSince falls back to acceptedAt. +// Task 14b: HistoryModeNone — sub.HistorySharedSince falls back to acceptedAt (req.Timestamp). func TestProcessAddMembers_HistoryNone_NoTimestamp(t *testing.T) { h, mockStore, _ := newAddMembersTestHandler(t) ctx := natsutil.WithRequestID(context.Background(), "0193abcd-0193-7abc-89ab-0193abcd0002") @@ -2395,347 +2357,542 @@ func TestSanitizeAsyncJobError_NonPermanentCollapsed(t *testing.T) { assert.Equal(t, "operation failed", got) } -// Caller-supplied req.History.SharedSince must propagate identically to the -// local subscription, the per-user SubscriptionUpdateEvent fan-out, and the -// MemberAddEvent published locally — without it, the event would carry -// req.Timestamp instead and remote-site sub creation would diverge from -// home-site sub creation. -func TestHandler_ProcessAddMembers_HistorySharedSinceWinsOverTimestamp(t *testing.T) { - ctrl := gomock.NewController(t) - store := NewMockSubscriptionStore(ctrl) - - var published []publishedMsg - publish := func(_ context.Context, subj string, data []byte, _ string) error { - published = append(published, publishedMsg{subj: subj, data: data}) - return nil - } - h := NewHandler(store, "site-a", publish) - - const sharedSince int64 = 500 - const timestamp int64 = 1500 - require.NotEqual(t, sharedSince, timestamp, "test premise: explicit cutoff must differ from acceptedAt") - - store.EXPECT().GetRoom(gomock.Any(), "r1").Return(&model.Room{ID: "r1", SiteID: "site-a", Type: model.RoomTypeChannel}, nil) - store.EXPECT().ListNewMembers(gomock.Any(), nil, []string{"bob"}, "r1").Return([]string{"bob"}, nil) - store.EXPECT().FindUsersByAccounts(gomock.Any(), []string{"bob"}).Return([]model.User{ - {ID: "u_bob", Account: "bob", SiteID: "site-a", EngName: "Bob", ChineseName: "鮑伯"}, - }, nil) - store.EXPECT().BulkCreateSubscriptions(gomock.Any(), gomock.Any()).DoAndReturn( - func(_ context.Context, subs []*model.Subscription) error { - require.Len(t, subs, 1) - require.NotNil(t, subs[0].HistorySharedSince, "explicit cutoff must reach sub") - assert.Equal(t, time.UnixMilli(sharedSince).UTC(), *subs[0].HistorySharedSince, - "sub HistorySharedSince must equal req.History.SharedSince, not req.Timestamp") - return nil - }) - store.EXPECT().ReconcileMemberCounts(gomock.Any(), "r1").Return(nil) - store.EXPECT().HasOrgRoomMembers(gomock.Any(), "r1").Return(false, nil) +// newRequestCtx returns a context carrying a syntactically-valid X-Request-ID. +func newRequestCtx() context.Context { + return natsutil.WithRequestID(context.Background(), "01970a4f-8c2d-7c9a-abcd-e0123456789f") +} - ss := sharedSince - req := model.AddMembersRequest{ - RoomID: "r1", - Users: []string{"bob"}, - RequesterAccount: "alice", - Timestamp: timestamp, - History: model.HistoryConfig{Mode: model.HistoryModeNone, SharedSince: &ss}, - } - reqData, _ := json.Marshal(req) - ctx := natsutil.WithRequestID(context.Background(), "req-shared-since-precedence") - require.NoError(t, h.processAddMembers(ctx, reqData)) +// dmCapturedPublish + dmPublishCapture are unit-test-local equivalents of the +// integration_test.go types; same shape under a different name to avoid collision +// when both files compile together under the `integration` build tag. +type dmCapturedPublish struct { + subject string + data []byte + msgID string +} - addEvt, _ := findMemberAddEvent(t, published, "r1") - require.NotNil(t, addEvt.HistorySharedSince, "MemberAddEvent must carry the explicit cutoff") - assert.Equal(t, sharedSince, *addEvt.HistorySharedSince, - "MemberAddEvent.HistorySharedSince must equal req.History.SharedSince, not req.Timestamp") +type dmPublishCapture struct { + mu sync.Mutex + captured []dmCapturedPublish } -// findInboxPublish returns the message published on the local INBOX subject, -// or fails the test if none / more than one were captured. -func findInboxPublish(t *testing.T, published []publishedMsg, want string) publishedMsg { - t.Helper() - var matches []publishedMsg - for _, p := range published { - if p.subj == want { - matches = append(matches, p) - } - } - require.Lenf(t, matches, 1, "expected exactly 1 publish to %s, got %d (all: %+v)", want, len(matches), published) - return matches[0] +func (c *dmPublishCapture) fn(_ context.Context, subj string, data []byte, msgID string) error { + c.mu.Lock() + defer c.mu.Unlock() + c.captured = append(c.captured, dmCapturedPublish{subject: subj, data: append([]byte(nil), data...), msgID: msgID}) + return nil } -func TestHandler_processAddMembers_PublishesLocalInbox(t *testing.T) { +// newSyncDMTestHandler builds a Handler wired to a fresh mock store + capture. +// Mirrors newAddMembersTestHandler's shape for consistency. +func newSyncDMTestHandler(t *testing.T) (*Handler, *MockSubscriptionStore, *dmPublishCapture) { + t.Helper() ctrl := gomock.NewController(t) store := NewMockSubscriptionStore(ctrl) + capture := &dmPublishCapture{} + h := &Handler{siteID: "site-a", store: store, publish: capture.fn} + return h, store, capture +} - const ( - roomID = "r1" - siteID = "site-a" - requester = "alice" - ) - - store.EXPECT().GetRoom(gomock.Any(), roomID).Return(&model.Room{ID: roomID, SiteID: siteID}, nil) - store.EXPECT().ListNewMembers(gomock.Any(), nil, []string{"charlie", "bob", "carol"}, roomID). - Return([]string{"charlie", "bob", "carol"}, nil) - store.EXPECT().FindUsersByAccounts(gomock.Any(), []string{"charlie", "bob", "carol"}).Return([]model.User{ - {ID: "u-charlie", Account: "charlie", SiteID: siteID}, - {ID: "u-bob", Account: "bob", SiteID: "site-b"}, - {ID: "u-carol", Account: "carol", SiteID: "site-c"}, - }, nil) - store.EXPECT().BulkCreateSubscriptions(gomock.Any(), gomock.Any()).Return(nil) - store.EXPECT().ReconcileMemberCounts(gomock.Any(), roomID).Return(nil) - store.EXPECT().HasOrgRoomMembers(gomock.Any(), roomID).Return(false, nil) - - var published []publishedMsg - h := NewHandler(store, siteID, func(_ context.Context, subj string, data []byte, msgID string) error { - published = append(published, publishedMsg{subj: subj, data: data, msgID: msgID}) - return nil - }) +// marshalReq JSON-encodes v or fails the test on error. +func marshalReq(t *testing.T, v any) []byte { + t.Helper() + data, err := json.Marshal(v) + require.NoError(t, err) + return data +} - req := model.AddMembersRequest{ - RoomID: roomID, - Users: []string{"charlie", "bob", "carol"}, - RequesterAccount: requester, - Timestamp: 12345, - History: model.HistoryConfig{Mode: model.HistoryModeAll}, +func TestSanitizeSyncDMError(t *testing.T) { + cases := []struct { + name string + in error + want string + }{ + {"nil returns empty", nil, ""}, + {"missing request ID surfaced", errMissingRequestID, "missing X-Request-ID header"}, + {"invalid request ID surfaced", errInvalidRequestID, "invalid X-Request-ID header"}, + {"invalid sync DM request surfaced", errInvalidSyncDMRequest, "invalid sync DM request"}, + {"user lookup failed surfaced", errUserLookupFailed, "user lookup failed"}, + {"cross-site requester surfaced", errCrossSiteRequester, "requester is not on this site"}, + {"room ID collision masked as internal", errRoomIDCollision, "internal error"}, + {"unknown error masked as internal", errors.New("mongo: connection refused"), "internal error"}, } - reqData, _ := json.Marshal(req) + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.want, sanitizeSyncDMError(tc.in)) + }) + } +} - ctx := natsutil.WithRequestID(context.Background(), "req-add-local-inbox") - require.NoError(t, h.processAddMembers(ctx, reqData)) +func TestHandleSyncCreateDM_MissingRequestID(t *testing.T) { + h := &Handler{siteID: "site-a"} + req := model.SyncCreateDMRequest{ + RoomType: model.RoomTypeDM, + RequesterAccount: "alice", + OtherAccount: "bob", + } + data := marshalReq(t, req) + _, err := h.handleSyncCreateDM(context.Background(), data) + assert.ErrorIs(t, err, errMissingRequestID) +} - got := findInboxPublish(t, published, subject.InboxMemberAdded(siteID)) +func TestHandleSyncCreateDM_InvalidJSON(t *testing.T) { + h := &Handler{siteID: "site-a"} + _, err := h.handleSyncCreateDM(newRequestCtx(), []byte("{not json")) + assert.ErrorIs(t, err, errInvalidSyncDMRequest) +} - var outboxEvt model.OutboxEvent - require.NoError(t, json.Unmarshal(got.data, &outboxEvt)) - assert.Equal(t, "member_added", outboxEvt.Type) - assert.Equal(t, siteID, outboxEvt.SiteID, "origin site must equal h.siteID") - assert.Equal(t, siteID, outboxEvt.DestSiteID, "self-loop publish: dest must equal origin") - assert.Greater(t, outboxEvt.Timestamp, int64(0), "OutboxEvent.Timestamp must be set") +func TestHandleSyncCreateDM_InvalidRoomType(t *testing.T) { + h := &Handler{siteID: "site-a"} + req := model.SyncCreateDMRequest{ + RoomType: model.RoomTypeChannel, + RequesterAccount: "alice", + OtherAccount: "bob", + } + data := marshalReq(t, req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + assert.ErrorIs(t, err, errInvalidSyncDMRequest) +} - var inner model.MemberAddEvent - require.NoError(t, json.Unmarshal(outboxEvt.Payload, &inner)) - assert.Equal(t, "member_added", inner.Type) - assert.Equal(t, roomID, inner.RoomID) - assert.Equal(t, siteID, inner.SiteID) - assert.ElementsMatch(t, []string{"charlie", "bob", "carol"}, inner.Accounts, - "local INBOX must carry full add set — same-site + cross-site") +func TestHandleSyncCreateDM_EmptyAccounts(t *testing.T) { + h := &Handler{siteID: "site-a"} + cases := []model.SyncCreateDMRequest{ + {RoomType: model.RoomTypeDM, RequesterAccount: "", OtherAccount: "bob"}, + {RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: ""}, + } + for _, req := range cases { + data := marshalReq(t, req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + assert.ErrorIs(t, err, errInvalidSyncDMRequest) + } +} - wantMsgID := outboxDedupID(ctx, siteID, fmt.Sprintf("%s:%s:%d", roomID, requester, req.Timestamp)) - assert.Equal(t, wantMsgID, got.msgID, "Nats-Msg-Id must follow outboxDedupID(ctx, siteID, payloadSeed)") +func TestHandleSyncCreateDM_SelfDM(t *testing.T) { + h := &Handler{siteID: "site-a"} + req := model.SyncCreateDMRequest{ + RoomType: model.RoomTypeDM, + RequesterAccount: "alice", + OtherAccount: "alice", + } + data := marshalReq(t, req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + assert.ErrorIs(t, err, errInvalidSyncDMRequest) } -func TestHandler_processAddMembers_NoLocalInboxOnEmptyAccounts(t *testing.T) { - ctrl := gomock.NewController(t) - store := NewMockSubscriptionStore(ctrl) +func TestHandleSyncCreateDM_RequesterNotFound(t *testing.T) { + h, store, _ := newSyncDMTestHandler(t) - store.EXPECT().GetRoom(gomock.Any(), "r1").Return(&model.Room{ID: "r1", SiteID: "site-a"}, nil) - store.EXPECT().ListNewMembers(gomock.Any(), nil, []string{"bob"}, "r1").Return(nil, nil) + store.EXPECT().FindUsersByAccounts(gomock.Any(), gomock.Any()).Return(nil, nil) - var published []publishedMsg - h := NewHandler(store, "site-a", func(_ context.Context, subj string, data []byte, msgID string) error { - published = append(published, publishedMsg{subj: subj, data: data, msgID: msgID}) - return nil - }) + req := model.SyncCreateDMRequest{ + RoomType: model.RoomTypeDM, + RequesterAccount: "alice", + OtherAccount: "bob", + } + data := marshalReq(t, req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + assert.ErrorIs(t, err, errUserLookupFailed) +} - req := model.AddMembersRequest{RoomID: "r1", Users: []string{"bob"}, RequesterAccount: "alice", Timestamp: 1000} - reqData, _ := json.Marshal(req) +func TestHandleSyncCreateDM_OtherNotFound(t *testing.T) { + h, store, _ := newSyncDMTestHandler(t) - ctx := natsutil.WithRequestID(context.Background(), "req-empty-accounts") - require.NoError(t, h.processAddMembers(ctx, reqData)) + store.EXPECT().FindUsersByAccounts(gomock.Any(), gomock.Any()).Return([]model.User{{ID: "u-alice", Account: "alice", SiteID: "site-a"}}, nil) - for _, p := range published { - assert.NotEqual(t, subject.InboxMemberAdded("site-a"), p.subj, - "no local INBOX publish should occur when actualAccounts is empty") + req := model.SyncCreateDMRequest{ + RoomType: model.RoomTypeDM, + RequesterAccount: "alice", + OtherAccount: "bob", } + data := marshalReq(t, req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + assert.ErrorIs(t, err, errUserLookupFailed) } -func TestHandler_processRemoveIndividual_PublishesLocalInbox(t *testing.T) { - ctrl := gomock.NewController(t) - store := NewMockSubscriptionStore(ctrl) +func TestHandleSyncCreateDM_CrossSiteRequester(t *testing.T) { + h, store, _ := newSyncDMTestHandler(t) - const ( - roomID = "r1" - account = "bob" - requester = "alice" - siteID = "site-a" - ) + store.EXPECT().FindUsersByAccounts(gomock.Any(), gomock.Any()).Return([]model.User{{ID: "u-alice", Account: "alice", SiteID: "site-b"}}, nil) - userResult := &UserWithMembership{ - User: model.User{ID: "u-bob", Account: account, SiteID: siteID}, - HasOrgMembership: false, + req := model.SyncCreateDMRequest{ + RoomType: model.RoomTypeDM, + RequesterAccount: "alice", + OtherAccount: "bob", } + data := marshalReq(t, req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + assert.ErrorIs(t, err, errCrossSiteRequester) +} - store.EXPECT().GetUserWithMembership(gomock.Any(), roomID, account).Return(userResult, nil) - store.EXPECT().DeleteRoomMember(gomock.Any(), roomID, model.RoomMemberIndividual, "u-bob").Return(nil) - store.EXPECT().DeleteSubscription(gomock.Any(), roomID, account).Return(int64(1), nil) - store.EXPECT().ReconcileMemberCounts(gomock.Any(), roomID).Return(nil) - - var published []publishedMsg - h := NewHandler(store, siteID, func(_ context.Context, subj string, data []byte, msgID string) error { - published = append(published, publishedMsg{subj: subj, data: data, msgID: msgID}) - return nil - }) +func TestHandleSyncCreateDM_RoomCollisionMismatch(t *testing.T) { + roomID := idgen.BuildDMRoomID("u-alice", "u-bob") + cases := []struct { + name string + existing model.Room + }{ + {"type mismatch", model.Room{ID: roomID, Type: model.RoomTypeChannel, SiteID: "site-a", Name: "", CreatedBy: "u-alice"}}, + {"siteID mismatch", model.Room{ID: roomID, Type: model.RoomTypeDM, SiteID: "site-other", Name: "", CreatedBy: "u-alice"}}, + {"name mismatch", model.Room{ID: roomID, Type: model.RoomTypeDM, SiteID: "site-a", Name: "leak", CreatedBy: "u-alice"}}, + {"createdBy mismatch", model.Room{ID: roomID, Type: model.RoomTypeDM, SiteID: "site-a", Name: "", CreatedBy: "u-eve"}}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + h, store, _ := newSyncDMTestHandler(t) + requester := &model.User{ID: "u-alice", Account: "alice", SiteID: "site-a"} + other := &model.User{ID: "u-bob", Account: "bob", SiteID: "site-a"} + store.EXPECT().FindUsersByAccounts(gomock.Any(), gomock.Any()).Return([]model.User{*requester, *other}, nil) + dupErr := mongo.WriteException{WriteErrors: []mongo.WriteError{{Code: 11000}}} + store.EXPECT().CreateRoom(gomock.Any(), gomock.Any()).Return(dupErr) + existing := tc.existing + store.EXPECT().GetRoom(gomock.Any(), gomock.Any()).Return(&existing, nil) + + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: "bob"} + data := marshalReq(t, req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + assert.ErrorIs(t, err, errRoomIDCollision) + }) + } +} - req := model.RemoveMemberRequest{RoomID: roomID, Requester: requester, Account: account, Timestamp: 9999} - reqData, _ := json.Marshal(req) +func TestHandleSyncCreateDM_DM_PersistsSubsAndReturnsRequester(t *testing.T) { + h, store, _ := newSyncDMTestHandler(t) - ctx := natsutil.WithRequestID(context.Background(), "req-rm-individual-inbox") - require.NoError(t, h.processRemoveMember(ctx, reqData)) + requester := &model.User{ID: "u-alice", Account: "alice", SiteID: "site-a"} + other := &model.User{ID: "u-bob", Account: "bob", SiteID: "site-a"} + store.EXPECT().FindUsersByAccounts(gomock.Any(), gomock.Any()).Return([]model.User{*requester, *other}, nil) + var insertedRoom *model.Room + store.EXPECT().CreateRoom(gomock.Any(), gomock.Any()). + DoAndReturn(func(_ context.Context, r *model.Room) error { + insertedRoom = r + return nil + }) - got := findInboxPublish(t, published, subject.InboxMemberRemoved(siteID)) + var captured []*model.Subscription + store.EXPECT().BulkCreateSubscriptions(gomock.Any(), gomock.Any()). + DoAndReturn(func(_ context.Context, subs []*model.Subscription) error { + captured = subs + return nil + }) + store.EXPECT().FindDMSubscription(gomock.Any(), "alice", "bob").Return(&model.Subscription{ + ID: "canonical-alice-sub", + User: model.SubscriptionUser{ID: "u-alice", Account: "alice"}, + RoomID: idgen.BuildDMRoomID("u-alice", "u-bob"), + Name: "bob", + RoomType: model.RoomTypeDM, + }, nil) + store.EXPECT().FindDMSubscription(gomock.Any(), "bob", "alice").Return(&model.Subscription{ + ID: "canonical-bob-sub", + User: model.SubscriptionUser{ID: "u-bob", Account: "bob"}, + RoomID: idgen.BuildDMRoomID("u-alice", "u-bob"), + Name: "alice", + RoomType: model.RoomTypeDM, + }, nil) - var outboxEvt model.OutboxEvent - require.NoError(t, json.Unmarshal(got.data, &outboxEvt)) - assert.Equal(t, "member_removed", outboxEvt.Type) - assert.Equal(t, siteID, outboxEvt.SiteID) - assert.Equal(t, siteID, outboxEvt.DestSiteID) - assert.Greater(t, outboxEvt.Timestamp, int64(0)) + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: "bob"} + data := marshalReq(t, req) + reply, err := h.handleSyncCreateDM(newRequestCtx(), data) + require.NoError(t, err) + require.NotNil(t, reply) + assert.True(t, reply.Success) + assert.Equal(t, "canonical-alice-sub", reply.Subscription.ID) + + // DM room is inserted with userCount=2, appCount=0 — no Reconcile needed. + require.NotNil(t, insertedRoom) + assert.Equal(t, 2, insertedRoom.UserCount) + assert.Equal(t, 0, insertedRoom.AppCount) + + // Both subs persisted: requester names other (IsSubscribed=false), other names requester. + require.Len(t, captured, 2) + roomID := idgen.BuildDMRoomID("u-alice", "u-bob") + subByAccount := map[string]*model.Subscription{} + for _, s := range captured { + subByAccount[s.User.Account] = s + } + require.Contains(t, subByAccount, "alice") + require.Contains(t, subByAccount, "bob") + assert.Equal(t, "u-alice", subByAccount["alice"].User.ID) + assert.Equal(t, roomID, subByAccount["alice"].RoomID) + assert.Equal(t, "bob", subByAccount["alice"].Name) + assert.Equal(t, model.RoomTypeDM, subByAccount["alice"].RoomType) + assert.False(t, subByAccount["alice"].IsSubscribed) + assert.Equal(t, "u-bob", subByAccount["bob"].User.ID) + assert.Equal(t, "alice", subByAccount["bob"].Name) + assert.False(t, subByAccount["bob"].IsSubscribed) +} + +func TestHandleSyncCreateDM_BotDM_RequesterSubIsSubscribedTrue(t *testing.T) { + h, store, _ := newSyncDMTestHandler(t) + + requester := &model.User{ID: "u-alice", Account: "alice", SiteID: "site-a"} + bot := &model.User{ID: "u-bot", Account: "helper.bot", SiteID: "site-a"} + store.EXPECT().FindUsersByAccounts(gomock.Any(), gomock.Any()).Return([]model.User{*requester, *bot}, nil) + var insertedRoom *model.Room + store.EXPECT().CreateRoom(gomock.Any(), gomock.Any()). + DoAndReturn(func(_ context.Context, r *model.Room) error { + insertedRoom = r + return nil + }) + store.EXPECT().BulkCreateSubscriptions(gomock.Any(), gomock.Any()).Return(nil) + store.EXPECT().FindDMSubscription(gomock.Any(), "alice", "helper.bot").Return(&model.Subscription{ + User: model.SubscriptionUser{ID: "u-alice", Account: "alice"}, IsSubscribed: true, + }, nil) + store.EXPECT().FindDMSubscription(gomock.Any(), "helper.bot", "alice").Return(&model.Subscription{ + User: model.SubscriptionUser{ID: "u-bot", Account: "helper.bot"}, IsSubscribed: false, + }, nil) - var inner model.MemberRemoveEvent - require.NoError(t, json.Unmarshal(outboxEvt.Payload, &inner)) - assert.Equal(t, "member_removed", inner.Type, "admin-remove inner type is member_removed") - assert.Equal(t, roomID, inner.RoomID) - assert.Equal(t, []string{account}, inner.Accounts) + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeBotDM, RequesterAccount: "alice", OtherAccount: "helper.bot"} + data := marshalReq(t, req) + reply, err := h.handleSyncCreateDM(newRequestCtx(), data) + require.NoError(t, err) + require.NotNil(t, reply) + assert.True(t, reply.Subscription.IsSubscribed) + assert.Equal(t, "alice", reply.Subscription.User.Account) - wantMsgID := outboxDedupID(ctx, siteID, fmt.Sprintf("%s:%s:%d", roomID, account, req.Timestamp)) - assert.Equal(t, wantMsgID, got.msgID, "Nats-Msg-Id must follow outboxDedupID(ctx, siteID, payloadSeed)") + // botDM room is inserted with userCount=1, appCount=1 — no Reconcile needed. + require.NotNil(t, insertedRoom) + assert.Equal(t, 1, insertedRoom.UserCount) + assert.Equal(t, 1, insertedRoom.AppCount) } -// Self-leave: wrapper Type collapses to member_removed while inner Type -// stays member_left — matches the cross-site OUTBOX convention so -// search-sync-worker dispatches on a single MV op. -func TestHandler_processRemoveIndividual_SelfLeavePublishesLocalInbox(t *testing.T) { - ctrl := gomock.NewController(t) - store := NewMockSubscriptionStore(ctrl) +// On dup-key race, BulkCreateSubscriptions swallows the error and the in-memory subs +// carry stale state; the handler must return the canonical persisted sub via FindDMSubscription. +func TestHandleSyncCreateDM_ReturnsCanonicalPersistedSub(t *testing.T) { + h, store, _ := newSyncDMTestHandler(t) - const ( - roomID = "r1" - account = "alice" - siteID = "site-a" - ) + requester := &model.User{ID: "u-alice", Account: "alice", SiteID: "site-a"} + other := &model.User{ID: "u-bob", Account: "bob", SiteID: "site-a"} + store.EXPECT().FindUsersByAccounts(gomock.Any(), gomock.Any()).Return([]model.User{*requester, *other}, nil) + store.EXPECT().CreateRoom(gomock.Any(), gomock.Any()).Return(nil) + store.EXPECT().BulkCreateSubscriptions(gomock.Any(), gomock.Any()).Return(nil) + existingSub := &model.Subscription{ + ID: "canonical-sub", + User: model.SubscriptionUser{ID: "u-alice", Account: "alice"}, + RoomID: idgen.BuildDMRoomID("u-alice", "u-bob"), + Name: "bob", + RoomType: model.RoomTypeDM, + } + store.EXPECT().FindDMSubscription(gomock.Any(), "alice", "bob").Return(existingSub, nil) + store.EXPECT().FindDMSubscription(gomock.Any(), "bob", "alice").Return(&model.Subscription{ + ID: "canonical-bob-sub", User: model.SubscriptionUser{ID: "u-bob", Account: "bob"}, + RoomID: idgen.BuildDMRoomID("u-alice", "u-bob"), Name: "alice", RoomType: model.RoomTypeDM, + }, nil) - userResult := &UserWithMembership{ - User: model.User{ID: "u1", Account: account, SiteID: siteID}, - HasOrgMembership: false, - } + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: "bob"} + data := marshalReq(t, req) + reply, err := h.handleSyncCreateDM(newRequestCtx(), data) + require.NoError(t, err) + require.NotNil(t, reply) + assert.Equal(t, "canonical-sub", reply.Subscription.ID) +} - store.EXPECT().GetUserWithMembership(gomock.Any(), roomID, account).Return(userResult, nil) - store.EXPECT().DeleteRoomMember(gomock.Any(), roomID, model.RoomMemberIndividual, "u1").Return(nil) - store.EXPECT().DeleteSubscription(gomock.Any(), roomID, account).Return(int64(1), nil) - store.EXPECT().ReconcileMemberCounts(gomock.Any(), roomID).Return(nil) +// Transient store errors on GetUser must NOT be sanitized as errUserLookupFailed (which +// signals "user does not exist"); they should propagate as wrapped errors and surface +// as "internal error" via sanitizeSyncDMError. +func TestHandleSyncCreateDM_GetUserTransientError_Internal(t *testing.T) { + h, store, _ := newSyncDMTestHandler(t) - var published []publishedMsg - h := NewHandler(store, siteID, func(_ context.Context, subj string, data []byte, msgID string) error { - published = append(published, publishedMsg{subj: subj, data: data, msgID: msgID}) - return nil - }) + store.EXPECT().FindUsersByAccounts(gomock.Any(), gomock.Any()).Return(nil, errors.New("mongo: connection refused")) - req := model.RemoveMemberRequest{RoomID: roomID, Requester: account, Account: account, Timestamp: 1000} - reqData, _ := json.Marshal(req) + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: "bob"} + data := marshalReq(t, req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + require.Error(t, err) + assert.NotErrorIs(t, err, errUserLookupFailed, + "transient error must not be tagged as user-not-found") + assert.Equal(t, "internal error", sanitizeSyncDMError(err)) +} - ctx := natsutil.WithRequestID(context.Background(), "req-rm-self-leave-inbox") - require.NoError(t, h.processRemoveMember(ctx, reqData)) +func TestHandleSyncCreateDM_PublishesSubscriptionUpdateForBothUsers(t *testing.T) { + h, store, capture := newSyncDMTestHandler(t) - got := findInboxPublish(t, published, subject.InboxMemberRemoved(siteID)) + requester := &model.User{ID: "u-alice", Account: "alice", SiteID: "site-a"} + other := &model.User{ID: "u-bob", Account: "bob", SiteID: "site-a"} + store.EXPECT().FindUsersByAccounts(gomock.Any(), gomock.Any()).Return([]model.User{*requester, *other}, nil) + store.EXPECT().CreateRoom(gomock.Any(), gomock.Any()).Return(nil) + store.EXPECT().BulkCreateSubscriptions(gomock.Any(), gomock.Any()).Return(nil) + store.EXPECT().FindDMSubscription(gomock.Any(), "alice", "bob").Return(&model.Subscription{ + User: model.SubscriptionUser{ID: "u-alice", Account: "alice"}, + }, nil) + store.EXPECT().FindDMSubscription(gomock.Any(), "bob", "alice").Return(&model.Subscription{ + User: model.SubscriptionUser{ID: "u-bob", Account: "bob"}, + }, nil) - var outboxEvt model.OutboxEvent - require.NoError(t, json.Unmarshal(got.data, &outboxEvt)) - assert.Equal(t, "member_removed", outboxEvt.Type, - "OutboxEvent wrapper Type must be member_removed even for self-leave") + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: "bob"} + data := marshalReq(t, req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + require.NoError(t, err) - var inner model.MemberRemoveEvent - require.NoError(t, json.Unmarshal(outboxEvt.Payload, &inner)) - assert.Equal(t, "member_left", inner.Type, - "inner MemberRemoveEvent.Type is preserved as member_left for self-leave") + subjects := map[string]int{} + for _, p := range capture.captured { + subjects[p.subject]++ + } + assert.Equal(t, 1, subjects[subject.SubscriptionUpdate("alice")]) + assert.Equal(t, 1, subjects[subject.SubscriptionUpdate("bob")]) } -func TestHandler_processRemoveOrg_PublishesLocalInbox(t *testing.T) { - ctrl := gomock.NewController(t) - store := NewMockSubscriptionStore(ctrl) +func TestHandleSyncCreateDM_CrossSite_EmitsOutbox(t *testing.T) { + h, store, capture := newSyncDMTestHandler(t) - const ( - roomID = "r1" - orgID = "org-1" - requester = "alice" - siteID = "site-a" - ) - - orgMembers := []OrgMemberStatus{ - {Account: "carol", SiteID: siteID, SectName: "Eng", HasIndividualMembership: false}, - {Account: "dave", SiteID: "site-b", SectName: "Eng", HasIndividualMembership: false}, - } + requester := &model.User{ID: "u-alice", Account: "alice", SiteID: "site-a"} + other := &model.User{ID: "u-bob", Account: "bob", SiteID: "site-b"} + store.EXPECT().FindUsersByAccounts(gomock.Any(), gomock.Any()).Return([]model.User{*requester, *other}, nil) + store.EXPECT().CreateRoom(gomock.Any(), gomock.Any()).Return(nil) + store.EXPECT().BulkCreateSubscriptions(gomock.Any(), gomock.Any()).Return(nil) + store.EXPECT().FindDMSubscription(gomock.Any(), "alice", "bob").Return(&model.Subscription{ + User: model.SubscriptionUser{ID: "u-alice", Account: "alice"}, + }, nil) + store.EXPECT().FindDMSubscription(gomock.Any(), "bob", "alice").Return(&model.Subscription{ + User: model.SubscriptionUser{ID: "u-bob", Account: "bob"}, + }, nil) - store.EXPECT().GetOrgMembersWithIndividualStatus(gomock.Any(), roomID, orgID).Return(orgMembers, nil) - store.EXPECT(). - DeleteSubscriptionsByAccounts(gomock.Any(), roomID, gomock.InAnyOrder([]string{"carol", "dave"})). - Return(int64(2), nil) - store.EXPECT().DeleteRoomMember(gomock.Any(), roomID, model.RoomMemberOrg, orgID).Return(nil) - store.EXPECT().ReconcileMemberCounts(gomock.Any(), roomID).Return(nil) + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: "bob"} + data := marshalReq(t, req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + require.NoError(t, err) - var published []publishedMsg - h := NewHandler(store, siteID, func(_ context.Context, subj string, data []byte, msgID string) error { - published = append(published, publishedMsg{subj: subj, data: data, msgID: msgID}) - return nil - }) + var outbox *dmCapturedPublish + for i := range capture.captured { + if capture.captured[i].subject == subject.Outbox("site-a", "site-b", model.OutboxTypeRoomCreated) { + outbox = &capture.captured[i] + break + } + } + require.NotNil(t, outbox, "expected an outbox publish to site-b") - req := model.RemoveMemberRequest{RoomID: roomID, Requester: requester, OrgID: orgID, Timestamp: 4242} - reqData, _ := json.Marshal(req) + var env model.OutboxEvent + require.NoError(t, json.Unmarshal(outbox.data, &env)) + assert.Equal(t, model.OutboxEventType(model.OutboxTypeRoomCreated), env.Type) + assert.Equal(t, "site-a", env.SiteID) + assert.Equal(t, "site-b", env.DestSiteID) - ctx := natsutil.WithRequestID(context.Background(), "req-rm-org-inbox") - require.NoError(t, h.processRemoveMember(ctx, reqData)) + var payload model.RoomCreatedOutbox + require.NoError(t, json.Unmarshal(env.Payload, &payload)) + assert.Equal(t, model.RoomTypeDM, payload.RoomType) + assert.Equal(t, "", payload.RoomName) + assert.Equal(t, "site-a", payload.HomeSiteID) + assert.Equal(t, []string{"bob"}, payload.Accounts) + assert.Equal(t, "alice", payload.RequesterAccount) + assert.Equal(t, "01970a4f-8c2d-7c9a-abcd-e0123456789f:site-b", outbox.msgID) +} - got := findInboxPublish(t, published, subject.InboxMemberRemoved(siteID)) +func TestHandleSyncCreateDM_SameSite_NoOutbox(t *testing.T) { + h, store, capture := newSyncDMTestHandler(t) - var outboxEvt model.OutboxEvent - require.NoError(t, json.Unmarshal(got.data, &outboxEvt)) - assert.Equal(t, "member_removed", outboxEvt.Type) - assert.Equal(t, siteID, outboxEvt.SiteID) - assert.Equal(t, siteID, outboxEvt.DestSiteID) + requester := &model.User{ID: "u-alice", Account: "alice", SiteID: "site-a"} + other := &model.User{ID: "u-bob", Account: "bob", SiteID: "site-a"} + store.EXPECT().FindUsersByAccounts(gomock.Any(), gomock.Any()).Return([]model.User{*requester, *other}, nil) + store.EXPECT().CreateRoom(gomock.Any(), gomock.Any()).Return(nil) + store.EXPECT().BulkCreateSubscriptions(gomock.Any(), gomock.Any()).Return(nil) + store.EXPECT().FindDMSubscription(gomock.Any(), "alice", "bob").Return(&model.Subscription{ + User: model.SubscriptionUser{ID: "u-alice", Account: "alice"}, + }, nil) + store.EXPECT().FindDMSubscription(gomock.Any(), "bob", "alice").Return(&model.Subscription{ + User: model.SubscriptionUser{ID: "u-bob", Account: "bob"}, + }, nil) - var inner model.MemberRemoveEvent - require.NoError(t, json.Unmarshal(outboxEvt.Payload, &inner)) - assert.Equal(t, "member_removed", inner.Type) - assert.Equal(t, roomID, inner.RoomID) - assert.Equal(t, orgID, inner.OrgID) - assert.ElementsMatch(t, []string{"carol", "dave"}, inner.Accounts, - "local INBOX must carry every removed account regardless of site") + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: "bob"} + data := marshalReq(t, req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + require.NoError(t, err) - wantMsgID := outboxDedupID(ctx, siteID, fmt.Sprintf("%s:%s:%d", roomID, orgID, req.Timestamp)) - assert.Equal(t, wantMsgID, got.msgID, "Nats-Msg-Id must follow outboxDedupID(ctx, siteID, payloadSeed)") + for _, p := range capture.captured { + assert.NotContains(t, p.subject, "outbox.", "no outbox publish expected for same-site DM") + } } -func TestHandler_processRemoveOrg_NoLocalInboxOnZeroAccounts(t *testing.T) { +// Outbox publish failure must fail the request — otherwise the requester sees success +// while the remote site never learns about the room. +func TestHandleSyncCreateDM_OutboxPublishFails_FailsRequest(t *testing.T) { ctrl := gomock.NewController(t) store := NewMockSubscriptionStore(ctrl) + failingPublish := func(_ context.Context, subj string, _ []byte, _ string) error { + if strings.HasPrefix(subj, "outbox.") { + return errors.New("jetstream pubAck failed") + } + return nil + } + h := &Handler{siteID: "site-a", store: store, publish: failingPublish} - const ( - roomID = "r1" - orgID = "org-1" - siteID = "site-a" - ) + store.EXPECT().FindUsersByAccounts(gomock.Any(), gomock.Any()).Return([]model.User{ + {ID: "u-alice", Account: "alice", SiteID: "site-a"}, + {ID: "u-bob", Account: "bob", SiteID: "site-b"}, + }, nil) + store.EXPECT().CreateRoom(gomock.Any(), gomock.Any()).Return(nil) + store.EXPECT().BulkCreateSubscriptions(gomock.Any(), gomock.Any()).Return(nil) + store.EXPECT().FindDMSubscription(gomock.Any(), "alice", "bob").Return(&model.Subscription{ + User: model.SubscriptionUser{ID: "u-alice", Account: "alice"}, + }, nil) + store.EXPECT().FindDMSubscription(gomock.Any(), "bob", "alice").Return(&model.Subscription{ + User: model.SubscriptionUser{ID: "u-bob", Account: "bob"}, + }, nil) - orgMembers := []OrgMemberStatus{ - {Account: "carol", SiteID: siteID, SectName: "Eng", HasIndividualMembership: true}, - } + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: "bob"} + data := marshalReq(t, req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + require.Error(t, err) + assert.Equal(t, "internal error", sanitizeSyncDMError(err)) +} - store.EXPECT().GetOrgMembersWithIndividualStatus(gomock.Any(), roomID, orgID).Return(orgMembers, nil) - store.EXPECT().DeleteRoomMember(gomock.Any(), roomID, model.RoomMemberOrg, orgID).Return(nil) - store.EXPECT().ReconcileMemberCounts(gomock.Any(), roomID).Return(nil) +// BulkCreateSubscriptions returning a non-dup-key error must surface as "internal error". +func TestHandleSyncCreateDM_BulkCreateSubsTransientError(t *testing.T) { + h, store, _ := newSyncDMTestHandler(t) - var published []publishedMsg - h := NewHandler(store, siteID, func(_ context.Context, subj string, data []byte, msgID string) error { - published = append(published, publishedMsg{subj: subj, data: data, msgID: msgID}) - return nil - }) + requester := &model.User{ID: "u-alice", Account: "alice", SiteID: "site-a"} + other := &model.User{ID: "u-bob", Account: "bob", SiteID: "site-a"} + store.EXPECT().FindUsersByAccounts(gomock.Any(), gomock.Any()).Return([]model.User{*requester, *other}, nil) + store.EXPECT().CreateRoom(gomock.Any(), gomock.Any()).Return(nil) + store.EXPECT().BulkCreateSubscriptions(gomock.Any(), gomock.Any()). + Return(errors.New("mongo: connection reset")) - req := model.RemoveMemberRequest{RoomID: roomID, Requester: "alice", OrgID: orgID, Timestamp: 1000} - reqData, _ := json.Marshal(req) + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: "bob"} + data := marshalReq(t, req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + require.Error(t, err) + assert.Equal(t, "internal error", sanitizeSyncDMError(err)) +} + +// On a CreateRoom dup-key with matching existing room (idempotent re-delivery), +// the handler must reuse existing.CreatedAt as acceptedAt — sub.JoinedAt and event +// timestamps reflect the original creation, not retry wall-clock. +func TestHandleSyncCreateDM_IdempotentRecreate_UsesExistingCreatedAt(t *testing.T) { + h, store, _ := newSyncDMTestHandler(t) + + requester := &model.User{ID: "u-alice", Account: "alice", SiteID: "site-a"} + other := &model.User{ID: "u-bob", Account: "bob", SiteID: "site-a"} + store.EXPECT().FindUsersByAccounts(gomock.Any(), gomock.Any()).Return([]model.User{*requester, *other}, nil) + + // CreateRoom hits dup-key; GetRoom returns a matching existing room with a known CreatedAt. + originalCreatedAt := time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC) + roomID := idgen.BuildDMRoomID("u-alice", "u-bob") + dupErr := mongo.WriteException{WriteErrors: []mongo.WriteError{{Code: 11000}}} + store.EXPECT().CreateRoom(gomock.Any(), gomock.Any()).Return(dupErr) + store.EXPECT().GetRoom(gomock.Any(), gomock.Any()).Return(&model.Room{ + ID: roomID, Type: model.RoomTypeDM, SiteID: "site-a", + Name: "", CreatedBy: "u-alice", + CreatedAt: originalCreatedAt, UpdatedAt: originalCreatedAt, + }, nil) - ctx := natsutil.WithRequestID(context.Background(), "req-rm-org-empty") - require.NoError(t, h.processRemoveMember(ctx, reqData)) + var captured []*model.Subscription + store.EXPECT().BulkCreateSubscriptions(gomock.Any(), gomock.Any()). + DoAndReturn(func(_ context.Context, subs []*model.Subscription) error { + captured = subs + return nil + }) + store.EXPECT().FindDMSubscription(gomock.Any(), "alice", "bob").Return(&model.Subscription{ + User: model.SubscriptionUser{ID: "u-alice", Account: "alice"}, JoinedAt: originalCreatedAt, + }, nil) + store.EXPECT().FindDMSubscription(gomock.Any(), "bob", "alice").Return(&model.Subscription{ + User: model.SubscriptionUser{ID: "u-bob", Account: "bob"}, JoinedAt: originalCreatedAt, + }, nil) - for _, p := range published { - assert.NotEqual(t, subject.InboxMemberRemoved(siteID), p.subj, - "no local INBOX publish when no accounts are actually removed") + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: "bob"} + data := marshalReq(t, req) + _, err := h.handleSyncCreateDM(newRequestCtx(), data) + require.NoError(t, err) + + require.Len(t, captured, 2) + for _, s := range captured { + assert.Equal(t, originalCreatedAt, s.JoinedAt, + "sub.JoinedAt must reflect existing.CreatedAt on idempotent re-delivery, not retry wall-clock") } } diff --git a/room-worker/integration_test.go b/room-worker/integration_test.go index a9375c6be..6a95becab 100644 --- a/room-worker/integration_test.go +++ b/room-worker/integration_test.go @@ -971,3 +971,161 @@ func TestProcessRemoveIndividual_PublishesLocalInbox_Integration(t *testing.T) { assert.Equal(t, []string{"bob"}, inner.Accounts) assert.Equal(t, reqID+":site-A", pubs[0].msgID) } + +// --- Sync DM endpoint integration tests --- + +const integSyncDMRequestID = "01970a4f-8c2d-7c9a-abcd-e0123456789f" + +func newIntegSyncDMCtx() context.Context { + return natsutil.WithRequestID(context.Background(), integSyncDMRequestID) +} + +func TestSyncCreateDM_DM_PersistsRoomAndSubs(t *testing.T) { + ctx := newIntegSyncDMCtx() + db := setupMongo(t) + store := NewMongoStore(db) + siteID := "site-A" + + mustInsertUser(t, db, &model.User{ID: "u-alice", Account: "alice", SiteID: siteID, EngName: "Alice", ChineseName: "愛麗絲"}) + mustInsertUser(t, db, &model.User{ID: "u-bob", Account: "bob", SiteID: siteID, EngName: "Bob", ChineseName: "鮑勃"}) + + cap := &publishCapture{} + handler := NewHandler(store, siteID, cap.fn()) + + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: "bob"} + data, _ := json.Marshal(req) + got, err := handler.handleSyncCreateDM(ctx, data) + require.NoError(t, err) + require.NotNil(t, got) + assert.True(t, got.Success) + assert.Equal(t, "alice", got.Subscription.User.Account) + + roomID := idgen.BuildDMRoomID("u-alice", "u-bob") + room, err := store.GetRoom(ctx, roomID) + require.NoError(t, err) + assert.Equal(t, model.RoomTypeDM, room.Type) + assert.Equal(t, siteID, room.SiteID) + assert.Equal(t, 2, room.UserCount, "DM room.UserCount set at creation; no Reconcile pass") + assert.Equal(t, 0, room.AppCount) + + subCount, err := db.Collection("subscriptions").CountDocuments(ctx, bson.M{"roomId": roomID}) + require.NoError(t, err) + assert.Equal(t, int64(2), subCount) + + // Two subscription.update publishes — one per user. + subjects := map[string]int{} + cap.mu.Lock() + for _, p := range cap.captured { + subjects[p.subject]++ + } + cap.mu.Unlock() + assert.Equal(t, 1, subjects[subject.SubscriptionUpdate("alice")]) + assert.Equal(t, 1, subjects[subject.SubscriptionUpdate("bob")]) +} + +func TestSyncCreateDM_BotDM_CrossSiteOutbox(t *testing.T) { + db := setupMongo(t) + store := NewMongoStore(db) + siteID := "site-A" + + mustInsertUser(t, db, &model.User{ID: "u-alice", Account: "alice", SiteID: siteID, EngName: "Alice", ChineseName: "愛麗絲"}) + mustInsertUser(t, db, &model.User{ID: "u-bot", Account: "helper.bot", SiteID: "site-B", EngName: "Helper", ChineseName: "助手"}) + + cap := &publishCapture{} + handler := NewHandler(store, siteID, cap.fn()) + + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeBotDM, RequesterAccount: "alice", OtherAccount: "helper.bot"} + data, _ := json.Marshal(req) + _, err := handler.handleSyncCreateDM(newIntegSyncDMCtx(), data) + require.NoError(t, err) + + pubs := cap.outboxOnPrefix(subject.Outbox(siteID, "site-B", model.OutboxTypeRoomCreated)) + assert.Len(t, pubs, 1, "exactly one outbox to site-B") +} + +func TestSyncCreateDM_RetryIdempotent(t *testing.T) { + ctx := newIntegSyncDMCtx() + db := setupMongo(t) + store := NewMongoStore(db) + siteID := "site-A" + + mustInsertUser(t, db, &model.User{ID: "u-alice", Account: "alice", SiteID: siteID, EngName: "Alice", ChineseName: "愛麗絲"}) + mustInsertUser(t, db, &model.User{ID: "u-bob", Account: "bob", SiteID: siteID, EngName: "Bob", ChineseName: "鮑勃"}) + + cap := &publishCapture{} + handler := NewHandler(store, siteID, cap.fn()) + + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: "bob"} + data, _ := json.Marshal(req) + + r1, err := handler.handleSyncCreateDM(ctx, data) + require.NoError(t, err) + r2, err := handler.handleSyncCreateDM(ctx, data) + require.NoError(t, err) + require.NotNil(t, r1) + require.NotNil(t, r2) + assert.Equal(t, r1.Subscription.RoomID, r2.Subscription.RoomID) + + roomID := idgen.BuildDMRoomID("u-alice", "u-bob") + room, err := store.GetRoom(ctx, roomID) + require.NoError(t, err) + assert.Equal(t, roomID, room.ID) + + subCount, err := db.Collection("subscriptions").CountDocuments(ctx, bson.M{"roomId": roomID}) + require.NoError(t, err) + assert.Equal(t, int64(2), subCount, "redelivery must not create duplicate subs") +} + +// Federation convergence: the cross-site OUTBOX payload carries the deterministic +// BuildDMRoomID, so the remote inbox-worker (and any replay) writes to the SAME +// room ID as the home site. Same X-Request-ID across replays produces the same +// Nats-Msg-Id so JetStream dedup blocks duplicates. +func TestSyncCreateDM_CrossSite_OutboxPayloadConverges(t *testing.T) { + ctx := newIntegSyncDMCtx() + db := setupMongo(t) + store := NewMongoStore(db) + siteID := "site-A" + + mustInsertUser(t, db, &model.User{ID: "u-alice", Account: "alice", SiteID: siteID, EngName: "Alice", ChineseName: "愛麗絲"}) + mustInsertUser(t, db, &model.User{ID: "u-bob", Account: "bob", SiteID: "site-B", EngName: "Bob", ChineseName: "鮑勃"}) + + cap1 := &publishCapture{} + handler := NewHandler(store, siteID, cap1.fn()) + + req := model.SyncCreateDMRequest{RoomType: model.RoomTypeDM, RequesterAccount: "alice", OtherAccount: "bob"} + data, err := json.Marshal(req) + require.NoError(t, err) + _, err = handler.handleSyncCreateDM(ctx, data) + require.NoError(t, err) + + // 1. Local Mongo room.ID equals the deterministic BuildDMRoomID. + wantRoomID := idgen.BuildDMRoomID("u-alice", "u-bob") + persisted, err := store.GetRoom(ctx, wantRoomID) + require.NoError(t, err) + assert.Equal(t, wantRoomID, persisted.ID) + + // 2. OUTBOX payload carries the same RoomID + the dedup key includes destSiteID. + pubs := cap1.outboxOnPrefix(subject.Outbox(siteID, "site-B", model.OutboxTypeRoomCreated)) + require.Len(t, pubs, 1) + var env model.OutboxEvent + require.NoError(t, json.Unmarshal(pubs[0].data, &env)) + var payload model.RoomCreatedOutbox + require.NoError(t, json.Unmarshal(env.Payload, &payload)) + assert.Equal(t, wantRoomID, payload.RoomID, + "outbox RoomID must match local room.ID so remote site converges") + assert.Equal(t, "alice", payload.RequesterAccount) + assert.Equal(t, []string{"bob"}, payload.Accounts) + assert.Contains(t, pubs[0].msgID, "site-B", + "Nats-Msg-Id must include destSiteID for JetStream stream dedup") + + // 3. Replay with the same X-Request-ID produces the same Nats-Msg-Id — + // on the wire, JetStream OUTBOX dedup would reject the second emit. + cap2 := &publishCapture{} + handler2 := NewHandler(store, siteID, cap2.fn()) + _, err = handler2.handleSyncCreateDM(ctx, data) + require.NoError(t, err) + pubs2 := cap2.outboxOnPrefix(subject.Outbox(siteID, "site-B", model.OutboxTypeRoomCreated)) + require.Len(t, pubs2, 1) + assert.Equal(t, pubs[0].msgID, pubs2[0].msgID, + "replay must produce identical Nats-Msg-Id so broker dedup blocks duplicate cross-site events") +} diff --git a/room-worker/main.go b/room-worker/main.go index fc83c4a43..2775fbe20 100644 --- a/room-worker/main.go +++ b/room-worker/main.go @@ -18,6 +18,7 @@ import ( "github.com/hmchangw/chat/pkg/otelutil" "github.com/hmchangw/chat/pkg/shutdown" "github.com/hmchangw/chat/pkg/stream" + "github.com/hmchangw/chat/pkg/subject" ) type config struct { @@ -90,6 +91,11 @@ func main() { return nil }) + if _, err := nc.QueueSubscribe(subject.RoomCreateDMSync(cfg.SiteID), "room-worker", handler.natsServerCreateDM); err != nil { + slog.Error("subscribe sync DM endpoint failed", "error", err) + os.Exit(1) + } + cons, err := js.CreateOrUpdateConsumer(ctx, streamCfg.Name, jetstream.ConsumerConfig{ Durable: "room-worker", AckPolicy: jetstream.AckExplicitPolicy, }) diff --git a/room-worker/mock_store_test.go b/room-worker/mock_store_test.go index da3290c60..b53654072 100644 --- a/room-worker/mock_store_test.go +++ b/room-worker/mock_store_test.go @@ -1,9 +1,9 @@ // Code generated by MockGen. DO NOT EDIT. -// Source: room-worker/store.go +// Source: github.com/hmchangw/chat/room-worker (interfaces: SubscriptionStore) // // Generated by this command: // -// mockgen -source=room-worker/store.go -destination=room-worker/mock_store_test.go -package=main +// mockgen -destination=mock_store_test.go -package=main . SubscriptionStore // // Package main is a generated GoMock package. @@ -169,6 +169,21 @@ func (mr *MockSubscriptionStoreMockRecorder) DeleteSubscriptionsByAccounts(ctx, return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteSubscriptionsByAccounts", reflect.TypeOf((*MockSubscriptionStore)(nil).DeleteSubscriptionsByAccounts), ctx, roomID, accounts) } +// FindDMSubscription mocks base method. +func (m *MockSubscriptionStore) FindDMSubscription(ctx context.Context, account, targetName string) (*model.Subscription, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "FindDMSubscription", ctx, account, targetName) + ret0, _ := ret[0].(*model.Subscription) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// FindDMSubscription indicates an expected call of FindDMSubscription. +func (mr *MockSubscriptionStoreMockRecorder) FindDMSubscription(ctx, account, targetName any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "FindDMSubscription", reflect.TypeOf((*MockSubscriptionStore)(nil).FindDMSubscription), ctx, account, targetName) +} + // FindUsersByAccounts mocks base method. func (m *MockSubscriptionStore) FindUsersByAccounts(ctx context.Context, accounts []string) ([]model.User, error) { m.ctrl.T.Helper() diff --git a/room-worker/store.go b/room-worker/store.go index 53bce881e..3a4909f3e 100644 --- a/room-worker/store.go +++ b/room-worker/store.go @@ -43,6 +43,8 @@ type SubscriptionStore interface { GetRoom(ctx context.Context, roomID string) (*model.Room, error) GetSubscription(ctx context.Context, account, roomID string) (*model.Subscription, error) GetUser(ctx context.Context, account string) (*model.User, error) + // FindDMSubscription returns the requester's dm/botDM sub by Name; ErrSubscriptionNotFound on miss. + FindDMSubscription(ctx context.Context, account, targetName string) (*model.Subscription, error) AddRole(ctx context.Context, account, roomID string, role model.Role) error RemoveRole(ctx context.Context, account, roomID string, role model.Role) error diff --git a/room-worker/store_mongo.go b/room-worker/store_mongo.go index fdf33839b..bf2ca7cb0 100644 --- a/room-worker/store_mongo.go +++ b/room-worker/store_mongo.go @@ -429,3 +429,20 @@ func (s *MongoStore) GetSubscriptionAccounts(ctx context.Context, roomID string) } return accounts, nil } + +// FindDMSubscription returns the requester's dm/botDM sub by Name; ErrSubscriptionNotFound on miss. +func (s *MongoStore) FindDMSubscription(ctx context.Context, account, targetName string) (*model.Subscription, error) { + var sub model.Subscription + err := s.subscriptions.FindOne(ctx, bson.M{ + "u.account": account, + "name": targetName, + "roomType": bson.M{"$in": []model.RoomType{model.RoomTypeDM, model.RoomTypeBotDM}}, + }).Decode(&sub) + if errors.Is(err, mongo.ErrNoDocuments) { + return nil, model.ErrSubscriptionNotFound + } + if err != nil { + return nil, fmt.Errorf("find dm subscription: %w", err) + } + return &sub, nil +}