-
Notifications
You must be signed in to change notification settings - Fork 945
feat: add article for when to use Bloom Filter #7928
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 7 commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
aa0c8c1
feat(blog): add bloom filters in Postgres blog with CodeHike demos
ankur-arch fd84e22
refactor(blog): drop AgentPrompt usages, add labeled-step demo runner
ankur-arch 0138da8
fix(blog): runner panel layout and full-output terminal
ankur-arch e085069
docs(blog): add btree primer + bloom-filter-to-bloom-index bridge
ankur-arch 109a797
docs(blog): use B-tree spelling in prose, add to cspell dictionary
ankur-arch fd85ae5
fix(blog): runner terminal highlight extends across scroll, add copy …
ankur-arch 2c28d38
Merge branch 'main' into bloom-filter-blog-ankur
ankur-arch 9bca8e1
docs(blog): lead with bloom filters, rename ambiguous section
ankur-arch 25db701
feat(blog): wire bloom post into postgres-features series, add B-tree…
ankur-arch 03cc01c
feat(blog): register postgres-features series, add author landing pages
ankur-arch ad16f87
fix(blog): BTreeDemo crashed on next; clamp annotation line numbers
ankur-arch fa6b58c
Update apps/blog/content/blog/postgres-bloom-index-the-overlooked-pos…
ankur-arch File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
122 changes: 122 additions & 0 deletions
122
...log/content/blog/postgres-bloom-index-the-overlooked-postgres-feature/BloomDemoRunner.tsx
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,122 @@ | ||
| import { highlight, type HighlightedCode } from "codehike/code"; | ||
| import { BloomDemoRunnerClient, type RunnerStep } from "./BloomDemoRunnerClient"; | ||
|
|
||
// Demo script shown in the runner's code pane. It is only handed to `highlight` as text by
// BloomDemoRunner and is never executed in this file. STEPS addresses lines of this string by
// number, so adding or removing a line here means the `lines` ranges in STEPS must be updated too.
| const SOURCE = `import { create, isDatabaseSuccess } from "create-db"; | ||
| import { Client } from "pg"; | ||
|
|
||
| console.log("Provisioning a temporary Prisma Postgres database (1h TTL)..."); | ||
| const db = await create({ ttl: "1h" }); | ||
| if (!isDatabaseSuccess(db)) throw new Error(db.message); | ||
| console.log(\` claim URL: \${db.claimUrl}\`); | ||
|
|
||
| const client = new Client({ connectionString: db.connectionString! }); | ||
| await client.connect(); | ||
|
|
||
| await client.query(\`CREATE EXTENSION IF NOT EXISTS bloom\`); | ||
| await client.query(\` | ||
| DROP TABLE IF EXISTS cache_entries; | ||
| CREATE TABLE cache_entries ( | ||
| id BIGSERIAL PRIMARY KEY, | ||
| tenant_id TEXT, user_id TEXT, endpoint TEXT, | ||
| locale TEXT, region TEXT, api_version INT, | ||
| payload JSONB | ||
| ); | ||
| \`); | ||
|
|
||
| console.log(\`Seeding \${N.toLocaleString()} rows...\`); | ||
| for (let start = 0; start < N; start += BATCH) { | ||
| // build BATCH rows mixing tenants / users / endpoints / locales / regions / versions | ||
| await client.query(\`INSERT INTO cache_entries (...) VALUES \${placeholders}\`, params); | ||
| } | ||
| await client.query(\`ANALYZE cache_entries\`); | ||
|
|
||
| // A. Six B-tree indexes (one per column) | ||
| for (const c of COLS) { | ||
| await client.query(\`CREATE INDEX btree_\${c} ON cache_entries (\${c})\`); | ||
| } | ||
| const btreeMB = await totalIndexMB(); | ||
| const btreeMs = await runLookups(); | ||
|
|
||
| // B. One bloom index covering all six columns | ||
| await client.query(\` | ||
| CREATE INDEX cache_bloom_idx ON cache_entries | ||
| USING bloom (\${COLS.join(", ")}) | ||
| \`); | ||
| const bloomMB = await totalIndexMB(); | ||
| const bloomMs = await runLookups(); | ||
|
|
||
| const shrink = ((1 - bloomMB / btreeMB) * 100).toFixed(0); | ||
| console.log( | ||
| \`Bloom index is \${shrink}% smaller \` + | ||
| \`(\${bloomMB.toFixed(1)} MB vs \${btreeMB.toFixed(1)} MB)\`, | ||
| );`; | ||
|
|
||
// Walkthrough stages handed to BloomDemoRunnerClient, in display order. Each `lines` range is a
// pair of line numbers into the SOURCE snippet (the lines marked while the step is current), and
// `output` holds the canned terminal lines rendered for that step.
| const STEPS: RunnerStep[] = [ | ||
| { | ||
| title: "Provision DB", | ||
| caption: | ||
| "Spin up a temporary Prisma Postgres database with a 1 hour TTL and connect over pg.", | ||
| lines: { from: 1, to: 10 }, | ||
| output: [ | ||
| "Provisioning a temporary Prisma Postgres database (1h TTL)...", | ||
| " claim URL: https://create-db.prisma.io/claim?projectID=...", | ||
| ], | ||
| }, | ||
| { | ||
| title: "Schema + bloom extension", | ||
| caption: | ||
| "Enable the bloom extension once per database, then create the wide cache_entries table.", | ||
| lines: { from: 12, to: 21 }, | ||
| output: ["Creating cache_entries table and enabling bloom extension..."], | ||
| }, | ||
| { | ||
| title: "Seed 10,000 rows", | ||
| caption: | ||
| "Insert a mix of tenants, users, endpoints, locales, regions, and api versions so the lookups are realistic.", | ||
| lines: { from: 23, to: 28 }, | ||
| output: ["Seeding 10,000 rows...", " seeded in 1.2s"], | ||
| }, | ||
| { | ||
| title: "A: Six B-tree indexes", | ||
| caption: | ||
| "One B-tree per filterable column. Measure the total index size and the time for three lookups.", | ||
| lines: { from: 30, to: 35 }, | ||
| output: [ | ||
| "", | ||
| "A. Six B-tree indexes (one per column)...", | ||
| " index size: 0.5 MB", | ||
| " 3 lookups: 306.5 ms", | ||
| ], | ||
| }, | ||
| { | ||
| title: "B: One bloom index", | ||
| caption: | ||
| "Drop the B-trees and create a single bloom index spanning all six columns. Same three lookups.", | ||
| lines: { from: 37, to: 43 }, | ||
| output: [ | ||
| "", | ||
| "B. One bloom index (all six columns)...", | ||
| " index size: 0.2 MB", | ||
| " 3 lookups: 302.7 ms", | ||
| ], | ||
| }, | ||
| { | ||
| title: "Compare", | ||
| caption: | ||
| "Print the difference. The bloom index is much smaller, and it covers any subset of those six columns.", | ||
| lines: { from: 45, to: 49 }, | ||
| output: [ | ||
| "", | ||
| " Bloom index is 70% smaller (0.2 MB vs 0.5 MB),", | ||
|
ankur-arch marked this conversation as resolved.
Outdated
|
||
| " and one index covers any subset of those six columns.", | ||
| ], | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
| }, | ||
| ]; | ||
|
|
||
/**
 * Server-side entry point for the bloom index walkthrough.
 *
 * Highlights the SOURCE demo script once as TypeScript with the
 * "github-from-css" theme, then hands the highlighted code and the STEPS
 * list to the client component that drives the interactive runner.
 */
export async function BloomDemoRunner() {
  const rawCode = { value: SOURCE, lang: "typescript", meta: "" };
  const baseCode = (await highlight(rawCode, "github-from-css")) as HighlightedCode;

  return <BloomDemoRunnerClient baseCode={baseCode} steps={STEPS} />;
}
226 changes: 226 additions & 0 deletions
226
...ntent/blog/postgres-bloom-index-the-overlooked-postgres-feature/BloomDemoRunnerClient.tsx
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,226 @@ | ||
| "use client"; | ||
|
|
||
| import { useEffect, useMemo, useRef, useState } from "react"; | ||
| import { InnerLine, Pre, type AnnotationHandler, type HighlightedCode } from "codehike/code"; | ||
| import { Check, ChevronLeft, ChevronRight, Copy, Pause, Play } from "lucide-react"; | ||
|
|
||
/** One stage of the demo walkthrough rendered by BloomDemoRunnerClient. */
| export type RunnerStep = { | ||
/** Short label shown in the step counter and on the step pill. */
| title: string; | ||
/** Explanatory sentence rendered in the caption area while the step is current. */
| caption: string; | ||
/** Inclusive line range; codeForStep emits one "mark" annotation per line from `from` through `to`. */
| lines: { from: number; to: number }; | ||
/** Terminal lines rendered for this step; an empty string renders as a spacer line. */
| output: string[]; | ||
| }; | ||
|
|
||
/** Props of BloomDemoRunnerClient: the highlighted code to annotate and the ordered steps. */
| type Props = { | ||
| baseCode: HighlightedCode; | ||
| steps: RunnerStep[]; | ||
| }; | ||
|
|
||
/** Delay in milliseconds passed to setInterval between automatic step advances. */
| const STEP_HOLD_MS = 6500; | ||
|
|
||
/**
 * Code Hike annotation handler for "mark" annotations. Each annotated line is rendered as an
 * InnerLine carrying a `data-mark` attribute set to the annotation's query, or "active" when
 * the query is empty.
 */
| const markHandler: AnnotationHandler = { | ||
| name: "mark", | ||
| AnnotatedLine: ({ annotation, ...props }) => ( | ||
| <InnerLine merge={props} data-mark={annotation.query || "active"} /> | ||
| ), | ||
| }; | ||
|
|
||
/** Handler list passed to the `<Pre>` element that renders the code pane. */
| const handlers = [markHandler]; | ||
|
|
||
/**
 * Builds the highlighted code for one step by marking every line in the step's range.
 *
 * Existing "mark" annotations on `base` are dropped and replaced with one
 * single-line "mark" annotation (query "active") per line of `step.lines`.
 * The range is clamped to the lines that actually exist in `base.code`, so a
 * range that starts before line 1 or runs past the end of the snippet only
 * marks real lines, and an inverted or fully out-of-range span marks nothing.
 *
 * @param base - Highlighted code to annotate; it is not mutated.
 * @param step - Step whose inclusive `lines.from`..`lines.to` range is marked.
 * @returns A shallow copy of `base` with the step's "mark" annotations appended
 *   after all non-"mark" annotations.
 */
function codeForStep(base: HighlightedCode, step: RunnerStep): HighlightedCode {
  // Annotations pointing at lines the snippet does not have must never reach the renderer.
  const lineCount = base.code.split("\n").length;
  const firstLine = Math.max(1, step.lines.from);
  const lastLine = Math.min(lineCount, step.lines.to);

  const lineMarks: HighlightedCode["annotations"] = [];
  for (let n = firstLine; n <= lastLine; n += 1) {
    lineMarks.push({
      name: "mark",
      query: "active",
      fromLineNumber: n,
      toLineNumber: n,
    });
  }
  return {
    ...base,
    annotations: [...base.annotations.filter((a) => a.name !== "mark"), ...lineMarks],
  };
}
|
|
||
/**
 * Interactive walkthrough of the demo script: a code pane with the current
 * step's lines marked, next to a terminal pane listing every step's output.
 *
 * Behavior:
 * - While playing and at least 20% visible, advances to the next step every
 *   STEP_HOLD_MS milliseconds, wrapping around after the last step.
 * - Previous/next buttons and step pills jump to a step and pause playback.
 * - On each step change both panes scroll so the active lines are in view.
 * - The copy button writes all output lines (joined with newlines) to the
 *   clipboard and shows "Copied" for 1.6 seconds. A rejected clipboard write
 *   is handled and leaves the button in its "Copy" state.
 * - Renders nothing when `steps` is empty.
 *
 * @param baseCode - Highlighted code that per-step "mark" annotations are added to.
 * @param steps - Ordered walkthrough steps.
 */
export function BloomDemoRunnerClient({ baseCode, steps }: Props) {
  const [stepIndex, setStepIndex] = useState(0);
  const [playing, setPlaying] = useState(true);
  const [inView, setInView] = useState(false);
  const [copied, setCopied] = useState(false);
  const containerRef = useRef<HTMLDivElement>(null);
  const codeScrollRef = useRef<HTMLDivElement>(null);
  const terminalRef = useRef<HTMLDivElement>(null);
  const copyTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  // Track whether the runner is on screen; without IntersectionObserver (or a
  // mounted container) treat it as always visible so autoplay still works.
  useEffect(() => {
    const el = containerRef.current;
    if (!el || typeof IntersectionObserver === "undefined") {
      setInView(true);
      return;
    }
    const obs = new IntersectionObserver(([entry]) => setInView(entry.isIntersecting), {
      threshold: 0.2,
    });
    obs.observe(el);
    return () => obs.disconnect();
  }, []);

  // Autoplay. With no steps there is nothing to advance to, and `% 0` would
  // turn the index into NaN, so the timer is not started at all.
  useEffect(() => {
    if (!playing || !inView || steps.length === 0) return;
    const id = setInterval(() => {
      setStepIndex((i) => (i + 1) % steps.length);
    }, STEP_HOLD_MS);
    return () => clearInterval(id);
  }, [playing, inView, steps.length]);

  // `step` is undefined when `steps` is empty; hooks below must still run, so
  // the early return for that case happens only after the last hook.
  const step = steps[stepIndex];
  const code = useMemo(
    () => (step ? codeForStep(baseCode, step) : baseCode),
    [baseCode, step],
  );

  // After a step change, scroll each pane to its first active line (20px of
  // headroom above it).
  useEffect(() => {
    const codeEl = codeScrollRef.current;
    if (codeEl) {
      const highlighted = codeEl.querySelector<HTMLElement>('[data-mark="active"]');
      if (highlighted) {
        const parent = codeEl;
        const top = highlighted.offsetTop - parent.offsetTop;
        parent.scrollTo({ top: Math.max(0, top - 20), behavior: "smooth" });
      }
    }
    const termEl = terminalRef.current;
    if (termEl) {
      const active = termEl.querySelector<HTMLElement>('[data-step-state="active"]');
      if (active) {
        const top = active.offsetTop - termEl.offsetTop;
        termEl.scrollTo({ top: Math.max(0, top - 20), behavior: "smooth" });
      }
    }
  }, [stepIndex]);

  /** Jumps to a step (wrapping negative and overflowing indexes) and pauses autoplay. */
  function goTo(index: number) {
    setPlaying(false);
    setStepIndex(((index % steps.length) + steps.length) % steps.length);
  }

  // Every output line of every step, tagged with the step it belongs to.
  const allOutput = steps.flatMap((s, i) => s.output.map((line) => ({ line, stepIdx: i })));

  /** Copies the full terminal output and briefly flips the button to "Copied". */
  function copyOutput() {
    const text = allOutput.map((e) => e.line).join("\n");
    if (!navigator.clipboard) return;
    navigator.clipboard
      .writeText(text)
      .then(() => {
        setCopied(true);
        if (copyTimeoutRef.current) clearTimeout(copyTimeoutRef.current);
        copyTimeoutRef.current = setTimeout(() => setCopied(false), 1600);
      })
      .catch(() => {
        // The write can be rejected (e.g. permission denied or the document is
        // not focused). Nothing was copied, so keep the button saying "Copy"
        // instead of surfacing an unhandled promise rejection.
        setCopied(false);
      });
  }

  // Clear a pending "Copied" reset timer on unmount.
  useEffect(() => {
    return () => {
      if (copyTimeoutRef.current) clearTimeout(copyTimeoutRef.current);
    };
  }, []);

  // No current step (empty `steps`): there is nothing meaningful to render.
  if (!step) return null;

  return (
    <div ref={containerRef} className="runner not-prose">
      <div className="runner-header">
        <span className="runner-filename">
          <span className="runner-filename-dots" aria-hidden="true">
            <span />
            <span />
            <span />
          </span>
          index.ts
        </span>
        <span className="runner-step-counter">
          Step {stepIndex + 1} of {steps.length}
          <span className="runner-step-counter-label"> · {step.title}</span>
        </span>
        <div className="runner-nav">
          <button
            type="button"
            className="runner-toggle"
            onClick={() => goTo(stepIndex - 1)}
            aria-label="Previous step"
          >
            <ChevronLeft size={16} />
          </button>
          <button
            type="button"
            className="runner-toggle"
            onClick={() => setPlaying((p) => !p)}
            aria-label={playing ? "Pause demo" : "Play demo"}
          >
            {playing ? <Pause size={16} /> : <Play size={16} />}
          </button>
          <button
            type="button"
            className="runner-toggle"
            onClick={() => goTo(stepIndex + 1)}
            aria-label="Next step"
          >
            <ChevronRight size={16} />
          </button>
        </div>
      </div>

      <div className="runner-steps" role="tablist" aria-label="Demo steps">
        {steps.map((s, i) => (
          <button
            key={s.title}
            type="button"
            role="tab"
            aria-selected={i === stepIndex}
            data-active={i === stepIndex ? "true" : undefined}
            className="runner-step-pill"
            onClick={() => goTo(i)}
          >
            <span className="runner-step-pill-num">{i + 1}</span>
            <span className="runner-step-pill-label">{s.title}</span>
          </button>
        ))}
      </div>

      <div className="runner-caption">{step.caption}</div>

      <div className="runner-body">
        <div className="runner-pane runner-pane-code">
          <div className="runner-pane-label">
            <span>index.ts</span>
          </div>
          <div className="runner-code" ref={codeScrollRef}>
            <Pre code={code} handlers={handlers} />
          </div>
        </div>
        <div className="runner-pane runner-pane-terminal">
          <div className="runner-pane-label runner-pane-label-terminal">
            <span>terminal output</span>
            <button
              type="button"
              className="runner-copy"
              onClick={copyOutput}
              aria-label={copied ? "Copied terminal output" : "Copy terminal output"}
            >
              {copied ? <Check size={12} /> : <Copy size={12} />}
              <span>{copied ? "Copied" : "Copy"}</span>
            </button>
          </div>
          <div className="runner-terminal-body" ref={terminalRef}>
            {allOutput.map((entry, i) => {
              const state =
                entry.stepIdx === stepIndex
                  ? "active"
                  : entry.stepIdx < stepIndex
                    ? "past"
                    : "future";
              return (
                <div key={i} className="runner-terminal-line" data-step-state={state}>
                  {entry.line || " "}
                </div>
              );
            })}
          </div>
        </div>
      </div>
    </div>
  );
}
45 changes: 45 additions & 0 deletions
45
...log/content/blog/postgres-bloom-index-the-overlooked-postgres-feature/BloomFilterDemo.tsx
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,45 @@ | ||
| import { highlight, type HighlightedCode } from "codehike/code"; | ||
| import { BloomFilterDemoClient } from "./BloomFilterDemoClient"; | ||
|
|
||
// Raw code snippets for the bloom filter demo. BloomFilterDemo highlights each entry separately
// as TypeScript and passes the results to BloomFilterDemoClient in this same order.
| const SNIPPETS: string[] = [ | ||
| `const bits = new Uint8Array(16); | ||
| // all zeros, nothing added`, | ||
|
|
||
| `function add(item) { | ||
| for (const i of hashes(item)) { | ||
| bits[i] = 1; | ||
| } | ||
| } | ||
|
|
||
| add("alice"); // hashes -> [2, 7, 11]`, | ||
|
|
||
| `add("bob"); // hashes -> [4, 7, 13] | ||
| // bit 7 was already 1 from alice | ||
| // no way to tell who set it`, | ||
|
|
||
| `function check(item) { | ||
| return hashes(item).every( | ||
| (i) => bits[i] === 1, | ||
| ); | ||
| } | ||
|
|
||
| check("alice"); // [2, 7, 11] all 1 | ||
| // probably present, recheck`, | ||
|
|
||
| `check("carol"); // [0, 5, 9] | ||
| // bits[0] is 0 | ||
| // definitely not present`, | ||
|
|
||
| `check("dave"); // [4, 11, 13] | ||
| // all three happen to be 1 | ||
| // from alice + bob | ||
| // dave was never added | ||
| // false positive`, | ||
| ]; | ||
|
|
||
/**
 * Server-side entry point for the bloom filter demo.
 *
 * Highlights every entry of SNIPPETS concurrently as TypeScript with the
 * "github-from-css" theme and passes the results, in SNIPPETS order, to the
 * client component.
 */
export async function BloomFilterDemo() {
  const pending = SNIPPETS.map((value) =>
    highlight({ value, lang: "typescript", meta: "" }, "github-from-css"),
  );
  const highlighted = (await Promise.all(pending)) as HighlightedCode[];

  return <BloomFilterDemoClient snippets={highlighted} />;
}
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.