From 3c5e74b3c825ac967be7cead77de20c75ceae0ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Laurent=20P=2E=20Ren=C3=A9=20de=20Cotret?= Date: Sat, 6 Jun 2026 16:00:56 +0200 Subject: [PATCH] wip --- docs/beam-templates/duckdb-parquet-sql.hs | 4 +- docs/beam-templates/postgres-blog-sql.hs | 201 ++++++++++++++++++++++ docs/cookbook/aggregations.md | 143 +++++++++++++++ docs/cookbook/index.md | 128 ++++++++++++++ docs/cookbook/joins.md | 169 ++++++++++++++++++ docs/cookbook/null-handling.md | 109 ++++++++++++ docs/cookbook/selecting-rows.md | 121 +++++++++++++ docs/cookbook/subqueries.md | 110 ++++++++++++ docs/cookbook/window-functions.md | 85 +++++++++ mkdocs.yml | 8 + 10 files changed, 1076 insertions(+), 2 deletions(-) create mode 100644 docs/beam-templates/postgres-blog-sql.hs create mode 100644 docs/cookbook/aggregations.md create mode 100644 docs/cookbook/index.md create mode 100644 docs/cookbook/joins.md create mode 100644 docs/cookbook/null-handling.md create mode 100644 docs/cookbook/selecting-rows.md create mode 100644 docs/cookbook/subqueries.md create mode 100644 docs/cookbook/window-functions.md diff --git a/docs/beam-templates/duckdb-parquet-sql.hs b/docs/beam-templates/duckdb-parquet-sql.hs index dddef20fb..6a24e7c0d 100644 --- a/docs/beam-templates/duckdb-parquet-sql.hs +++ b/docs/beam-templates/duckdb-parquet-sql.hs @@ -2,7 +2,7 @@ {-# LANGUAGE RecursiveDo #-} -- ! BUILD_COMMAND: runhaskell -XStandaloneDeriving -XTypeSynonymInstances -XDeriveGeneric -XOverloadedStrings -XFlexibleContexts -XFlexibleInstances -XTypeFamilies -XTypeApplications -XAllowAmbiguousTypes -XDeriveAnyClass -XPartialTypeSignatures -fno-warn-partial-type-signatures --- ! BUILD_DIR: beam-sqlite/examples/ +-- ! BUILD_DIR: beam-duckdb/examples/ -- ! 
FORMAT: sql module Main where @@ -78,4 +78,4 @@ main = do print _ = pure () BEAM_PLACEHOLDER - ) \ No newline at end of file + ) diff --git a/docs/beam-templates/postgres-blog-sql.hs b/docs/beam-templates/postgres-blog-sql.hs new file mode 100644 index 000000000..64cb02ebe --- /dev/null +++ b/docs/beam-templates/postgres-blog-sql.hs @@ -0,0 +1,201 @@ +{-# LANGUAGE MultiParamTypeClasses #-} + +-- ! BUILD_COMMAND: runhaskell -XStandaloneDeriving -XTypeSynonymInstances -XDeriveGeneric -XOverloadedStrings -XFlexibleContexts -XFlexibleInstances -XTypeFamilies -XTypeApplications -XAllowAmbiguousTypes -XDeriveAnyClass -XPartialTypeSignatures -fno-warn-partial-type-signatures +-- ! BUILD_DIR: cookbook-examples. +-- ! FORMAT: sql +module Main where + +import Control.Exception (bracket) +import Control.Monad +import Data.Int +import Data.Text (Text) +import Data.Time (Day, fromGregorian) +import Database.Beam +import Database.Beam.Postgres hiding (runBeamPostgresDebug) +import qualified Database.Beam.Postgres as Pg +import Database.PostgreSQL.Simple (close, connectPostgreSQL, execute_) + +data AuthorT f = Author + { _authorId :: Columnar f Int32, + _authorName :: Columnar f Text, + _authorEmail :: Columnar f Text, + _authorBio :: Columnar f (Maybe Text) + } + deriving (Generic) + +type Author = AuthorT Identity + +deriving instance Show Author + +deriving instance Eq Author + +instance Beamable AuthorT + +instance Table AuthorT where + data PrimaryKey AuthorT f = AuthorId (Columnar f Int32) deriving (Generic) + primaryKey = AuthorId . 
_authorId + +instance Beamable (PrimaryKey AuthorT) + +type AuthorId = PrimaryKey AuthorT Identity + +deriving instance Show AuthorId + +deriving instance Eq AuthorId + +deriving instance Show (PrimaryKey AuthorT (Nullable Identity)) + +deriving instance Eq (PrimaryKey AuthorT (Nullable Identity)) + +data PostT f = Post + { _postId :: Columnar f Int32, + _postTitle :: Columnar f Text, + _postPublishedOn :: Columnar f (Maybe Day), -- Nothing while still a draft + _postAuthor :: PrimaryKey AuthorT f, + _postEditor :: PrimaryKey AuthorT (Nullable f) -- nullable foreign key: not every post is edited + } + deriving (Generic) + +type Post = PostT Identity + +deriving instance Show Post + +deriving instance Eq Post + +instance Beamable PostT + +instance Table PostT where + data PrimaryKey PostT f = PostId (Columnar f Int32) deriving (Generic) + primaryKey = PostId . _postId + +instance Beamable (PrimaryKey PostT) + +type PostId = PrimaryKey PostT Identity + +deriving instance Show PostId + +deriving instance Eq PostId + +data CommentT f = Comment + { _commentId :: Columnar f Int32, + _commentPost :: PrimaryKey PostT f, + _commentAuthor :: PrimaryKey AuthorT (Nullable f), -- nullable foreign key: Nothing for anonymous comments + _commentContent :: Columnar f Text, + _commentPostedOn :: Columnar f Day + } + deriving (Generic) + +type Comment = CommentT Identity + +deriving instance Show Comment + +deriving instance Eq Comment + +instance Beamable CommentT + +instance Table CommentT where + data PrimaryKey CommentT f = CommentId (Columnar f Int32) deriving (Generic) + primaryKey = CommentId . 
_commentId + +instance Beamable (PrimaryKey CommentT) + +type CommentId = PrimaryKey CommentT Identity + +deriving instance Show CommentId + +deriving instance Eq CommentId + +-- * Database + +data BlogDb f = BlogDb + { _blogAuthors :: f (TableEntity AuthorT), + _blogPosts :: f (TableEntity PostT), + _blogComments :: f (TableEntity CommentT) + } + deriving (Generic, Database Postgres) + +blogDb :: DatabaseSettings Postgres BlogDb +blogDb = defaultDbSettings + +-- * Relationships + +authorPosts :: OneToMany Postgres BlogDb s AuthorT PostT +authorPosts = oneToMany_ (_blogPosts blogDb) _postAuthor + +editedPosts :: OneToManyOptional Postgres BlogDb s AuthorT PostT +editedPosts = oneToManyOptional_ (_blogPosts blogDb) _postEditor + +postComments :: OneToMany Postgres BlogDb s PostT CommentT +postComments = oneToMany_ (_blogComments blogDb) _commentPost + +authorComments :: OneToManyOptional Postgres BlogDb s AuthorT CommentT +authorComments = oneToManyOptional_ (_blogComments blogDb) _commentAuthor + +main :: IO () +main = + bracket (connectPostgreSQL "host=localhost port=5432 dbname=postgres") close $ \conn -> do + mapM_ + (execute_ conn) + [ "CREATE TABLE authors \ + \ ( id INT PRIMARY KEY \ + \ , name VARCHAR NOT NULL \ + \ , email VARCHAR NOT NULL \ + \ , bio VARCHAR \ + \ )", + "CREATE TABLE posts \ + \ ( id INT PRIMARY KEY \ + \ , title VARCHAR NOT NULL \ + \ , published_on DATE \ + \ , author__id INT NOT NULL REFERENCES authors(id) \ + \ , editor__id INT REFERENCES authors(id) \ + \ )", + "CREATE TABLE comments \ + \ ( id INT PRIMARY KEY \ + \ , post__id INT NOT NULL REFERENCES posts(id) \ + \ , author__id INT REFERENCES authors(id) \ + \ , content VARCHAR NOT NULL \ + \ , posted_on DATE NOT NULL \ + \ )" + ] + + -- Seed data. Inserted silently, so that only the SQL of the example + -- snippet is shown in the documentation. 
+ runBeamPostgres conn $ do + runInsert $ + insert (_blogAuthors blogDb) $ + insertValues + [ Author 1 "Ada Lovelace" "ada@example.com" (Just "Mathematician and first programmer."), + Author 2 "Grace Hopper" "grace@example.com" Nothing, + Author 3 "Alan Turing" "alan@example.com" (Just "Computer scientist and cryptanalyst.") + ] + + runInsert $ + insert (_blogPosts blogDb) $ + insertValues + [ Post 1 "Notes on the Analytical Engine" (Just (fromGregorian 2024 1 15)) (AuthorId 1) (AuthorId (Just 2)), + Post 2 "Compilers from scratch" (Just (fromGregorian 2024 2 1)) (AuthorId 2) (AuthorId Nothing), + Post 3 "On computable numbers" (Just (fromGregorian 2024 3 10)) (AuthorId 3) (AuthorId (Just 1)), + Post 4 "Thoughts on the future" Nothing (AuthorId 1) (AuthorId Nothing), + Post 5 "Debugging stories" (Just (fromGregorian 2024 4 5)) (AuthorId 2) (AuthorId (Just 3)) + ] + + runInsert $ + insert (_blogComments blogDb) $ + insertValues + [ Comment 1 (PostId 1) (AuthorId (Just 2)) "Fascinating read!" (fromGregorian 2024 1 16), + Comment 2 (PostId 1) (AuthorId Nothing) "Where can I learn more?" (fromGregorian 2024 1 17), + Comment 3 (PostId 2) (AuthorId (Just 3)) "A great introduction." (fromGregorian 2024 2 2), + Comment 4 (PostId 3) (AuthorId (Just 2)) "A classic." (fromGregorian 2024 3 11), + Comment 5 (PostId 3) (AuthorId Nothing) "Mind-blowing." (fromGregorian 2024 3 12), + Comment 6 (PostId 5) (AuthorId (Just 1)) "The moth story never gets old." 
(fromGregorian 2024 4 6) + ] + + let runBeamPostgresDebug _ = Pg.runBeamPostgresDebug putStrLn + + ( do + -- Don't print the result + let print :: (Show a) => a -> IO () + print _ = pure () + + BEAM_PLACEHOLDER + ) diff --git a/docs/cookbook/aggregations.md b/docs/cookbook/aggregations.md new file mode 100644 index 000000000..cc64bfe81 --- /dev/null +++ b/docs/cookbook/aggregations.md @@ -0,0 +1,143 @@ +These recipes show how to summarize data with `GROUP BY` and aggregate +functions, using the [blog example database](./index.md#the-example-database). +The [aggregates guide](../user-guide/queries/aggregates.md) covers the +machinery in depth. + +## Count the rows of a table + +`aggregate_` introduces aggregation; with no `group_`, all rows form a single +group. `countAll_` is SQL's `COUNT(*)`. + +!beam-query +```haskell +!postgres-blog-sql sql +Just postCount <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningOne + $ select + $ aggregate_ (\_ -> as_ @Int32 countAll_) + $ all_ (_blogPosts blogDb) + +print postCount +``` + +!!! tip "Tip" + `countAll_` can return any `Integral` type, which makes the result type + ambiguous. The `as_ @Int32` annotation resolves the ambiguity. + +## Group and count + +`group_` marks the expressions to group by. Here is the number of comments on +each commented post: + +!beam-query +```haskell +!postgres-blog-sql sql +commentCounts <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ aggregate_ (\comment -> ( group_ (_commentPost comment) + , as_ @Int32 countAll_ )) + $ all_ (_blogComments blogDb) + +mapM_ print commentCounts +``` + +With the seed data: two comments each on posts 1 and 3, one each on posts 2 +and 5 — and no row at all for post 4, which has no comments. The next recipe +fixes that. + +## Count children, including those with zero + +To include parents with no children in the counts, start from a `LEFT JOIN` +and count a column of the child table with `count_`. 
Unlike `COUNT(*)`, +`COUNT(column)` skips `NULL`s, so the all-`NULL` rows produced by the left +join count for zero: + +!beam-query +```haskell +!postgres-blog-sql sql +commentCounts <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ aggregate_ (\(title, commentId) -> ( group_ title + , as_ @Int32 (count_ commentId) )) + $ do post <- all_ (_blogPosts blogDb) + comment <- leftJoin_ (all_ (_blogComments blogDb)) + (\comment -> _commentPost comment ==. primaryKey post) + pure (_postTitle post, _commentId comment) + +mapM_ print commentCounts +``` + +This time *Thoughts on the future* appears, with a count of 0. + +## Filter on an aggregate (HAVING) + +Beam has no dedicated `HAVING` syntax: simply `filter_` the result of an +`aggregate_`, and beam emits a `HAVING` clause (or an equivalent subquery) +as appropriate. Here are the posts with at least two comments: + +!beam-query +```haskell +!postgres-blog-sql sql +popularPosts <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ filter_ (\(_, commentCount) -> commentCount >=. 2) + $ aggregate_ (\comment -> ( group_ (_commentPost comment) + , as_ @Int32 countAll_ )) + $ all_ (_blogComments blogDb) + +mapM_ print popularPosts +``` + +## MIN and MAX + +The usual SQL aggregates are available with an underscore suffix: `sum_`, +`avg_`, `min_`, `max_`. Aggregating over no rows yields `NULL` in SQL, so +these return `Maybe` values. Here is the date of the latest comment: + +!beam-query +```haskell +!postgres-blog-sql sql +Just latestActivity <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningOne + $ select + $ aggregate_ (\comment -> max_ (_commentPostedOn comment)) + $ all_ (_blogComments blogDb) + +print latestActivity +``` + +Note the two layers of `Maybe`: `runSelectReturningOne` accounts for the +query returning no row, while the inner `Maybe` (from `max_`) accounts for +`MAX` over an empty table. 
+ +## Group by an expression + +Groups need not be plain columns. Here is how many comments are attributed +vs. anonymous, grouping on a computed boolean: + +!beam-query +```haskell +!postgres-blog-sql sql +attribution <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ aggregate_ (\comment -> + let AuthorId author = _commentAuthor comment + in ( group_ (isNothing_ author) + , as_ @Int32 countAll_ )) + $ all_ (_blogComments blogDb) + +mapM_ print attribution +``` + +With the seed data, this returns `(False, 4)` and `(True, 2)`: four +attributed comments and two anonymous ones. diff --git a/docs/cookbook/index.md b/docs/cookbook/index.md new file mode 100644 index 000000000..c0fd85797 --- /dev/null +++ b/docs/cookbook/index.md @@ -0,0 +1,128 @@ +The cookbook is a collection of self-contained recipes. Each recipe shows a +practical task expressed as a beam query, together with the SQL that beam +generates. Recipes are generated by actually running the queries against a +real PostgreSQL database when this documentation is built, so they are +guaranteed to be up-to-date. + +Whereas the [tutorial](../tutorials/tutorial1.md) teaches beam from the ground +up and the user guide documents each feature in depth, the cookbook is +organized by *task*: find the thing you are trying to do, and copy the recipe. + +## The example database + +All recipes run against a small example database modeling a blog. It was +designed to exercise common situations: foreign keys, nullable columns, and +nullable foreign keys. There are three tables: + +| Table | Description | +|------------|--------------------------------------------------------------------------------------------------------------------------------------| +| `authors` | People who write posts and comments. The `bio` column is nullable. | +| `posts` | Blog posts. Every post has an author. The editor is a *nullable* foreign key, and `published_on` is null while a post is still a draft. 
| +| `comments` | Comments on posts. The author is a *nullable* foreign key: anonymous comments have no author. | + +In Haskell, the schema is declared the usual beam way (see the +[models guide](../user-guide/models.md)): + +```haskell +data AuthorT f = Author + { _authorId :: Columnar f Int32 + , _authorName :: Columnar f Text + , _authorEmail :: Columnar f Text + , _authorBio :: Columnar f (Maybe Text) + } deriving (Generic) + +data PostT f = Post + { _postId :: Columnar f Int32 + , _postTitle :: Columnar f Text + , _postPublishedOn :: Columnar f (Maybe Day) -- Nothing while still a draft + , _postAuthor :: PrimaryKey AuthorT f + , _postEditor :: PrimaryKey AuthorT (Nullable f) -- not every post is edited + } deriving (Generic) + +data CommentT f = Comment + { _commentId :: Columnar f Int32 + , _commentPost :: PrimaryKey PostT f + , _commentAuthor :: PrimaryKey AuthorT (Nullable f) -- Nothing for anonymous comments + , _commentContent :: Columnar f Text + , _commentPostedOn :: Columnar f Day + } deriving (Generic) + +data BlogDb f = BlogDb + { _blogAuthors :: f (TableEntity AuthorT) + , _blogPosts :: f (TableEntity PostT) + , _blogComments :: f (TableEntity CommentT) + } deriving (Generic, Database Postgres) + +blogDb :: DatabaseSettings Postgres BlogDb +blogDb = defaultDbSettings +``` + +The full declaration, including the `Table` instances and some useful +relationship accessors (`authorPosts`, `postComments`, ...), lives in the +documentation template +[`postgres-blog-sql.hs`](https://github.com/haskell-beam/beam/blob/master/docs/beam-templates/postgres-blog-sql.hs). + +### The data + +The database is seeded with a handful of rows, so that you can work out what +each recipe returns. + +**authors** + +| id | name | email | bio | +|----|--------------|-------------------|----------------------------------------| +| 1 | Ada Lovelace | ada@example.com | Mathematician and first programmer. 
| +| 2 | Grace Hopper | grace@example.com | *null* | +| 3 | Alan Turing | alan@example.com | Computer scientist and cryptanalyst. | + +**posts** + +| id | title | published_on | author__id | editor__id | +|----|---------------------------------|--------------|------------|------------| +| 1 | Notes on the Analytical Engine | 2024-01-15 | 1 | 2 | +| 2 | Compilers from scratch | 2024-02-01 | 2 | *null* | +| 3 | On computable numbers | 2024-03-10 | 3 | 1 | +| 4 | Thoughts on the future | *null* | 1 | *null* | +| 5 | Debugging stories | 2024-04-05 | 2 | 3 | + +**comments** + +| id | post__id | author__id | content | posted_on | +|----|----------|------------|--------------------------------|------------| +| 1 | 1 | 2 | Fascinating read! | 2024-01-16 | +| 2 | 1 | *null* | Where can I learn more? | 2024-01-17 | +| 3 | 2 | 3 | A great introduction. | 2024-02-02 | +| 4 | 3 | 2 | A classic. | 2024-03-11 | +| 5 | 3 | *null* | Mind-blowing. | 2024-03-12 | +| 6 | 5 | 1 | The moth story never gets old. | 2024-04-06 | + +## How to read a recipe + +Each recipe is a Haskell snippet followed by the SQL that beam generated for +it. The snippets run in `IO` with a `conn :: Connection` in scope; queries are +executed with `runBeamPostgresDebug putStrLn conn`, which is what captures the +SQL you see. Results are bound and printed with `print` so that recipes are +complete, runnable programs. 
+ +For example, here is a recipe that fetches all authors: + +!beam-query +```haskell +!postgres-blog-sql sql +authors <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ all_ (_blogAuthors blogDb) + +mapM_ print authors +``` + +## The recipes + +- [Selecting rows](./selecting-rows.md) — filtering, sorting, limiting, projections +- [Working with NULL](./null-handling.md) — nullable columns, nullable foreign keys, `COALESCE` +- [Joins](./joins.md) — inner joins, left joins, self-joins +- [Aggregations](./aggregations.md) — `GROUP BY`, `HAVING`, counts including zero +- [Subqueries](./subqueries.md) — `EXISTS`, scalar subqueries, greatest-n-per-group +- [Window functions](./window-functions.md) — ranking and running totals diff --git a/docs/cookbook/joins.md b/docs/cookbook/joins.md new file mode 100644 index 000000000..42ba45e9a --- /dev/null +++ b/docs/cookbook/joins.md @@ -0,0 +1,169 @@ +These recipes show how to combine tables, using the +[blog example database](./index.md#the-example-database). For a full +discussion of relationships in beam, see the +[relationships guide](../user-guide/queries/relationships.md). + +## Inner join via a foreign key + +Use `related_` to follow a foreign key to the row it references. Beam +generates the join condition for you. Here is every post with the name of its +author: + +!beam-query +```haskell +!postgres-blog-sql sql +postsAndAuthors <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ do post <- all_ (_blogPosts blogDb) + author <- related_ (_blogAuthors blogDb) (_postAuthor post) + pure (_postTitle post, _authorName author) + +mapM_ print postsAndAuthors +``` + +## Join several tables + +Joins compose monadically, so a three-table join is just more binds. 
Here is +every attributed comment, with the title of the post it was left on and the +name of the commenter: + +!beam-query +```haskell +!postgres-blog-sql sql +commentsInContext <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ do comment <- all_ (_blogComments blogDb) + post <- related_ (_blogPosts blogDb) (_commentPost comment) + commenter <- join_ (_blogAuthors blogDb) + (\author -> _commentAuthor comment ==. just_ (primaryKey author)) + pure (_postTitle post, _authorName commenter, _commentContent comment) + +mapM_ print commentsInContext +``` + +Since `comments.author__id` is a *nullable* foreign key, we cannot use +`related_` here; instead, `join_` takes an explicit join condition, where +`just_` lifts the non-nullable primary key for comparison against the +nullable one. Anonymous comments do not satisfy the join condition, so they +are absent from the result. + +## Walk a relationship in the other direction + +The template defines relationship accessors with `oneToMany_`, for example +`authorPosts` and `postComments` (see the +[relationships guide](../user-guide/queries/relationships.md)). These make +"all the children of this parent" queries read naturally. Here is every +comment left on a post written by Ada Lovelace: + +!beam-query +```haskell +!postgres-blog-sql sql +commentsOnAda <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ do author <- filter_ (\a -> _authorName a ==. val_ "Ada Lovelace") + $ all_ (_blogAuthors blogDb) + post <- authorPosts author + comment <- postComments post + pure (_postTitle post, _commentContent comment) + +mapM_ print commentsOnAda +``` + +## LEFT JOIN via a nullable foreign key + +`leftJoin_` keeps every row on the left side, pairing it with a matching row +on the right *or* an all-`NULL` row. The joined table therefore comes back +with `Nullable` columns. 
Here is every post with the name of its editor, if +any: + +!beam-query +```haskell +!postgres-blog-sql sql +postsAndEditors <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ do post <- all_ (_blogPosts blogDb) + editor <- leftJoin_ (all_ (_blogAuthors blogDb)) + (\editor -> _postEditor post ==. just_ (primaryKey editor)) + pure (_postTitle post, _authorName editor) + +mapM_ print postsAndEditors +``` + +The second component of each result is a `Maybe Text`: it is `Nothing` for +the posts which have no editor (posts 2 and 4 in the seed data). + +## LEFT JOIN to keep parents without children + +The same construct keeps rows that have no children at all. Here is every +post paired with its comments — including post 4, which has none: + +!beam-query +```haskell +!postgres-blog-sql sql +postsAndComments <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ do post <- all_ (_blogPosts blogDb) + comment <- leftJoin_ (all_ (_blogComments blogDb)) + (\comment -> _commentPost comment ==. primaryKey post) + pure (_postTitle post, _commentContent comment) + +mapM_ print postsAndComments +``` + +To *count* the comments per post instead, see +[counting children, including zero](./aggregations.md#count-children-including-those-with-zero). + +## Self-join + +Nothing prevents a table from being joined with itself. Here is every pair of +distinct posts written by the same author: + +!beam-query +```haskell +!postgres-blog-sql sql +pairs <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ do post <- all_ (_blogPosts blogDb) + other <- filter_ (\other -> _postAuthor other ==. _postAuthor post + &&. _postId other >. _postId post) + $ all_ (_blogPosts blogDb) + pure (_postTitle post, _postTitle other) + +mapM_ print pairs +``` + +The `>.` on the primary keys avoids pairing a post with itself, and avoids +returning each pair twice. 
+ +## Compare two foreign keys on the same row + +A related trick: comparing key fields on a single row. Here is every post +whose editor is a *different* person from its author: + +!beam-query +```haskell +!postgres-blog-sql sql +reviewed <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ filter_ (\post -> _postEditor post /=. just_ (_postAuthor post)) + $ all_ (_blogPosts blogDb) + +mapM_ print reviewed +``` + +Posts without an editor are excluded automatically: their `editor__id` is +`NULL`, and `NULL <> x` is not true in SQL. diff --git a/docs/cookbook/null-handling.md b/docs/cookbook/null-handling.md new file mode 100644 index 000000000..ab57c01c7 --- /dev/null +++ b/docs/cookbook/null-handling.md @@ -0,0 +1,109 @@ +SQL `NULL` is a frequent source of surprises. These recipes show how beam +deals with nullable columns and nullable foreign keys, using the +[blog example database](./index.md#the-example-database): `authors.bio` and +`posts.published_on` are nullable columns, while `posts.editor__id` and +`comments.author__id` are nullable foreign keys. + +## Find rows where a column IS NULL + +Nullable columns have Haskell type `Maybe a`, and `isNothing_` generates +`IS NULL`. For example, drafts are posts that have not been published yet: + +!beam-query +```haskell +!postgres-blog-sql sql +drafts <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ filter_ (\post -> isNothing_ (_postPublishedOn post)) + $ all_ (_blogPosts blogDb) + +mapM_ print drafts +``` + +With the seed data, this returns post 4, *Thoughts on the future*. The +converse, `isJust_`, generates `IS NOT NULL` and would return all published +posts. + +!!! warning "Warning" + Do not write `_postPublishedOn post ==. val_ Nothing` to look for NULLs. + This generates `published_on = NULL`, which is *never* true in SQL — + `NULL` is not equal to anything, not even itself. Always use `isNothing_` + and `isJust_`. 
+ +## Supply a default for NULL values (COALESCE) + +Use `fromMaybe_` (or `coalesce_` for more than one fallback) to replace `NULL` +with a default value: + +!beam-query +```haskell +!postgres-blog-sql sql +bios <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ do author <- all_ (_blogAuthors blogDb) + pure ( _authorName author + , fromMaybe_ (val_ "No bio provided") (_authorBio author)) + +mapM_ print bios +``` + +Grace Hopper has no bio in the seed data, so her row reads +`("Grace Hopper", "No bio provided")` — and note that the result type is +`Text`, not `Maybe Text`: the default proves to the type system that the +value cannot be missing. + +## Find rows where a foreign key IS NULL + +A nullable foreign key is represented as a `PrimaryKey` over `Nullable f`, +so its columns are `Maybe` values. Unwrap the key with a pattern match and +test the column with `isNothing_`. For example, anonymous comments are +comments without an author: + +!beam-query +```haskell +!postgres-blog-sql sql +anonymousComments <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ filter_ (\comment -> + let AuthorId author = _commentAuthor comment + in isNothing_ author) + $ all_ (_blogComments blogDb) + +mapM_ print anonymousComments +``` + +With the seed data, this returns comments 2 and 5. + +!!! warning "Warning" + Beam lets you write `_commentAuthor comment ==. nothing_`, but just as + with `==. val_ Nothing` above, the generated SQL is `author__id = NULL`, + which never holds. Unwrap the key and use `isNothing_`. 
+ +## Find rows where a foreign key IS NOT NULL + +The same pattern with `isJust_` finds posts that went through an editor: + +!beam-query +```haskell +!postgres-blog-sql sql +editedPosts' <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ filter_ (\post -> + let AuthorId editor = _postEditor post + in isJust_ editor) + $ all_ (_blogPosts blogDb) + +mapM_ print editedPosts' +``` + +With the seed data, this returns posts 1, 3 and 5. To go further and fetch +the editors themselves, see +[joining against a nullable foreign key](./joins.md#left-join-via-a-nullable-foreign-key). diff --git a/docs/cookbook/selecting-rows.md b/docs/cookbook/selecting-rows.md new file mode 100644 index 000000000..9d25a7565 --- /dev/null +++ b/docs/cookbook/selecting-rows.md @@ -0,0 +1,121 @@ +These recipes cover the most common ways of getting rows out of a database. +All of them run against the [blog example database](./index.md#the-example-database). + +## Fetch all rows from a table + +Use `all_` to select every row of a table, and `runSelectReturningList` to +fetch the results. + +!beam-query +```haskell +!postgres-blog-sql sql +authors <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ all_ (_blogAuthors blogDb) + +mapM_ print authors +``` + +## Fetch a row by primary key + +Use `lookup_` to fetch a single row by its primary key. Since the row may not +exist, use `runSelectReturningOne`, which returns a `Maybe`. + +!beam-query +```haskell +!postgres-blog-sql sql +maybeAuthor <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningOne + $ lookup_ (_blogAuthors blogDb) (AuthorId 1) + +print maybeAuthor +``` + +## Filter rows with a condition + +Use `filter_` to add a `WHERE` clause. Comparison operators are spelled with a +trailing dot (`==.`, `>.`, `<=.`, ...) to distinguish them from their Haskell +counterparts. Here we look for posts published after February 1st, 2024. 
+Because `published_on` is nullable, the comparison is between `Maybe Day` +values, and we lift the literal with `just_`: + +!beam-query +```haskell +!postgres-blog-sql sql +recentPosts <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ filter_ (\post -> _postPublishedOn post >. just_ (val_ (fromGregorian 2024 2 1))) + $ all_ (_blogPosts blogDb) + +mapM_ print recentPosts +``` + +With the seed data, this returns posts 3 and 5. Note that the draft (post 4) +is excluded automatically: its `published_on` is `NULL`, and `NULL > '2024-02-01'` +is not true. + +## Return only some columns + +Queries are monadic: bind the rows of a table and `pure` the columns (or +arbitrary expressions) you want. Beam narrows the `SELECT` projection +accordingly. + +!beam-query +```haskell +!postgres-blog-sql sql +titles <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ do post <- all_ (_blogPosts blogDb) + pure (_postTitle post, _postPublishedOn post) + +mapM_ print titles +``` + +## Sort and limit + +Use `orderBy_` with `asc_`/`desc_` for `ORDER BY`, and `limit_`/`offset_` for +`LIMIT`/`OFFSET`. Here are the three most recent comments: + +!beam-query +```haskell +!postgres-blog-sql sql +latestComments <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ limit_ 3 + $ orderBy_ (desc_ . _commentPostedOn) + $ all_ (_blogComments blogDb) + +mapM_ print latestComments +``` + +!!! note "Note" + `limit_` must be applied *outside* `orderBy_` (limit-then-order would be + a different query!). Beam composes these in the order you write them. + +## Select distinct values + +Use `nub_` for `SELECT DISTINCT`. 
For example, the set of days on which a +comment was posted: + +!beam-query +```haskell +!postgres-blog-sql sql +days <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ nub_ + $ fmap _commentPostedOn + $ all_ (_blogComments blogDb) + +mapM_ print days +``` diff --git a/docs/cookbook/subqueries.md b/docs/cookbook/subqueries.md new file mode 100644 index 000000000..471f44d71 --- /dev/null +++ b/docs/cookbook/subqueries.md @@ -0,0 +1,110 @@ +These recipes nest one query inside another, using the +[blog example database](./index.md#the-example-database). + +## EXISTS: rows with at least one related row + +`exists_` embeds a whole query as a boolean expression. Here are the authors +who have at least one unpublished draft: + +!beam-query +```haskell +!postgres-blog-sql sql +authorsWithDrafts <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ filter_ (\author -> + exists_ (filter_ (\post -> _postAuthor post ==. primaryKey author + &&. isNothing_ (_postPublishedOn post)) + (all_ (_blogPosts blogDb)))) + $ all_ (_blogAuthors blogDb) + +mapM_ print authorsWithDrafts +``` + +With the seed data, only Ada Lovelace has a draft. Note that the inner query +freely refers to `author`, bound in the outer query — a *correlated* +subquery. + +## NOT EXISTS: rows with no related row (anti-join) + +Negate `exists_` with `not_` to find rows *without* a match. Here are the +posts nobody has commented on: + +!beam-query +```haskell +!postgres-blog-sql sql +quietPosts <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ filter_ (\post -> + not_ (exists_ (filter_ (\comment -> _commentPost comment ==. primaryKey post) + (all_ (_blogComments blogDb))))) + $ all_ (_blogPosts blogDb) + +mapM_ print quietPosts +``` + +This is the `NOT EXISTS` spelling of an *anti-join*. The same result can be +obtained with a `LEFT JOIN` and an `IS NULL` filter, but `NOT EXISTS` +states the intent more directly. 
+ +## Scalar subqueries + +When a query returns a single value, `subquery_` turns it into an expression +usable inside another query. Here are the posts published on or after the +date of the very first comment on the site: + +!beam-query +```haskell +!postgres-blog-sql sql +posts <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ filter_ (\post -> + _postPublishedOn post + >=. subquery_ (aggregate_ (min_ . _commentPostedOn) + (all_ (_blogComments blogDb)))) + $ all_ (_blogPosts blogDb) + +mapM_ print posts +``` + +The types line up because both sides are `Maybe Day`: `published_on` is +nullable, and `MIN` over an empty table would be `NULL`. As usual, the draft +post is excluded by the `NULL` comparison semantics. + +## Greatest-n-per-group: the latest comment on each post + +A classic: for each post, fetch the most recent comment *row* (not just its +date). Compute the latest date per post in an aggregation subquery, then join +the comments table against it: + +!beam-query +```haskell +!postgres-blog-sql sql +latestComments <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ do (postKey, lastCommentedOn) <- + aggregate_ (\comment -> ( group_ (_commentPost comment) + , max_ (_commentPostedOn comment) )) + $ all_ (_blogComments blogDb) + comment <- join_ (_blogComments blogDb) + (\comment -> _commentPost comment ==. postKey + &&. just_ (_commentPostedOn comment) ==. lastCommentedOn) + pure comment + +mapM_ print latestComments +``` + +With the seed data, this returns comments 2, 3, 5 and 6 — the newest comment +on each of the four commented posts. + +!!! note "Note" + If two comments on the same post shared the same date, both would be + returned. Breaking such ties requires ranking rows; see + [window functions](./window-functions.md). 
diff --git a/docs/cookbook/window-functions.md b/docs/cookbook/window-functions.md new file mode 100644 index 000000000..698a8f9fa --- /dev/null +++ b/docs/cookbook/window-functions.md @@ -0,0 +1,85 @@ +Window functions compute a value over a *window* of related rows — without +collapsing the rows the way `GROUP BY` does. These recipes use the +[blog example database](./index.md#the-example-database); the +[window functions guide](../user-guide/queries/window-functions.md) explains +the machinery in detail. + +!!! note "Note" + Window functions are supported by `beam-postgres` and `beam-duckdb`, but + not `beam-sqlite`. + +## Rank rows within a group + +`withWindow_` introduces the window frames, then the projection function +combines each row with expressions computed `over_` a frame. Here is each +post ranked by publication date *within its author's posts*: + +!beam-query +```haskell +!postgres-blog-sql sql +rankedPosts <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ withWindow_ (\post -> frame_ (partitionBy_ (_postAuthor post)) + (orderPartitionBy_ (asc_ (_postPublishedOn post))) + noBounds_) + (\post window -> ( _postTitle post + , as_ @Int32 rank_ `over_` window )) + $ all_ (_blogPosts blogDb) + +mapM_ print rankedPosts +``` + +With the seed data, Grace Hopper's *Compilers from scratch* ranks 1 and +*Debugging stories* ranks 2; Ada Lovelace's unpublished draft sorts after her +published post, because PostgreSQL places `NULL`s last when ordering in +ascending order. + +## Running totals + +An ordered window with no partition runs over the whole table. 
Here is the +cumulative number of comments on the site over time: + +!beam-query +```haskell +!postgres-blog-sql sql +commentActivity <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ withWindow_ (\comment -> frame_ noPartition_ + (orderPartitionBy_ (asc_ (_commentPostedOn comment))) + noBounds_) + (\comment window -> ( _commentPostedOn comment + , as_ @Int32 countAll_ `over_` window )) + $ all_ (_blogComments blogDb) + +mapM_ print commentActivity +``` + +Each comment is paired with the number of comments posted up to and including +its date. + +## Compare each row to its group's aggregate + +A window aggregate makes "this row vs. its group" comparisons easy, since the +per-group value is available on every row. Here is each comment alongside the +total number of comments on the same post: + +!beam-query +```haskell +!postgres-blog-sql sql +commentsWithContext <- + runBeamPostgresDebug putStrLn conn + $ runSelectReturningList + $ select + $ withWindow_ (\comment -> frame_ (partitionBy_ (_commentPost comment)) + noOrder_ + noBounds_) + (\comment window -> ( _commentContent comment + , as_ @Int32 countAll_ `over_` window )) + $ all_ (_blogComments blogDb) + +mapM_ print commentsWithContext +``` diff --git a/mkdocs.yml b/mkdocs.yml index 849e7c3be..ce782a26e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -33,6 +33,14 @@ nav: - 'UPDATE': 'user-guide/manipulation/update.md' - 'DELETE': 'user-guide/manipulation/delete.md' - 'COPY': 'user-guide/manipulation/copy.md' + - Cookbook: + - 'Introduction': 'cookbook/index.md' + - 'Selecting rows': 'cookbook/selecting-rows.md' + - 'Working with NULL': 'cookbook/null-handling.md' + - 'Joins': 'cookbook/joins.md' + - 'Aggregations': 'cookbook/aggregations.md' + - 'Subqueries': 'cookbook/subqueries.md' + - 'Window functions': 'cookbook/window-functions.md' - Schema management: - 'The Migrations Framework': 'schema-guide/migrations.md' - Backends: