diff --git a/CHANGELOG.md b/CHANGELOG.md
index cee3cc24..d869f9d7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,8 +5,7 @@
 - RowBinary: truncate NaiveDateTime resulting from DateTime64 https://github.com/plausible/ch/pull/297
 - Add support for `JSON(...)` (JSON with options) type https://github.com/plausible/ch/pull/309
 - RowBinary: de- and encode dynamic JSON https://github.com/plausible/ch/pull/296
-- use gregorian seconds for naive datetime encoding in RowBinary (it's faster this way) https://github.com/plausible/ch/pull/311
-- use `DateTime.to_unix/2` + `DateTime.to_naive/1` for naive datetime decoding in RowBinary https://github.com/plausible/ch/pull/313
+- use gregorian seconds for naive datetime in RowBinary (it's faster this way) https://github.com/plausible/ch/pull/311, https://github.com/plausible/ch/pull/320
 - allow non-UTC timezones for DateTime64 RowBinary encoding https://github.com/plausible/ch/pull/315
 - use gregorian days in RowBinary dates https://github.com/plausible/ch/pull/318
 
diff --git a/bench/naive64_decode.exs b/bench/naive64_decode.exs
new file mode 100644
index 00000000..d9da3af2
--- /dev/null
+++ b/bench/naive64_decode.exs
@@ -0,0 +1,50 @@
+defmodule Bench do
+  @epoch_gregorian_seconds 62_167_219_200
+
+  # current: allocate DateTime, discard it
+  def via_unix(ticks, time_unit) do
+    ticks
+    |> DateTime.from_unix!(time_unit)
+    |> DateTime.to_naive()
+  end
+
+  # direct: decompose ticks into seconds + sub-second remainder
+  # (floor_div/mod keep the remainder non-negative for pre-epoch ticks)
+  def via_gregorian(ticks, time_unit) do
+    seconds = Integer.floor_div(ticks, time_unit)
+    remainder = Integer.mod(ticks, time_unit)
+
+    microsecond =
+      if time_unit <= 1_000_000 do
+        {remainder * div(1_000_000, time_unit), precision(time_unit)}
+      else
+        {div(remainder, div(time_unit, 1_000_000)), 6}
+      end
+
+    NaiveDateTime.from_gregorian_seconds(seconds + @epoch_gregorian_seconds, microsecond)
+  end
+
+  defp precision(1), do: 0
+  defp precision(10), do: 1
+  defp precision(100), do: 2
+  defp precision(1_000), do: 3
+  defp precision(10_000), do: 4
+  defp precision(100_000), do: 5
+  defp precision(_), do: 6
+end
+
+millis = Enum.map(1..1_000_000, fn i -> 1_700_000_000_000 + i end)
+micros = Enum.map(1..1_000_000, fn i -> 1_700_000_000_000_000 + i end)
+
+Benchee.run(
+  %{
+    "via_unix" => fn {ticks, unit} -> Enum.each(ticks, &Bench.via_unix(&1, unit)) end,
+    "via_gregorian" => fn {ticks, unit} -> Enum.each(ticks, &Bench.via_gregorian(&1, unit)) end
+  },
+  # each input carries its own time unit so ticks are decoded at the right scale
+  inputs: %{
+    "milliseconds" => {millis, 1_000},
+    "microseconds" => {micros, 1_000_000}
+  }
+  # profile_after: true
+)
diff --git a/bench/naive_decode.exs b/bench/naive_decode.exs
new file mode 100644
index 00000000..81b5a539
--- /dev/null
+++ b/bench/naive_decode.exs
@@ -0,0 +1,24 @@
+defmodule Bench do
+  @epoch_gregorian_seconds 62_167_219_200
+  @epoch_naive_datetime ~N[1970-01-01 00:00:00]
+
+  def via_add(seconds) do
+    NaiveDateTime.add(@epoch_naive_datetime, seconds)
+  end
+
+  def via_unix(seconds) do
+    seconds
+    |> DateTime.from_unix!()
+    |> DateTime.to_naive()
+  end
+
+  def via_gregorian(seconds) do
+    NaiveDateTime.from_gregorian_seconds(seconds + @epoch_gregorian_seconds)
+  end
+end
+
+Benchee.run(%{
+  "via_add" => fn -> Enum.each(1..1_000_000, &Bench.via_add/1) end,
+  "via_unix" => fn -> Enum.each(1..1_000_000, &Bench.via_unix/1) end,
+  "via_gregorian" => fn -> Enum.each(1..1_000_000, &Bench.via_gregorian/1) end
+})
diff --git a/lib/ch/row_binary.ex b/lib/ch/row_binary.ex
index 98fc7db0..a0d67dae 100644
--- a/lib/ch/row_binary.ex
+++ b/lib/ch/row_binary.ex
@@ -1452,13 +1452,11 @@ defmodule Ch.RowBinary do
       {:datetime, timezone} ->
         case bin do
           <> ->
-            dt = DateTime.from_unix!(s)
-
             dt =
               case timezone do
-                nil -> DateTime.to_naive(dt)
-                "UTC" -> dt
-                _ -> DateTime.shift_zone!(dt, timezone)
+                nil -> NaiveDateTime.from_gregorian_seconds(s + @epoch_gregorian_seconds)
+                "UTC" -> DateTime.from_unix!(s)
+                _ -> s |> DateTime.from_unix!() |> DateTime.shift_zone!(timezone)
               end
 
             decode_rows(types_rest, bin, [dt | row], rows, types)
@@ -1530,14 +1528,22 @@ defmodule Ch.RowBinary do
 
       {:datetime64, time_unit, timezone} ->
         case bin do
-          <> ->
-            dt = DateTime.from_unix!(s, time_unit)
-
+          <> ->
             dt =
               case timezone do
-                nil -> DateTime.to_naive(dt)
-                "UTC" -> dt
-                _ -> DateTime.shift_zone!(dt, timezone)
+                nil ->
+                  # ticks can be negative (pre-1970 values): floor_div/mod keep the
+                  # sub-second part non-negative, matching DateTime.from_unix!/2
+                  gregorian_seconds = Integer.floor_div(ticks, time_unit) + @epoch_gregorian_seconds
+                  subsecond_ticks = Integer.mod(ticks, time_unit)
+                  microsecond_precision = microsecond_precision(subsecond_ticks, time_unit)
+                  NaiveDateTime.from_gregorian_seconds(gregorian_seconds, microsecond_precision)
+
+                "UTC" ->
+                  DateTime.from_unix!(ticks, time_unit)
+
+                _ ->
+                  ticks |> DateTime.from_unix!(time_unit) |> DateTime.shift_zone!(timezone)
               end
 
             decode_rows(types_rest, bin, [dt | row], rows, types)
@@ -1605,10 +1611,15 @@ defmodule Ch.RowBinary do
     end
   end
 
-  @compile inline: [time_unit: 1]
+  @compile inline: [time_unit: 1, time_precision: 1]
   for precision <- 0..9 do
     time_unit = Integer.pow(10, precision)
+
     defp time_unit(unquote(precision)), do: unquote(time_unit)
+
+    if precision <= 6 do
+      defp time_precision(unquote(time_unit)), do: unquote(precision)
+    end
   end
 
   @compile inline: [time_after_midnight: 2]
@@ -1625,4 +1636,15 @@ defmodule Ch.RowBinary do
       # TODO: we could potentially decode ClickHouse's Time/Time64 values as Elixir's Duration when it's out of Elixir's Time range
     end
   end
+
+  # converts non-negative sub-second ticks (`time_unit` parts per second) into a
+  # `{microsecond, precision}` tuple; units finer than microseconds are truncated
+  @compile inline: [microsecond_precision: 2]
+  defp microsecond_precision(subsecond_ticks, time_unit) when time_unit <= 1_000_000 do
+    {subsecond_ticks * div(1_000_000, time_unit), time_precision(time_unit)}
+  end
+
+  defp microsecond_precision(subsecond_ticks, time_unit) do
+    {div(subsecond_ticks, div(time_unit, 1_000_000)), 6}
+  end
 end