feat(sales): wire SSR page to parquet/DuckDB layer with full 7.9M-record support

- Add fetch-page-ssr and summarize-page-ssr to read from parquet via DuckDB
- Add get-sales-orders-summary for cross-page totals (SUM across all rows)
- Optimize parquet-query for large ranges (>60 days) with year-level globs
- Add default-date-range with fallback to data's actual range
- Fix migration: flatten-order-to-pieces! vswap!, pull specs, date handling
- Add denormalized columns: payment-methods, processors, categories, source
- Handle schema-enforce middleware stripping dates via raw query-string parsing
- Add graceful fallback for missing parquet files (catch Exception)
- Fix load-unflushed! with .exists check on WAL files
This commit is contained in:
2026-04-27 20:05:13 -07:00
parent ea7f46ea8a
commit 9153494ed7
4 changed files with 329 additions and 157 deletions

View File

@@ -128,12 +128,12 @@
(->> @*buffers*
vals (mapcat identity) count))
(defn flush-to-parquet! [entity-type]
(defn flush-to-parquet! [entity-type date-str]
"Flush buffered records for entity-type to parquet + S3."
(let [records (get @*buffers* entity-type [])]
(if (empty? records)
{:status :no-records}
(let [date-str (.toString (LocalDate/now))
(let [date-str (or date-str (.toString (LocalDate/now)))
jsonl-file (io/file "/tmp"
(str entity-type "-" date-str ".jsonl"))
parquet-file (io/file "/tmp"
@@ -162,10 +162,11 @@
"Flush all entity types for today."
(let [etypes ["sales-order" "charge"
"line-item" "sales-refund"]
today (.toString (LocalDate/now))
flushed (into #{}
(keep (fn [et]
(let [{:keys [status]}
(flush-to-parquet! et)]
(flush-to-parquet! et today)]
(when (= status :ok)
et))))
etypes)]
@@ -190,11 +191,12 @@
{}
(into {}
(keep (fn [et]
(let [f (io/file
(wal-dir)
(str et ".jsonl"))]
[et (slurp f)])))
etypes))]
(let [f (io/file
(wal-dir)
(str et ".jsonl"))]
(when (.exists f)
[et (slurp f)])))
etypes)))]
(swap! *buffers* merge loaded)))
(defn get-unflushed-count []
@@ -218,66 +220,135 @@
(defn today []
(.toString (LocalDate/now)))
(defn- parquet-glob [entity-type start-date end-date]
"Build a glob pattern or explicit file list for the date range.
Uses glob patterns for ranges > 60 days; explicit list otherwise."
(let [days (-> (LocalDate/parse end-date)
(.toEpochDay)
(- (.toEpochDay (LocalDate/parse start-date)))
inc)]
(if (> days 60)
(let [prefix (format "s3://%s/sales-details/%s/" *bucket* entity-type)
sy (-> (LocalDate/parse start-date) .getYear)
ey (-> (LocalDate/parse end-date) .getYear)]
(if (= sy ey)
[(format "%s%d-*.parquet" prefix sy)]
(vec
(for [y (range sy (inc ey))]
(format "%s%d-*.parquet" prefix y)))))
(vec
(map (fn [d]
(format "'s3://%s/sales-details/%s/%s.parquet'"
*bucket* entity-type d))
(date-seq start-date end-date))))))
(defn parquet-query [entity-type start-date end-date]
"Build SQL to read all parquet files in date range.
Returns map with :sql and :count-sql keys."
(let [date-strs (date-seq start-date end-date)
urls (vec
(map (fn [d]
(format "'s3://%s/sales-details/%s/%s.parquet'"
*bucket* entity-type d))
date-strs))
sql (str "SELECT * FROM read_parquet(["
(str/join ", " urls)
"])")]
(let [globs (parquet-glob entity-type start-date end-date)
use-glob? (some #(.endsWith ^String % "*.parquet") globs)
base (if use-glob?
(format "SELECT * FROM read_parquet(%s, union_by_name=true)"
(if (= (count globs) 1)
(format "'%s'" (first globs))
(format "[%s]"
(str/join ", " (map #(format "'%s'" %) globs)))))
(format "SELECT * FROM read_parquet([%s])"
(str/join ", " globs)))
add-date-filter (fn [sql]
(if (> (-> (LocalDate/parse end-date)
(.toEpochDay)
(- (.toEpochDay (LocalDate/parse start-date)))
inc)
60)
(format "%s WHERE date >= '%s' AND date <= '%s'"
sql start-date end-date)
sql))
sql (add-date-filter base)]
{:sql sql
:count-sql (format "SELECT COUNT(*) FROM (%s) t" sql)}))
(defn- build-where-clause [opts field-pairs]
"Build SQL WHERE clause from opts map.
fields-with-keys is vector of [:field-key :env-var-name]."
(let [clauses (keep
(fn [[key env]]
(let [v (get opts key)]
(when v
(str env " = '" v "'"))))
field-pairs)]
(when (seq clauses)
(str " WHERE " (str/join " AND " clauses)))))
(defn- like-clause [col v]
(str "\"" col "\" LIKE '%" v "%'"))
(defn- build-sales-orders-where [opts]
(let [eq-clauses (keep
(fn [[key col]]
(let [v (get opts key)]
(when v
(str "\"" col "\" = '" v "'"))))
[[:client "client-code"]
[:vendor "vendor"]
[:location "location"]])
like-clauses (keep
(fn [[key col]]
(let [v (get opts key)]
(when v
(like-clause col v))))
[[:payment-method "payment-methods"]
[:processor "processors"]
[:category "categories"]])
range-clauses (keep
(fn [[key col op]]
(let [v (get opts key)]
(when v
(str "\"" col "\" " op " " v))))
[[:total-gte "total" ">="]
[:total-lte "total" "<="]])
all-clauses (concat eq-clauses like-clauses range-clauses)]
(when (seq all-clauses)
(str " WHERE " (str/join " AND " all-clauses)))))
(defn get-sales-orders
([start-date end-date]
(get-sales-orders start-date end-date {}))
([start-date end-date opts]
(let [q (parquet-query "sales-order"
start-date end-date)
base-sql (:sql q)
count-sql (:count-sql q)
sort (get opts :sort "date")
order (get opts :order "DESC")
limit (get opts :limit)
offset (get opts :offset)
where-str (build-where-clause
opts
[[:client "client-code"]
[:vendor "vendor"]
[:location "location"]])
full-sql (if where-str
(str base-sql where-str)
base-sql)
result (cond-> full-sql
sort (str " ORDER BY " sort
" " (name order))
limit (str " LIMIT " limit)
offset (str " OFFSET " offset))
full-count (if where-str
(str count-sql where-str)
count-sql)]
{:rows (query-rows result)
:count (or
(int
(query-scalar
full-count)) 0)})))
(try
(let [q (parquet-query "sales-order"
start-date end-date)
base-sql (:sql q)
count-sql (:count-sql q)
sort (get opts :sort "date")
order (get opts :order "DESC")
limit (get opts :limit)
offset (get opts :offset)
where-str (build-sales-orders-where opts)
full-sql (if where-str
(str base-sql where-str)
base-sql)
result (cond-> full-sql
sort (str " ORDER BY " sort
" " (name order))
limit (str " LIMIT " limit)
offset (str " OFFSET " offset))
full-count (if where-str
(str count-sql where-str)
count-sql)]
{:rows (query-rows result)
:count (or
(int
(query-scalar
full-count)) 0)})
(catch Exception _
{:rows [] :count 0}))))
(defn get-sales-orders-summary
([start-date end-date]
(get-sales-orders-summary start-date end-date {}))
([start-date end-date opts]
(try
(let [q (parquet-query "sales-order" start-date end-date)
base-sql (:sql q)
where-str (build-sales-orders-where opts)
full-sql (if where-str
(str base-sql where-str)
base-sql)
sum-sql (format "SELECT COALESCE(SUM(total), 0) as total, COALESCE(SUM(tax), 0) as tax FROM (%s) t" full-sql)
row (first (query-rows sum-sql))]
{:total (or (:total row) 0.0)
:tax (or (:tax row) 0.0)})
(catch Exception _
{:total 0.0 :tax 0.0}))))
(defn query-deduped [entity-type start-date end-date]
"Query records deduplicated by external-id (latest _seq_no wins)."