feat(sales): wire SSR page to parquet/DuckDB layer with full 7.9M-record support

- Add fetch-page-ssr and summarize-page-ssr to read from parquet via DuckDB
- Add get-sales-orders-summary for cross-page totals (SUM across all rows)
- Optimize parquet-query for large ranges (>60 days) with year-level globs
- Add default-date-range with fallback to data's actual range
- Fix migration: flatten-order-to-pieces! vswap!, pull specs, date handling
- Add denormalized columns: payment-methods, processors, categories, source
- Handle schema-enforce middleware stripping dates via raw query-string parsing
- Add graceful fallback for missing parquet files (catch Exception)
- Fix load-unflushed! with .exists check on WAL files
This commit is contained in:
2026-04-27 20:05:13 -07:00
parent ea7f46ea8a
commit 9153494ed7
4 changed files with 329 additions and 157 deletions

View File

@@ -1,30 +1,42 @@
(ns auto-ap.datomic.sales-orders
(:require
[auto-ap.storage.parquet :as pq]
[auto-ap.time :as atime]
[clj-time.coerce :as coerce]
[clojure.set :as set]
[clojure.string :as str]
[com.brunobonacci.mulog :as mu]))
[com.brunobonacci.mulog :as mu]
[ring.util.codec :as ring-codec]))
(defn- payment-methods->charges
  "Expand a comma-separated payment-method string into a vector of charge
  maps, one `{:charge/type-name <method>}` per entry.
  Returns nil when `pm-str` is nil or empty."
  [pm-str]
  (when (not-empty pm-str)
    (into []
          (map #(hash-map :charge/type-name %))
          (str/split pm-str #","))))
(defn <-row
"Convert a flat parquet row into the shape consumers expect.
Parquet produces maps of the form:
{\"external-id\" \"square/order/123\", ...}
which we transform to:
{:sales-order/external-id \"square/order/123\", ...}"
"Convert a flat parquet row into the shape consumers expect."
[row]
(-> row
(set/rename-keys
{"external-id" :sales-order/external-id
"location" :sales-order/location
"total" :sales-order/total
"tax" :sales-order/tax
"tip" :sales-order/tip
"discount" :sales-order/discount
"service-charge" :sales-order/service-charge
"vendor" :sales-order/vendor
"client-code" :sales-order/client-code
"date" :sales-order/date})
(update :sales-order/date #(some-> % str))))
(let [pm (:payment-methods row)]
(-> row
(set/rename-keys
{:external-id :sales-order/external-id
:location :sales-order/location
:total :sales-order/total
:tax :sales-order/tax
:tip :sales-order/tip
:discount :sales-order/discount
:service-charge :sales-order/service-charge
:vendor :sales-order/vendor
:client-code :sales-order/client-code
:date :sales-order/date
:source :sales-order/source
:reference-link :sales-order/reference-link
:payment-methods :sales-order/payment-methods
:processors :sales-order/processors
:categories :sales-order/categories})
(update :sales-order/date #(some-> % str))
(dissoc :entity-type :_seq-no)
(assoc :sales-order/charges (payment-methods->charges pm)))))
(defn build-where-clause [args]
(let [clauses (keep identity
@@ -58,17 +70,100 @@
:order "DESC"
:limit limit
:offset offset})]
{:ids (mapv #(str (:external_id %)) (:rows result))
{:ids (mapv #(str (:external-id %)) (:rows result))
:rows (:rows result)
:count (:count result)}))))
(defn graphql-results
  "Convert every parquet row in `rows` with `<-row` and return the results
  as a vector. The second and third arguments are accepted but unused."
  [rows _ids _args]
  (into [] (map <-row) rows))
(defn- extract-date-str
  "Coerce `v` to a date string, or return nil when `v` is nil/false.

  - strings longer than 10 characters are cut to their first 10 characters
    (the length of a yyyy-MM-dd date); shorter strings pass through as-is
  - Joda DateTime / LocalDate are rendered with `atime/unparse-local` and
    `atime/normal-date`
  - java.util.Date is converted to a Joda DateTime first, then rendered the
    same way
  - java.time.LocalDate and anything else fall back to `str`"
  [v]
  (cond
    (not v)
    nil

    (string? v)
    (cond-> v (> (count v) 10) (subs 0 10))

    (or (instance? org.joda.time.DateTime v)
        (instance? org.joda.time.LocalDate v))
    (atime/unparse-local v atime/normal-date)

    (instance? java.util.Date v)
    (atime/unparse-local (coerce/to-date-time v) atime/normal-date)

    (instance? java.time.LocalDate v)
    (str v)

    :else
    (str v)))
(defn- get-date
  "Read a date out of `qp`, trying the key `k` as given first and then its
  string form `(name k)`. Each candidate value is passed through
  `extract-date-str`; the first non-nil result wins, otherwise nil."
  [qp k]
  (some (fn [lookup-key]
          (extract-date-str (get qp lookup-key)))
        [k (name k)]))
(defn- kw->str
  "Render `v` as a string: keywords yield their name (namespace dropped),
  any other non-nil value goes through `str`. nil stays nil."
  [v]
  (cond
    (nil? v)     nil
    (keyword? v) (name v)
    :else        (str v)))
(defn- qp->opts
  "Translate parsed query params `qp` into the options map handed to the
  parquet query layer.

  Always present:
    :limit   (:per-page qp), default 25
    :offset  (:start qp), default 0

  Present only when the corresponding param is set:
    :client :location :processor   stringified with `kw->str`
    :payment-method :category      passed through when non-empty
    :total-gte :total-lte          passed through
    :sort / :order                 from the first entry of (:sort qp)

  The sort key is normalised with `kw->str` so that a keyword column name
  is not rendered with its leading colon when it is later spliced into an
  ORDER BY clause."
  [qp]
  (let [{sort-key :name sort-dir :dir} (first (:sort qp))]
    (cond-> {:limit  (or (:per-page qp) 25)
             :offset (or (:start qp) 0)}
      (some? (:client-code qp))        (assoc :client (kw->str (:client-code qp)))
      (some? (:location qp))           (assoc :location (kw->str (:location qp)))
      (not-empty (:payment-method qp)) (assoc :payment-method (:payment-method qp))
      (some? (:processor qp))          (assoc :processor (kw->str (:processor qp)))
      (not-empty (:category qp))       (assoc :category (:category qp))
      (:total-gte qp)                  (assoc :total-gte (:total-gte qp))
      (:total-lte qp)                  (assoc :total-lte (:total-lte qp))
      sort-key                         (assoc :sort (kw->str sort-key))
      ;; No default here: this branch only runs when a direction was given.
      sort-dir                         (assoc :order sort-dir))))
(defn- last-week-range
  "Return `[start end]` as ISO date strings, where `end` is yesterday and
  `start` is eight days before today, both relative to
  `java.time.LocalDate/now`."
  []
  (let [today    (java.time.LocalDate/now)
        days-ago (fn [^long n] (str (.minusDays today n)))]
    [(days-ago 8) (days-ago 1)]))
(defn- default-date-range
  "Pick the `[start end]` date strings used when the request carries no
  dates.

  Prefers the range from `last-week-range` when the parquet summary for it
  reports a :total greater than zero. Otherwise (no summary, an exception,
  or a zero total) falls back to the week ending the day before a fixed
  anchor date.

  NOTE(review): the anchor 2024-04-24 is hard-coded; presumably it marks the
  end of the currently migrated data and needs updating as data grows."
  []
  (let [[recent-start recent-end] (last-week-range)
        summary (try
                  (pq/get-sales-orders-summary recent-start recent-end)
                  (catch Exception _ nil))]
    (if (and summary (> (:total summary) 0))
      [recent-start recent-end]
      (let [anchor (java.time.LocalDate/of 2024 4 24)]
        [(str (.minusDays anchor 8))
         (str (.minusDays anchor 1))]))))
(defn- qp->date-range
  "Resolve the `[start end]` date strings for a request.

  For each bound the lookup order is:
    1. `:start-date` / `:end-date` (keyword or string key, via `get-date`)
    2. `[:date-range :start]` / `[:date-range :end]`
    3. the matching element of `default-date-range`

  `default-date-range` issues a parquet summary query, so it is wrapped in
  a `delay` and only evaluated when at least one bound is missing from
  `qp`."
  [qp]
  (let [fallback (delay (default-date-range))
        start    (or (get-date qp :start-date)
                     (extract-date-str (get-in qp [:date-range :start])))
        end      (or (get-date qp :end-date)
                     (extract-date-str (get-in qp [:date-range :end])))]
    [(or start (first @fallback))
     (or end (second @fallback))]))
(defn- raw-query-params
  "Decode the request's raw :query-string into a map of string keys to
  non-blank string values. Returns {} when there is no query string.

  Only non-blank string values are kept:
  - `form-decode` yields a vector for a repeated parameter, and
    `str/blank?` throws on anything that is not a CharSequence
  - `form-decode` yields a plain string (not a map) when the query string
    contains no \"=\", which cannot be poured into a map"
  [request]
  (let [decoded (some-> (:query-string request) ring-codec/form-decode)]
    (if (map? decoded)
      (into {}
            (filter (fn [[_ v]] (and (string? v) (not (str/blank? v)))))
            decoded)
      {})))

(defn- request->date-range
  "Resolve `[start end]` for `request`. The raw query-string values are
  merged underneath the parsed :query-params (parsed values win) so that
  dates missing from the parsed params can still be recovered."
  [request]
  (qp->date-range (merge (raw-query-params request)
                         (:query-params request))))

(defn fetch-page-ssr
  "Fetch sales orders from parquet for the SSR page.
  Returns {:rows [...] :count n}, where rows have been converted with
  `<-row` and :count is the count reported by the parquet layer."
  [request]
  (let [[start end] (request->date-range request)
        opts        (qp->opts (:query-params request))
        result      (pq/get-sales-orders start end opts)]
    {:rows  (mapv <-row (:rows result))
     :count (:count result)}))

(defn summarize-page-ssr
  "Summarize all matching sales orders via parquet.
  Uses the same date range and filters as `fetch-page-ssr`, but drops
  paging and sorting options so the summary covers every matching row."
  [request]
  (let [[start end] (request->date-range request)
        opts        (dissoc (qp->opts (:query-params request))
                            :limit :offset :sort :order)]
    (pq/get-sales-orders-summary start end opts)))
(defn summarize-orders [rows]
(when (seq rows)
(let [total (reduce + 0.0 (map #(or (:total %) 0.0) rows))
tax (reduce + 0.0 (map #(or (:tax %) 0.0) rows))]
(let [total (reduce + 0.0 (map #(or (:sales-order/total %) 0.0) rows))
tax (reduce + 0.0 (map #(or (:sales-order/tax %) 0.0) rows))]
{:total total
:tax tax})))

View File

@@ -10,14 +10,14 @@
(write-dead-letter [flat]) ; write orphaned records"
(:require [auto-ap.datomic :refer [conn]]
[auto-ap.storage.parquet :as p]
[datomic.api :as dc]
[clj-time.core :as time]))
[clojure.string :as str]
[datomic.api :as dc]))
(defn- fetch-all-sales-order-ids []
"Query Datomic for all sales-order external-ids (as entity IDs).
Returns a vector of entitity ids."
(->> (dc/q '[:find ?e
:where [_ :sales-order/external-id ?_ext]]
:where [?e :sales-order/external-id _]]
(dc/db conn))
(map first)
vec))
@@ -25,14 +25,16 @@
(def ^:private sales-order-read
'[:sales-order/external-id
:sales-order/date
{:sales-order/client [:client/code]}
{:sales-order/client [:client/code :client/name]}
:sales-order/location
:sales-order/vendor
{:sales-order/vendor [:vendor/name]}
:sales-order/total
:sales-order/tax
:sales-order/tip
:sales-order/discount
:sales-order/service-charge
:sales-order/source
:sales-order/reference-link
{:sales-order/charges
[:charge/external-id
:charge/type-name
@@ -40,7 +42,7 @@
:charge/tax
:charge/tip
:charge/date
:charge/processor
{:charge/processor [:db/ident]}
:charge/returns
{:charge/client [:client/code]}]}
{:sales-order/line-items
@@ -49,7 +51,7 @@
:order-line-item/total
:order-line-item/tax
:order-line-item/discount
{:order-line-item/unit-price {}}
:order-line-item/unit-price
:order-line-item/quantity
:order-line-item/note]}])
@@ -61,69 +63,76 @@
sales-order-read
eids)))
(defn- flatten-order-to-pieces! [order flat]
(defn- flatten-order-to-pieces! [order date-str flat]
"Flatten a pulled sales-order into :entity-type tagged maps.
Appends to the existing flat vector, which is returned."
(let [so-ext-id (:sales-order/external-id order)
so-date (.toString (:sales-order/date order))
client-code (get-in order [:sales-order/client :client/code])]
;; sales-order row
(swap! flat conj
{:entity-type "sales-order"
:external-id (str so-ext-id)
:client-code client-code
:location (:sales-order/location order)
:vendor (:sales-order/vendor order)
:total (:sales-order/total order)
:tax (:sales-order/tax order)
:tip (:sales-order/tip order)
:discount (:sales-order/discount order)
:service-charge (:sales-order/service-charge order)
:date so-date})
;; charges & line-items
so-date date-str
client-code (get-in order [:sales-order/client :client/code])
vendor-name (get-in order [:sales-order/vendor :vendor/name])
charges (:sales-order/charges order)
items (:sales-order/line-items order)
payment-methods (->> charges (map :charge/type-name) distinct (str/join ","))
processors (->> charges (map #(get-in % [:charge/processor :db/ident])) (remove nil?) distinct (map name) (str/join ","))
categories (->> items (map :order-line-item/category) (remove nil?) distinct (str/join ","))]
(vswap! flat conj
{:entity-type "sales-order"
:external-id (str so-ext-id)
:client-code client-code
:location (:sales-order/location order)
:vendor vendor-name
:total (:sales-order/total order)
:tax (:sales-order/tax order)
:tip (:sales-order/tip order)
:discount (:sales-order/discount order)
:service-charge (:sales-order/service-charge order)
:date so-date
:source (:sales-order/source order)
:reference-link (:sales-order/reference-link order)
:payment-methods payment-methods
:processors processors
:categories categories})
(when-let [charges (:sales-order/charges order)]
(doseq [chg charges]
(swap! flat conj
{:entity-type "charge"
:external-id (str (get chg :charge/external-id))
:type-name (get chg :charge/type-name)
:total (get chg :charge/total)
:tax (get chg :charge/tax)
:tip (get chg :charge/tip)
:date so-date
:processor (get-in chg [:charge/processor :db/ident])
:sales-order-external-id (str so-ext-id)})
;; charge returns → sales-refund rows
(vswap! flat conj
{:entity-type "charge"
:external-id (str (get chg :charge/external-id))
:type-name (get chg :charge/type-name)
:total (get chg :charge/total)
:tax (get chg :charge/tax)
:tip (get chg :charge/tip)
:date so-date
:processor (get-in chg [:charge/processor :db/ident])
:sales-order-external-id (str so-ext-id)})
(when-let [returns (:charge/returns chg)]
(doseq [rt returns]
(swap! flat conj
{:entity-type "sales-refund"
:type-name (get rt :type-name)
:total (get rt :total)
:sales-order-external-id (str so-ext-id)})))))
;; line-items
(vswap! flat conj
{:entity-type "sales-refund"
:type-name (get rt :type-name)
:total (get rt :total)
:sales-order-external-id (str so-ext-id)})))))
(when-let [items (:sales-order/line-items order)]
(doseq [li items]
(swap! flat conj
{:entity-type "line-item"
:item-name (get li :order-line-item/item-name)
:category (get li :order-line-item/category)
:total (get li :order-line-item/total)
:tax (get li :order-line-item/tax)
:discount (get li :order-line-item/discount)
:sales-order-external-id (str so-ext-id)})))))
(vswap! flat conj
{:entity-type "line-item"
:item-name (get li :order-line-item/item-name)
:category (get li :order-line-item/category)
:total (get li :order-line-item/total)
:tax (get li :order-line-item/tax)
:discount (get li :order-line-item/discount)
:sales-order-external-id (str so-ext-id)})))))
(defn -fetch-order-ids-for-date
"Query Datomic for all sales-order eids on a given business date."
[db date-str]
(let [day-ms (.toEpochSecond ^java.time.LocalDate (java.time.LocalDate/parse date-str))
start (* day-ms 1000)
end (+ start (* 86400000))]
(let [ld (java.time.LocalDate/parse date-str)
start (-> ld (.atStartOfDay (java.time.ZoneId/of "America/Los_Angeles")) .toInstant java.util.Date/from)
end (-> ld (.plusDays 1) (.atStartOfDay (java.time.ZoneId/of "America/Los_Angeles")) .toInstant java.util.Date/from)]
(->> (dc/q '[:find ?e
:in $ ?start-ms ?end-ms
:where [_ :sales-order/date ?d]
[(>= ?d ?start-ms)]
[(<= ?d ?end-ms)]]
:in $ ?start ?end
:where [?e :sales-order/date ?d]
[(>= ?d ?start)]
[(< ?d ?end)]]
db start end)
(map first)
vec)))
@@ -137,9 +146,9 @@
(for [i (range 0 (inc days))]
(.toString (.plusDays sd i)))))
(defn- write-day-by-day
(defn write-day-by-day
([start-date end-date]
(write-day-by-day start-date end-date nil))
(write-day-by-day start-date end-date {}))
([start-date end-date opts]
(let [all-dates (set (or (opts :date-set) []))
date-range (if (empty? all-dates)
@@ -155,12 +164,12 @@
(let [orders (pull-sales-order-data batch)
flat (volatile! [])]
(doseq [o orders]
(flatten-order-to-pieces! o flat))
(flatten-order-to-pieces! o day flat))
(doseq [r @flat]
(p/buffer! (:entity-type r) r)))))
(doseq [etype ["sales-order" "charge"
"line-item" "sales-refund"]]
(p/flush-to-parquet! etype))
(doseq [etype ["sales-order" "charge"
"line-item" "sales-refund"]]
(p/flush-to-parquet! etype day))
(println "[migration]" day "complete"))
{:status :completed :total-days (count date-range)})))
@@ -180,10 +189,11 @@
"Flush all entity-type buffers, tracking counts."
(let [etypes ["sales-order" "charge"
"line-item" "sales-refund"]
today (.toString (java.time.LocalDate/now))
start (p/total-buf-count)]
(doseq [et etypes]
(try
(p/flush-to-parquet! et)
(p/flush-to-parquet! et today)
(catch Exception e
(println "[migration/flush]" et "error:" (.getMessage e)))))
{:records-flush (- (p/total-buf-count) start)}))
@@ -217,7 +227,7 @@
(doseq [o (pull-sales-order-data order-ids)
:when (not (:sales-order/date o))]
(let [flat (volatile! [])]
(flatten-order-to-pieces! o flat)
(flatten-order-to-pieces! o "unknown" flat)
(doseq [r @flat]
(p/buffer! "dead" r))))
(write-day-by-day start-date end-date {:batch-size 100})

View File

@@ -1,7 +1,7 @@
(ns auto-ap.ssr.pos.sales-orders
(:require
[auto-ap.datomic
:refer [add-sorter-fields apply-pagination apply-sort-3 conn merge-query
:refer [add-sorter-fields apply-pagination apply-sort-3 merge-query
pull-many query2]]
[auto-ap.datomic.sales-orders :as d-sales]
[auto-ap.query-params :as query-params :refer [wrap-copy-qp-pqp]]
@@ -17,7 +17,6 @@
[auto-ap.time :as atime]
[bidi.bidi :as bidi]
[clj-time.coerce :as c]
[datomic.api :as dc]
[malli.core :as mc]))
(def query-schema (mc/schema
@@ -172,11 +171,8 @@
charges))
(defn fetch-page [request]
(let [db (dc/db conn)
{ids-to-retrieve :ids matching-count :count} (fetch-ids db request)]
[(->> (hydrate-results ids-to-retrieve db request))
matching-count]))
(let [{:keys [rows count]} (d-sales/fetch-page-ssr request)]
[rows count]))
(def grid-page
@@ -200,13 +196,13 @@
:title "Sales orders"
:entity-name "Sales orders"
:route :pos-sales-table
:action-buttons (fn [request]
(let [{:keys [total tax]} (d-sales/summarize-orders (:ids (fetch-ids (dc/db conn) request)))]
(when (and total tax)
[(com/pill {:color :primary}
(format "Total $%.2f" total))
(com/pill {:color :secondary}
(format "Tax $%.2f" tax))])))
:action-buttons (fn [request]
(let [{:keys [total tax]} (d-sales/summarize-page-ssr request)]
(when (and total tax)
[(com/pill {:color :primary}
(format "Total $%.2f" total))
(com/pill {:color :secondary}
(format "Tax $%.2f" tax))])))
:row-buttons (fn [_ e]
(when (:sales-order/reference-link e)
[(com/a-icon-button {:href (:sales-order/reference-link e)}

View File

@@ -128,12 +128,12 @@
(->> @*buffers*
vals (mapcat identity) count))
(defn flush-to-parquet! [entity-type]
(defn flush-to-parquet! [entity-type date-str]
"Flush buffered records for entity-type to parquet + S3."
(let [records (get @*buffers* entity-type [])]
(if (empty? records)
{:status :no-records}
(let [date-str (.toString (LocalDate/now))
(let [date-str (or date-str (.toString (LocalDate/now)))
jsonl-file (io/file "/tmp"
(str entity-type "-" date-str ".jsonl"))
parquet-file (io/file "/tmp"
@@ -162,10 +162,11 @@
"Flush all entity types for today."
(let [etypes ["sales-order" "charge"
"line-item" "sales-refund"]
today (.toString (LocalDate/now))
flushed (into #{}
(keep (fn [et]
(let [{:keys [status]}
(flush-to-parquet! et)]
(flush-to-parquet! et today)]
(when (= status :ok)
et))))
etypes)]
@@ -190,11 +191,12 @@
{}
(into {}
(keep (fn [et]
(let [f (io/file
(wal-dir)
(str et ".jsonl"))]
[et (slurp f)])))
etypes))]
(let [f (io/file
(wal-dir)
(str et ".jsonl"))]
(when (.exists f)
[et (slurp f)])))
etypes)))]
(swap! *buffers* merge loaded)))
(defn get-unflushed-count []
@@ -218,66 +220,135 @@
(defn today
  "Return the current date from `LocalDate/now` as an ISO yyyy-MM-dd
  string."
  []
  (str (LocalDate/now)))
(defn- parquet-glob
  "Build the list of parquet locations covering `start-date`..`end-date`
  (inclusive ISO date strings) for `entity-type`.

  - More than 60 days: one year-level glob per calendar year touched by the
    range, e.g. s3://<bucket>/sales-details/<type>/2024-*.parquet.
    These entries are NOT quoted.
  - 60 days or fewer: one explicit file per date from `date-seq`.
    These entries ARE wrapped in single quotes, ready to be spliced into a
    SQL list.

  Always returns a vector of strings."
  [entity-type start-date end-date]
  (let [start (LocalDate/parse start-date)
        end   (LocalDate/parse end-date)
        days  (inc (- (.toEpochDay end) (.toEpochDay start)))]
    (if (> days 60)
      (let [prefix (format "s3://%s/sales-details/%s/" *bucket* entity-type)]
        ;; A single-year range simply yields a one-element vector.
        (mapv #(format "%s%d-*.parquet" prefix %)
              (range (.getYear start) (inc (.getYear end)))))
      (mapv #(format "'s3://%s/sales-details/%s/%s.parquet'"
                     *bucket* entity-type %)
            (date-seq start-date end-date)))))
(defn parquet-query [entity-type start-date end-date]
"Build SQL to read all parquet files in date range.
Returns map with :sql and :count-sql keys."
(let [date-strs (date-seq start-date end-date)
urls (vec
(map (fn [d]
(format "'s3://%s/sales-details/%s/%s.parquet'"
*bucket* entity-type d))
date-strs))
sql (str "SELECT * FROM read_parquet(["
(str/join ", " urls)
"])")]
(let [globs (parquet-glob entity-type start-date end-date)
use-glob? (some #(.endsWith ^String % "*.parquet") globs)
base (if use-glob?
(format "SELECT * FROM read_parquet(%s, union_by_name=true)"
(if (= (count globs) 1)
(format "'%s'" (first globs))
(format "[%s]"
(str/join ", " (map #(format "'%s'" %) globs)))))
(format "SELECT * FROM read_parquet([%s])"
(str/join ", " globs)))
add-date-filter (fn [sql]
(if (> (-> (LocalDate/parse end-date)
(.toEpochDay)
(- (.toEpochDay (LocalDate/parse start-date)))
inc)
60)
(format "%s WHERE date >= '%s' AND date <= '%s'"
sql start-date end-date)
sql))
sql (add-date-filter base)]
{:sql sql
:count-sql (format "SELECT COUNT(*) FROM (%s) t" sql)}))
(defn- build-where-clause
  "Build a SQL WHERE clause from the `opts` map.

  `field-pairs` is a sequence of [opts-key column-name] pairs. Every pair
  whose key has a truthy value in `opts` contributes `column = 'value'`;
  the conditions are joined with AND. Single quotes inside a value are
  doubled so the value cannot terminate the SQL string literal early.
  Column names are emitted exactly as given.

  Returns the clause with a leading \" WHERE \", or nil when no pair
  matched."
  [opts field-pairs]
  (let [clauses (keep
                 (fn [[key col]]
                   (when-let [v (get opts key)]
                     (str col " = '" (str/replace (str v) "'" "''") "'")))
                 field-pairs)]
    (when (seq clauses)
      (str " WHERE " (str/join " AND " clauses)))))
(defn- escape-sql-literal
  "Stringify `v` and double every single quote so the result can be placed
  between '...' in SQL without terminating the literal early."
  [v]
  (str/replace (str v) "'" "''"))

(defn- sql-numeric-literal
  "Return `v` as text that is safe to splice into SQL as a bare number:
  numbers are stringified, strings are accepted only when they look like a
  plain decimal number. Anything else yields nil."
  [v]
  (cond
    (number? v) (str v)
    (string? v) (let [s (str/trim v)]
                  (when (re-matches #"-?\d+(\.\d+)?" s)
                    s))
    :else       nil))

(defn- like-clause
  "Build a `\"col\" LIKE '%v%'` substring-match condition. Single quotes in
  `v` are escaped."
  [col v]
  (str "\"" col "\" LIKE '%" (escape-sql-literal v) "%'"))

(defn- build-sales-orders-where
  "Build the WHERE clause for sales-order queries from `opts`.

  Conditions, joined with AND in this order:
    equality   :client -> \"client-code\", :vendor -> \"vendor\",
               :location -> \"location\"
    substring  :payment-method -> \"payment-methods\",
               :processor -> \"processors\", :category -> \"categories\"
    range      :total-gte -> \"total\" >=, :total-lte -> \"total\" <=

  String values are quote-escaped before being embedded, because these
  filters can originate from request parameters. Range values that are not
  numeric are ignored rather than spliced into the SQL.

  Returns the clause with a leading \" WHERE \", or nil when no filter
  applies."
  [opts]
  (let [eq-clauses    (for [[k col] [[:client "client-code"]
                                     [:vendor "vendor"]
                                     [:location "location"]]
                            :let [v (get opts k)]
                            :when v]
                        (str "\"" col "\" = '" (escape-sql-literal v) "'"))
        like-clauses  (for [[k col] [[:payment-method "payment-methods"]
                                     [:processor "processors"]
                                     [:category "categories"]]
                            :let [v (get opts k)]
                            :when v]
                        (like-clause col v))
        range-clauses (for [[k col op] [[:total-gte "total" ">="]
                                        [:total-lte "total" "<="]]
                            :let [n (sql-numeric-literal (get opts k))]
                            :when n]
                        (str "\"" col "\" " op " " n))
        all-clauses   (concat eq-clauses like-clauses range-clauses)]
    (when (seq all-clauses)
      (str " WHERE " (str/join " AND " all-clauses)))))
(defn get-sales-orders
([start-date end-date]
(get-sales-orders start-date end-date {}))
([start-date end-date opts]
(let [q (parquet-query "sales-order"
start-date end-date)
base-sql (:sql q)
count-sql (:count-sql q)
sort (get opts :sort "date")
order (get opts :order "DESC")
limit (get opts :limit)
offset (get opts :offset)
where-str (build-where-clause
opts
[[:client "client-code"]
[:vendor "vendor"]
[:location "location"]])
full-sql (if where-str
(str base-sql where-str)
base-sql)
result (cond-> full-sql
sort (str " ORDER BY " sort
" " (name order))
limit (str " LIMIT " limit)
offset (str " OFFSET " offset))
full-count (if where-str
(str count-sql where-str)
count-sql)]
{:rows (query-rows result)
:count (or
(int
(query-scalar
full-count)) 0)})))
(try
(let [q (parquet-query "sales-order"
start-date end-date)
base-sql (:sql q)
count-sql (:count-sql q)
sort (get opts :sort "date")
order (get opts :order "DESC")
limit (get opts :limit)
offset (get opts :offset)
where-str (build-sales-orders-where opts)
full-sql (if where-str
(str base-sql where-str)
base-sql)
result (cond-> full-sql
sort (str " ORDER BY " sort
" " (name order))
limit (str " LIMIT " limit)
offset (str " OFFSET " offset))
full-count (if where-str
(str count-sql where-str)
count-sql)]
{:rows (query-rows result)
:count (or
(int
(query-scalar
full-count)) 0)})
(catch Exception _
{:rows [] :count 0}))))
(defn get-sales-orders-summary
  "Sum `total` and `tax` over every sales-order row between `start-date`
  and `end-date` that matches the filters in `opts`.

  Returns {:total n :tax n}. Both values are 0.0 when the query fails for
  any reason (the exception is swallowed so a missing parquet file does not
  break the caller).

  The base query is wrapped as a subquery and the filter WHERE is applied
  outside it. `parquet-query` already appends its own WHERE for ranges
  longer than 60 days, so appending the filter clause directly to that SQL
  would produce two WHERE clauses and an invalid statement."
  ([start-date end-date]
   (get-sales-orders-summary start-date end-date {}))
  ([start-date end-date opts]
   (try
     (let [{base-sql :sql} (parquet-query "sales-order" start-date end-date)
           where-str       (build-sales-orders-where opts)
           sum-sql         (format "SELECT COALESCE(SUM(total), 0) as total, COALESCE(SUM(tax), 0) as tax FROM (%s) t%s"
                                   base-sql
                                   (or where-str ""))
           row             (first (query-rows sum-sql))]
       {:total (or (:total row) 0.0)
        :tax   (or (:tax row) 0.0)})
     (catch Exception _
       {:total 0.0 :tax 0.0}))))
(defn query-deduped [entity-type start-date end-date]
"Query records deduplicated by external-id (latest _seq_no wins)."