feat(sales): initial Parquet migration infrastructure

- Add DuckDB/S3 parquet storage layer (auto-ap.storage.parquet)
- Add sales_to_parquet migration script for historical data
- Add cleanup_sales for post-migration Datomic cleanup
- Add sales_orders_new.clj with DuckDB read layer for SSR views
- Add test scaffolding for parquet storage
- Add plan document for move-detailed-sales-to-parquet

feat(sales): redirect production and read flows to Parquet/DuckDB

- U3: Square production (upsert) now buffers to parquet via flatten-order-to-parquet!
- U3: EzCater core import-order now buffers to parquet instead of Datomic transact
- U3: EzCater XLS upload-xls now buffers to parquet instead of audit-transact
- U4: Rewrite sales_orders.clj to read from DuckDB via pq/get-sales-orders
- U5: Rewrite sales_summaries to use parquet aggregation functions
  - get-payment-items-parquet, get-discounts-parquet, get-refund-items-parquet
  - get-tax-parquet, get-tip-parquet, get-sales-parquet
- Add sum-* aggregation functions to storage/sales_summaries.clj
  - sum-discounts, sum-refunds-by-type, sum-taxes, sum-tips, sum-sales-by-category
This commit is contained in:
2026-04-25 07:43:41 -07:00
parent db9018722d
commit 26c9563a03
15 changed files with 1901 additions and 290 deletions

View File

@@ -1,171 +1,86 @@
(ns auto-ap.datomic.sales-orders
(:require
[auto-ap.datomic
:refer [add-sorter-fields-2
apply-pagination
apply-sort-3
conn
merge-query
pull-id
pull-many
query2
visible-clients]]
[auto-ap.datomic :refer [conn]]
[auto-ap.storage.parquet :as pq]
[clj-time.coerce :as c]
[clj-time.core :as time]
[clojure.set :as set]
[com.brunobonacci.mulog :as mu]
[datomic.api :as dc]
[iol-ion.query]))
[com.brunobonacci.mulog :as mu]))
(defn <-datomic
  "Reshape a pulled sales-order map for consumers.

  - :sales-order/date is passed through c/from-date.
  - Each entry of :sales-order/charges has :charge/processor replaced by its
    :db/ident, the reverse reference :expected-deposit/_charges renamed to
    :expected-deposit, and :expected-deposit replaced by its first element."
  [result]
  (let [fix-charge (fn [charge]
                     (-> charge
                         (update :charge/processor :db/ident)
                         (set/rename-keys {:expected-deposit/_charges :expected-deposit})
                         (update :expected-deposit first)))]
    (-> result
        (update :sales-order/date c/from-date)
        (update :sales-order/charges #(map fix-charge %)))))
(defn <-row
  "Convert a flat parquet row into the shape consumers expect.
  Parquet produces maps of the form:
  {\"external-id\" \"square/order/123\", ...}
  which we transform to:
  {:sales-order/external-id \"square/order/123\", ...}

  Column keys are matched regardless of spelling: string or un-namespaced
  keyword, hyphenated or underscored (\"external-id\", \"external_id\",
  :external_id all become :sales-order/external-id). Other code in this
  namespace reads rows with keyword/underscore keys, so both spellings must
  be accepted. Keys that are not known columns are kept unchanged.

  :sales-order/date is always present in the result; a non-nil value is
  converted with `str`, a missing one is nil."
  [row]
  (let [renames {"external-id"    :sales-order/external-id
                 "location"       :sales-order/location
                 "total"          :sales-order/total
                 "tax"            :sales-order/tax
                 "tip"            :sales-order/tip
                 "discount"       :sales-order/discount
                 "service-charge" :sales-order/service-charge
                 "vendor"         :sales-order/vendor
                 "client-code"    :sales-order/client-code
                 "date"           :sales-order/date}
        ;; Only plain strings and un-namespaced keywords are column names;
        ;; namespaced keywords (e.g. :charge/total) must never be remapped.
        canonical (fn [k]
                    (if (or (string? k)
                            (and (keyword? k) (nil? (namespace k))))
                      (get renames (.replace ^String (name k) "_" "-") k)
                      k))]
    (-> (reduce-kv (fn [m k v] (assoc m (canonical k) v)) {} row)
        (update :sales-order/date #(some-> % str)))))
;; Pull pattern for a sales order: its scalar attributes, the owning client
;; (name, id, code) and each charge with its processor ident and the ids of
;; any expected deposits that reference the charge (reverse ref).
(def default-read '[:db/id
:sales-order/external-id,
:sales-order/location,
:sales-order/date,
:sales-order/total,
:sales-order/tax,
:sales-order/tip,
:sales-order/line-items,
:sales-order/discount,
:sales-order/returns,
:sales-order/service-charge,
:sales-order/vendor,
:sales-order/source,
:sales-order/reference-link,
{:sales-order/client [:client/name :db/id :client/code]
:sales-order/charges [
:charge/type-name,
:charge/total,
:charge/tax,
:charge/tip,
:charge/external-id,
:charge/note,
:charge/date,
:charge/client,
:charge/location,
:charge/reference-link,
{:charge/processor [:db/ident]} {:expected-deposit/_charges [:db/id]}]}])
(defn build-where-clause
  "Build a SQL WHERE clause string from the optional filters in `args`.

  Recognised keys:
    :client-code -> external_id.client = '<value>'
    :vendor      -> external_id.vendor = '<name of value>'
    :location    -> location = '<value>'

  Returns \"WHERE <c1> AND <c2> ...\" for the filters that are present, or
  nil when none are. Each value is emitted as a SQL string literal with
  embedded single quotes doubled, so a value cannot terminate the literal."
  [args]
  (let [quoted  (fn [v] (str "'" (.replace ^String (str v) "'" "''") "'"))
        ;; Absent filters yield nil and are dropped, so they neither force a
        ;; WHERE nor leave dangling ANDs.
        clauses (remove nil?
                        [(when-let [c (:client-code args)]
                           (str "external_id.client = " (quoted c)))
                         (when-let [v (:vendor args)]
                           (str "external_id.vendor = " (quoted (name v))))
                         (when-let [l (:location args)]
                           (str "location = " (quoted l)))])]
    (when (seq clauses)
      ;; interpose + str keeps this block free of a clojure.string alias,
      ;; which the namespace does not require.
      (str "WHERE " (apply str (interpose " AND " clauses))))))
(defn raw-graphql-ids [db args]
(let [visible-clients (set (map :db/id (:clients args)))
selected-clients (->> (cond
(:client-id args)
(set/intersection #{(:client-id args)}
visible-clients)
(defn build-sort-clause
  "Build a SQL ORDER BY clause from `args`.

  :sort  - column to sort by (string or keyword); defaults to \"date\".
  :order - \"ASC\" or \"DESC\" in any case (string or keyword); defaults to
           \"DESC\".

  Both values end up inside SQL text, so they are validated first: the
  column must look like a plain (optionally dotted) identifier and the
  direction must be ASC or DESC. Anything else falls back to the default
  rather than being interpolated."
  [args]
  (let [as-text   (fn [v] (when (or (string? v) (keyword? v)) (name v)))
        column    (let [c (as-text (:sort args))]
                    (if (and c (re-matches #"[A-Za-z_][A-Za-z0-9_.]*" c))
                      c
                      "date"))
        direction (let [o (as-text (:order args))]
                    (if (and o (re-matches #"(?i)asc|desc" o))
                      o
                      "DESC"))]
    (str "ORDER BY " column " " direction)))
;; Default number of rows per page; raw-graphql-ids uses it when args has no :limit.
(def page-size 100)
(:client-code args)
(set/intersection #{(pull-id db [:client/code (:client-code args)])}
visible-clients)
(defn raw-graphql-ids
  "Fetch one page of sales-order rows through pq/get-sales-orders.

  Reads from `args`:
    :date-range  - map with :start and :end
    :client-code, :vendor, :location - optional filters, passed through
    :sort, :order - consumed by build-sort-clause
    :limit       - page size, defaults to page-size
    :offset      - defaults to 0

  Returns {:ids [...] :rows [...] :count n}, where :ids is the stringified
  :external_id of each row. Returns nil when the date range has no :start."
  [args]
  (let [date-range  (:date-range args)
        start       (some-> (:start date-range) str)
        ;; Keep only the first 10 characters of the end value. The previous
        ;; `(some-> x .substring 0 10)` threaded through `.substring`, `0`
        ;; and `10` as three separate forms and threw for any non-nil end.
        end         (when-let [e (:end date-range)]
                      (let [s (str e)]
                        (subs s 0 (min 10 (count s)))))
        sort-clause (build-sort-clause args)
        limit       (or (:limit args) page-size)
        offset      (or (:offset args) 0)]
    (when start
      ;; NOTE(review): :sort receives the whole \"ORDER BY ...\" clause and
      ;; :order is fixed to \"DESC\" regardless of args - confirm this is
      ;; what pq/get-sales-orders expects.
      (let [result (pq/get-sales-orders start end
                                        {:client (:client-code args)
                                         :vendor (:vendor args)
                                         :location (:location args)
                                         :sort sort-clause
                                         :order "DESC"
                                         :limit limit
                                         :offset offset})]
        {:ids (mapv #(str (:external_id %)) (:rows result))
         :rows (:rows result)
         :count (:count result)}))))
:else
visible-clients)
(take 10)
set)
_ (mu/log ::selected-clients
:selected-clients selected-clients)
query (cond-> {:query {:find []
:in ['$ '[?clients ?start-date ?end-date]]
:where '[[(iol-ion.query/scan-sales-orders $ ?clients ?start-date ?end-date) [[?e _ ?sort-default] ...]]]}
:args [db [selected-clients
(some-> (:start (:date-range args)) c/to-date)
(some-> (:end (:date-range args)) c/to-date )]]}
(defn graphql-results
  "Convert each row in `rows` with <-row and return the results as a vector.
  The ids and args parameters are accepted but not used."
  [rows _ids _args]
  (into [] (map <-row) rows))
(:sort args) (add-sorter-fields-2 {"client" ['[?e :sales-order/client ?c]
'[?c :client/name ?sort-client]]
"location" ['[?e :sales-order/location ?sort-location]]
"source" ['[?e :sales-order/source ?sort-source]]
"date" ['[?e :sales-order/date ?sort-date]]
"total" ['[?e :sales-order/total ?sort-total]]
"tax" ['[?e :sales-order/tax ?sort-tax]]
"tip" ['[?e :sales-order/tip ?sort-tip]]}
args)
(:category args)
(merge-query {:query {:in ['?category]
:where ['[?e :sales-order/line-items ?li]
'[?li :order-line-item/category ?category]]}
:args [(:category args)]})
(:processor args)
(merge-query {:query {:in ['?processor]
:where ['[?e :sales-order/charges ?chg]
'[?chg :charge/processor ?processor]]}
:args [(keyword "ccp-processor"
(name (:processor args)))]})
(:type-name args)
(merge-query {:query {:in ['?type-name]
:where ['[?e :sales-order/charges ?chg]
'[?chg :charge/type-name ?type-name]]}
:args [(:type-name args)]})
(:total-gte args)
(merge-query {:query {:in ['?total-gte]
:where ['[?e :sales-order/total ?a]
'[(>= ?a ?total-gte)]]}
:args [(:total-gte args)]})
(:total-lte args)
(merge-query {:query {:in ['?total-lte]
:where ['[?e :sales-order/total ?a]
'[(<= ?a ?total-lte)]]}
:args [(:total-lte args)]})
(:total args)
(merge-query {:query {:in ['?total]
:where ['[?e :sales-order/total ?sales-order-total]
'[(iol-ion.query/dollars= ?sales-order-total ?total)]]}
:args [(:total args)]})
true
(merge-query {:query {:find ['?date '?e]
:where ['[?e :sales-order/date ?date]]}}))]
(cond->> (query2 query)
true (apply-sort-3 (assoc args :default-asc? false))
true (apply-pagination args))))
(defn graphql-results
  "Pull every id in `ids` with the default-read pattern and return the
  entities, converted with <-datomic, as a vector in the same order as
  `ids`. The third argument is ignored."
  [ids db _]
  (let [by-id (group-by :db/id (pull-many db default-read ids))]
    (mapv (fn [id]
            (<-datomic (first (by-id id))))
          ids)))
(defn summarize-orders
  "Sum :sales-order/total and :sales-order/tax over the entities in `ids`
  against the current database value of conn.
  Returns {:total <sum> :tax <sum>}."
  [ids]
  (let [totals-query {:find ['(sum ?t) '(sum ?tax)]
                      :with ['?id]
                      :in ['$ '[?id ...]]
                      :where ['[?id :sales-order/total ?t]
                              '[?id :sales-order/tax ?tax]]}
        [total tax] (first (dc/q totals-query (dc/db conn) ids))]
    {:total total
     :tax tax}))
(defn summarize-orders
  "Sum the total and tax columns over `rows`.

  Returns {:total <double> :tax <double>}, or nil when `rows` is empty or
  nil. A row missing a column contributes 0.0 for it.

  Each column is looked up under its keyword (:total), its string name
  (\"total\") and its namespaced keyword (:sales-order/total), because this
  namespace handles rows in more than one key spelling; reading only the
  keyword form would silently sum string-keyed rows to 0.0."
  [rows]
  (when (seq rows)
    (let [amount (fn [row field]
                   (or (get row (keyword field))
                       (get row field)
                       (get row (keyword "sales-order" field))
                       0.0))
          sum-of (fn [field]
                   (reduce + 0.0 (map #(amount % field) rows)))]
      {:total (sum-of "total")
       :tax (sum-of "tax")})))
(defn get-graphql [args]
(let [db (dc/db conn)
{ids-to-retrieve :ids matching-count :count} (mu/trace ::get-sales-order-ids [] (raw-graphql-ids db args))]
[(->> (mu/trace ::get-results [] (graphql-results ids-to-retrieve db args)))
matching-count
(summarize-orders ids-to-retrieve)]))
(let [{:keys [ids rows count]} (mu/trace ::get-sales-order-ids [] (raw-graphql-ids args))]
[(mu/trace ::get-results [] (graphql-results rows ids args))
count
(summarize-orders rows)]))
(defn summarize-graphql [args]
(let [db (dc/db conn)
{ids-to-retrieve :ids matching-count :count} (mu/trace ::get-sales-order-ids [] (raw-graphql-ids db args))]
(summarize-orders ids-to-retrieve)))
(let [{:keys [rows]} (raw-graphql-ids args)]
(summarize-orders rows)))