feat(sales): initial Parquet migration infrastructure

- Add DuckDB/S3 parquet storage layer (auto-ap.storage.parquet)
- Add sales_to_parquet migration script for historical data
- Add cleanup_sales for post-migration Datomic cleanup
- Add sales_orders_new.clj with DuckDB read layer for SSR views
- Add test scaffolding for parquet storage
- Add plan document for move-detailed-sales-to-parquet

feat(sales): redirect production and read flows to Parquet/DuckDB

- U3: Square production (upsert) now buffers to parquet via flatten-order-to-parquet!
- U3: EzCater core import-order now buffers to parquet instead of Datomic transact
- U3: EzCater XLS upload-xls now buffers to parquet instead of audit-transact
- U4: Rewrite sales_orders.clj to read from DuckDB via pq/get-sales-orders
- U5: Rewrite sales_summaries to use parquet aggregation functions
  - get-payment-items-parquet, get-discounts-parquet, get-refund-items-parquet
  - get-tax-parquet, get-tip-parquet, get-sales-parquet
- Add sum-* aggregation functions to storage/sales_summaries.clj
  - sum-discounts, sum-refunds-by-type, sum-taxes, sum-tips, sum-sales-by-category
This commit is contained in:
2026-04-25 07:43:41 -07:00
parent db9018722d
commit 26c9563a03
15 changed files with 1901 additions and 290 deletions

View File

@@ -0,0 +1,184 @@
(ns auto-ap.storage.sales-summaries
"Aggregation functions querying Parquet files on S3 via DuckDB.
Entity types: sales-order | charge | line-item | sales-refund
S3 pattern: s3://<bucket>/sales-details/<entity-type>/<YYYY-MM-DD>.parquet"
(:require [auto-ap.storage.parquet :as p]
[clojure.string :as str]))
(defn- dq [name]
(str "\"" name "\""))
(defn- sum-dbl [val]
(try
(if val (double val) 0.0)
(catch Exception _e
0.0)))
(defn- pq-files [entity-type start-date end-date]
"Vector of S3 parquet file paths for date range."
(let [dates (p/date-seq start-date end-date)]
(vec
(map #(str "'s3://" p/*bucket*
"/sales-details/" entity-type "/"
% ".parquet") dates))))
(defn sum-payments-by-type [client-id start-date end-date]
"Return {processor-key -> {type-name-string -> total-double}}."
(let [files (pq-files "charge" start-date end-date)]
(try
(let [sql (str "SELECT "
(dq "processor")
" AS proc, "
(dq "type-name")
" AS type_name, "
"SUM("
(dq "total")
")::DOUBLE AS total_amount "
"FROM read_parquet(["
(str/join ", " files)
"]) "
"WHERE "
(dq "client-code")
" = '" client-id "' "
"GROUP BY "
(dq "processor") ", "
(dq "type-name"))]
(let [rows (p/query-rows sql)]
(reduce (fn [acc row]
(let [proc (:proc row)
tname (str/trim (name (:type_name row)))
total (sum-dbl (:total_amount row))]
(update acc proc
(fn [inner]
(let [b (or inner {})]
(assoc b
tname
(+ (get b tname 0.0) total)))))))
{}
rows)))
(catch Exception e
(println "[sales-summaries]" (.getMessage e))
{}))))
(defn sum-discounts [client-id start-date end-date]
(let [files (pq-files "sales-order" start-date end-date)]
(try
(let [sql (str "SELECT SUM("
(dq "discount")
")::DOUBLE AS discount_total "
"FROM read_parquet(["
(str/join ", " files)
"]) "
"WHERE "
(dq "client-code")
" = '" client-id "'")]
(or (some-> (first (p/query-rows sql)) :discount_total sum-dbl) 0.0))
(catch Exception e
(println "[sales-summaries/discounts]" (.getMessage e))
0.0))))
(defn sum-refunds-by-type [client-id start-date end-date]
(let [files (pq-files "sales-refund" start-date end-date)]
(try
(let [sql (str "SELECT "
(dq "type-name")
" AS type_name, "
"SUM("
(dq "total")
")::DOUBLE AS total_amount "
"FROM read_parquet(["
(str/join ", " files)
"]) "
"WHERE "
(dq "sales-order-external-id")
" IN (SELECT "
(dq "external-id")
" FROM read_parquet(["
(str/join ", " (pq-files "sales-order" start-date end-date))
"]) WHERE "
(dq "client-code")
" = '" client-id "') "
"GROUP BY " (dq "type-name"))]
(let [rows (p/query-rows sql)]
(reduce (fn [acc row]
(let [tname (str/trim (name (:type_name row)))
total (sum-dbl (:total_amount row))]
(assoc acc tname (+ (get acc tname 0.0) total))))
{}
rows)))
(catch Exception e
(println "[sales-summaries/refunds]" (.getMessage e))
{}))))
(defn sum-taxes [client-id start-date end-date]
(let [files (pq-files "sales-order" start-date end-date)]
(try
(let [sql (str "SELECT SUM("
(dq "tax")
")::DOUBLE AS tax_total "
"FROM read_parquet(["
(str/join ", " files)
"]) "
"WHERE "
(dq "client-code")
" = '" client-id "'")]
(or (some-> (first (p/query-rows sql)) :tax_total sum-dbl) 0.0))
(catch Exception e
(println "[sales-summaries/tax]" (.getMessage e))
0.0))))
(defn sum-tips [client-id start-date end-date]
(let [files (pq-files "sales-order" start-date end-date)]
(try
(let [sql (str "SELECT SUM("
(dq "tip")
")::DOUBLE AS tip_total "
"FROM read_parquet(["
(str/join ", " files)
"]) "
"WHERE "
(dq "client-code")
" = '" client-id "'")]
(or (some-> (first (p/query-rows sql)) :tip_total sum-dbl) 0.0))
(catch Exception e
(println "[sales-summaries/tip]" (.getMessage e))
0.0))))
(defn sum-sales-by-category [client-id start-date end-date]
(let [files (pq-files "line-item" start-date end-date)]
(try
(let [sql (str "SELECT "
(dq "category")
" AS category, "
"SUM("
(dq "total")
")::DOUBLE AS total_amount, "
"SUM("
(dq "tax")
")::DOUBLE AS tax_amount, "
"SUM("
(dq "discount")
")::DOUBLE AS discount_amount "
"FROM read_parquet(["
(str/join ", " files)
"]) "
"WHERE "
(dq "sales-order-external-id")
" IN (SELECT "
(dq "external-id")
" FROM read_parquet(["
(str/join ", " (pq-files "sales-order" start-date end-date))
"]) WHERE "
(dq "client-code")
" = '" client-id "') "
"GROUP BY " (dq "category"))]
(let [rows (p/query-rows sql)]
(mapv (fn [row]
{:category (or (:category row) "Unknown")
:total (sum-dbl (:total_amount row))
:tax (sum-dbl (:tax_amount row))
:discount (sum-dbl (:discount_amount row))})
rows)))
(catch Exception e
(println "[sales-summaries/sales]" (.getMessage e))
[]))))