- Fix double ORDER BY in sales_orders raw-graphql-ids (was passing full ORDER BY clause from build-sort-clause into get-sales-orders which prepends its own ORDER BY, producing 'ORDER BY ORDER BY ... DESC DESC') - Fix WHERE clause column names in parquet build-where-clause: external_id.client -> client-code, external_id.vendor -> vendor - Fix parquet-query format string (%%s -> %s with proper format call) - Fix ex-info call signature in flush! (was passing :error as third arg instead of inside the data map) - Add S3 credentials to DuckDB connect! so httpfs can read from S3 - Fix parquet buffer indentation and alignment across square/core3, ezcater/core, ezcater_xls, payments, sales_summaries, migrations - Fix broken Datomic query syntax in ezcater/core (upsert-used-subscriptions, upsert-recent find/where clauses mangled by paren-repair) - Uncomment accidentally commented code block in square/core3 - Fix paren/indentation issues in ssr/payments, jobs/sales_summaries
220 lines
7.7 KiB
Clojure
220 lines
7.7 KiB
Clojure
(ns auto-ap.migration.cleanup-sales
|
|
(:require [auto-ap.datomic :refer [conn]]
|
|
[auto-ap.storage.parquet :as pq]
|
|
[amazonica.aws.s3 :as s3]
|
|
[datomic.api :as d-api]
|
|
[clojure.string :as str]))
|
|
|
|
;; Upper bound on the number of [:db/retractEntity ...] operations that
;; batch-transact places in a single transaction.
(def ^{:private true} BATCH-SIZE 1000)

;; Global safety switch. While true, the retraction code paths only print
;; what they would do and never transact. Starts out true.
(def ^{:private true} DRY-RUN? true)
|
|
|
|
(defn- set-dry-run!
  "Replace the root value of DRY-RUN? with v and return the new value.
  Pass false to allow real retractions, true to go back to dry-run."
  [v]
  (alter-var-root (var DRY-RUN?) (fn [_previous] v)))
|
|
|
|
; -- query helpers
|
|
|
|
(defn- query-sales-order-ids
  "Look up every entity in db that carries a :sales-order/external-id
  attribute and return their entity IDs as a lazy seq."
  [db]
  (let [rows (d-api/q '[:find ?e
                        :where [?e :sales-order/external-id]]
                      db)]
    ;; Each result row is a 1-tuple [eid]; unwrap it.
    (for [row rows]
      (first row))))
|
|
|
|
(defn- collect-child-ids
  "Gather the entity IDs reachable from a batch of sales orders.

  db        - Datomic database value to query.
  order-ids - seq of sales-order entity IDs.

  Returns a map with keys :orders, :charges, :line-items and :refunds,
  each a vector of entity IDs eligible for retraction:
    :orders     - the given order-ids
    :charges    - values of :sales-order/charges on those orders
    :refunds    - values of :sales-order/refunds on those orders
    :line-items - values of :charge/line-items on the collected charges"
  [db order-ids]
  (let [order-set  (set order-ids)
        ;; Every query below uses `:find ?x`, so each result row is a
        ;; 1-tuple. The ID is the FIRST element; `second` would yield nil
        ;; for every row.
        charges    (->> (d-api/q '[:find ?c
                                   :in $ [?o ...]
                                   :where [$ ?o :sales-order/charges ?c]]
                                 db order-set)
                        (mapv first))
        refunds    (->> (d-api/q '[:find ?r
                                   :in $ [?o ...]
                                   :where [$ ?o :sales-order/refunds ?r]]
                                 db order-set)
                        (mapv first))
        ;; Line items hang off charges, so this lookup is driven by the
        ;; charge IDs collected above.
        line-items (->> (d-api/q '[:find ?li
                                   :in $ [?c ...]
                                   :where [$ ?c :charge/line-items ?li]]
                                 db charges)
                        (mapv first))]
    {:orders     (vec order-ids)
     :charges    charges
     :line-items line-items
     :refunds    refunds}))
|
|
|
|
; -- transaction batching
|
|
|
|
(defn- batch-transact
  "Retract entities in chunks of at most BATCH-SIZE per transaction.

  conn       - Datomic connection handed to d-api/transact.
  entity-ids - seq of entity IDs; each one becomes a
               [:db/retractEntity eid] operation.

  Prints one progress line per chunk. While DRY-RUN? is true nothing is
  transacted. Each transaction is dereferenced, so a chunk finishes before
  the next one starts. Always returns :done."
  [conn entity-ids]
  (doseq [[chunk-no chunk] (map vector
                                (range)
                                (partition-all BATCH-SIZE entity-ids))]
    (let [size     (count chunk)
          retracts (map (fn [eid] [:db/retractEntity eid]) chunk)]
      (println " batch" chunk-no ":" size "retracts")
      (when-not DRY-RUN?
        @(d-api/transact conn retracts))))
  :done)
|
|
|
|
(defn- retract-all-child-ids!
  "Walk a map of entity-type -> seq of entity IDs (the shape produced by
  collect-child-ids) and hand every non-empty ID seq to batch-transact.
  Prints one summary line per entity type before its batches run.
  Returns nil."
  [conn child-entity-map]
  (doseq [[type id-seq] child-entity-map
          :when (seq id-seq)]
    (println "retracting" type ":" (count id-seq) "ids")
    (batch-transact conn id-seq)))
|
|
|
|
; -- month grouping
|
|
|
|
(defn- group-orders-by-month
  "Group sales-order entity IDs by the [year month] of their
  :sales-order/day-value.

  db        - Datomic database value used to read each entity.
  order-ids - seq of sales-order entity IDs.

  Returns a map {[year month] [eid ...]} with integer year and month.
  Orders without a :sales-order/day-value are left out of the result.

  NOTE(review): the year and month are taken from the first two
  dash-separated fields of (str day-value), so this assumes the value
  prints as YYYY-MM-DD. Confirm against the schema."
  [db order-ids]
  (reduce (fn [acc eid]
            ;; if-let with an explicit `acc` fallback: an order with no
            ;; day-value must leave the accumulator unchanged. Returning
            ;; nil here would discard every group built so far.
            (if-let [day-val (:sales-order/day-value
                              (d-api/entity db eid))]
              (let [[y m] (str/split (str day-val) #"-")
                    k     [(Integer/parseInt y)
                           (Integer/parseInt m)]]
                ;; fnil makes each group a vector, as documented.
                (update acc k (fnil conj []) eid))
              acc))
          {}
          order-ids))
|
|
|
|
; -- S3 verification (uses amazonica + parquet module)
|
|
|
|
;; Entity-type names for which s3-keys-for-date builds one parquet key
;; per day.
(def ENTITY-TYPES
  ["sales-order"
   "charge"
   "line-item"
   "sales-refund"])
|
|
|
|
(defn- s3-keys-for-date
  "Return a vector with one parquet key per entry of ENTITY-TYPES for the
  given date string, each built by pq/parquet-key."
  [date-str]
  (into []
        (map (fn [entity-type]
               (pq/parquet-key entity-type date-str)))
        ENTITY-TYPES))
|
|
|
|
(defn- days-in-month
  "Return a lazy seq of ISO date strings (YYYY-MM-DD), one for each day of
  the given year and month, in calendar order."
  [year month]
  (let [month-start (java.time.LocalDate/of year month 1)
        from-day    (.toEpochDay month-start)
        ;; Exclusive upper bound: epoch day of the first of the next month.
        until-day   (.toEpochDay (.plusMonths month-start 1))]
    (map (fn [epoch-day]
           (.toString (java.time.LocalDate/ofEpochDay epoch-day)))
         (range from-day until-day))))
|
|
|
|
(defn- object-exists?
  "Return true when an object with the given key can be looked up in the
  bucket bound to pq/*bucket*, false when S3 answers with an
  AmazonS3Exception.

  Uses a metadata lookup instead of get-object: get-object hands back an
  open content stream that would have to be closed, and starts fetching
  the body. Only existence is needed here.

  NOTE(review): any AmazonS3Exception (not only 'not found') is reported
  as false, as before."
  [key]
  (try
    (s3/get-object-metadata {:bucket-name pq/*bucket*
                             :key         key})
    true
    (catch com.amazonaws.services.s3.model.AmazonS3Exception _
      false)))
|
|
|
|
(defn- verify-month-in-s3?
  "Check each day of the given year and month for a backing parquet object.
  A day counts as covered when at least one of its keys from
  s3-keys-for-date exists according to object-exists?.

  Returns {:ok bool :missing [date-str ...]}. :missing holds the uncovered
  days in calendar order, and :ok is true when that vector is empty."
  [year month]
  (let [covered?  (fn [date]
                    (some object-exists? (s3-keys-for-date date)))
        uncovered (into []
                        (remove covered?)
                        (days-in-month year month))]
    {:ok      (empty? uncovered)
     :missing uncovered}))
|
|
|
|
; -- public API: delete-by-month
|
|
|
|
(defn- delete-by-month
  "Retract all sales entities whose order falls in the given year and month.

  conn         - Datomic connection.
  client-entid - currently unused; callers in this file pass nil.
  year, month  - integers selecting the [year month] group produced by
                 group-orders-by-month.

  Returns :skipped when no orders exist for that month, otherwise :ok.
  While DRY-RUN? is true the entity count is printed but nothing is
  retracted. This function performs no S3 verification itself."
  [conn client-entid year month]
  (println "=== deleting" year "-" month
           "dry-run? =" DRY-RUN?)
  (let [db         (d-api/db conn)
        all-ids    (query-sales-order-ids db)
        by-month   (group-orders-by-month db all-ids)
        target-ids (get by-month [year month] [])]
    (if (empty? target-ids)
      (do (println " no orders found for" year "-" month)
          :skipped)
      (let [child-maps (collect-child-ids db target-ids)
            ;; Count distinct IDs across all entity types for the log.
            total-ids  (->> (vals child-maps)
                            (reduce into [])
                            distinct
                            count)]
        (println " " total-ids "total entities to retract")
        (when-not DRY-RUN?
          (retract-all-child-ids! conn child-maps))
        :ok))))
|
|
|
|
; -- public API: cleanup-all
|
|
|
|
(defn cleanup-all
  "Remove ALL sales-order, charge, line-item and sales-refund entities
  from Datomic, one month at a time.

  Finds every [year month] that has sales orders and calls delete-by-month
  for each in ascending order. Retractions are issued as
  [:db/retractEntity ...] transactions and honour DRY-RUN?. No S3
  verification is done here; use safe-cleanup-all for that. Returns nil."
  []
  (let [db      (d-api/db conn)
        all-ids (query-sales-order-ids db)
        group   (group-orders-by-month db all-ids)
        months  (sort (keys group))]
    (println "found" (count months) "months of data")
    (doseq [[y m] months]
      (delete-by-month conn nil y m))
    (println "cleanup-all complete")))
|
|
|
|
; -- public API: safe-cleanup-all
|
|
|
|
(defn- collect-all-months
  "Return a sorted seq of [year month] pairs for which the current
  database of conn holds sales orders. Empty when there are none."
  [conn]
  (let [db      (d-api/db conn)
        all-ids (query-sales-order-ids db)
        grouped (group-orders-by-month db all-ids)]
    (sort (keys grouped))))
|
|
|
|
(defn safe-cleanup-all
  "Same as cleanup-all, but verifies S3 data exists first.

  For every [year month] with sales orders, verify-month-in-s3? must
  report the month as fully covered before delete-by-month is called for
  it. The first month with missing days aborts the run.

  While DRY-RUN? is true only the summary lines are printed: no S3 checks
  and no deletions happen.

  Throws ex-info with {:year :month :missing} when a month has days
  without parquet files. Returns nil otherwise."
  []
  (let [months (collect-all-months conn)]
    (println "=== safe-cleanup-all"
             "months:" (count months)
             "dry-run? =" DRY-RUN?)
    (when-not DRY-RUN?
      ;; months holds [year month] pairs, so destructure exactly two
      ;; elements. A leading placeholder would shift month into y and
      ;; leave m nil.
      (doseq [[y m] months]
        (let [{:keys [ok missing]} (verify-month-in-s3? y m)]
          ;; :ok is true exactly when :missing is empty, so two branches
          ;; cover every case.
          (if ok
            (do (println "verified" y "-" m "S3 OK, deleting...")
                (delete-by-month conn nil y m))
            (do (println "ERROR" y "-" m "missing in S3:"
                         (str/join ", " missing))
                (throw
                 (ex-info
                  "Missing S3 data — aborting!"
                  {:year    y
                   :month   m
                   :missing missing})))))))
    (println "safe-cleanup-all complete")))
|