This commit is contained in:
2026-06-04 22:59:51 -07:00
parent 019a1b4cd8
commit fc54b92ddb
2 changed files with 271 additions and 0 deletions

View File

@@ -0,0 +1,271 @@
;; =====================================================================
;; ONE-OFF SCRATCH — re-code already-imported Sysco invoices after fixing
;; resources/sysco_line_item_mapping.csv.
;;
;; Context: the Sysco importer codes each line item by EXACT description
;; match against sysco_line_item_mapping.csv, defaulting to GL 50000 when a
;; description is missing (auto-ap.jobs.sysco/get-line-account). Missing
;; PAPER & DISP (and other) descriptions landed in 50000 (Food Costs)
;; instead of their real accounts (e.g. 55000 Paper Costs). The mapping is
;; now fixed; this re-derives the correct split from each invoice's source
;; CSV and rewrites :invoice/expense-accounts.
;;
;; Design:
;; - Recode EVERY invoice found in the CSV resource (a Sysco file may batch
;; several invoices; they're grouped by InvoiceNumber).
;; - Build ONE transaction covering every invoice, emitting only the datoms
;; that actually change:
;; * reuse an existing invoice-expense-account when its account (and
;; location) already match, updating just :amount when it differs;
;; * add a child for an account that has no row yet;
;; * retract a child whose account is no longer in the corrected split;
;; * emit nothing for rows already correct.
;; - Validate with (dc/with db changes): apply the tx to an in-memory db
;; value and assert every affected invoice's expense-account amounts sum
;; to its :invoice/total BEFORE committing for real.
;; - After committing, touch the ledger for every affected invoice. This is
;; a SEPARATE transaction on purpose: :upsert-invoice rebuilds the journal
;; entry from the invoice's expense-accounts as seen in db-before, so it
;; must run after the recode is committed.
;;
;; DO NOT load/evaluate this whole file. Step through the (comment ...) forms
;; one at a time in a connected REPL; the commit + ledger steps are gated #_.
;;
;; PRECONDITIONS
;; - The deployed artifact ships the fixed sysco_line_item_mapping.csv AND
;; the invoice CSV at resources/sysco_recode/<file>.csv (io/resource).
;; - You are connected to the DB you intend to mutate (prod conn!).
;; =====================================================================
(comment
(require '[auto-ap.jobs.sysco :as sysco]
'[auto-ap.datomic :refer [conn audit-transact random-tempid]]
'[auto-ap.utils :refer [dollars=]]
'[auto-ap.time :as t]
'[clj-time.coerce :as coerce]
'[clojure.data.csv :as csv]
'[clojure.java.io :as io]
'[datomic.api :as dc])
;; ------------------------------------------------------------------
;; STEP 0 — reload the mapping cache so the corrected CSV is in effect.
;; ------------------------------------------------------------------
(reset! sysco/sysco-name->line nil)
(count (sysco/get-sysco->line))
;; sanity: a previously-missing paper description now resolves to 55000.
(dc/pull (dc/db conn) [:account/numeric-code :account/name]
(sysco/get-line-account "BAG PAPER 250 CT")) ; => 55000
;; ------------------------------------------------------------------
;; Helpers
;; ------------------------------------------------------------------
(defn read-csv-rows
"Reads the invoice CSV from the classpath (so it ships with the deploy).
`resource-path` is relative to a resources/ root, e.g. \"sysco_recode/bad.csv\"."
[resource-path]
(with-open [r (io/reader (or (io/resource resource-path)
(throw (ex-info "CSV not found on classpath"
{:resource-path resource-path}))))]
(doall (csv/read-csv r))))
(defn parse-date
"Sysco yyMMdd string -> java.util.Date, the same way the importer stores
:invoice/date (auto-ap.jobs.sysco/extract-invoice-details)."
[yymmdd]
(coerce/to-date (t/parse yymmdd "yyMMdd")))
(defn group-invoices
"Split a (possibly multi-invoice) Sysco CSV into one entry per invoice.
Groups DET/HDR/SUM rows by InvoiceNumber (index 4); date comes from the
group's HDR row InvoiceDate (index 10)."
[rows]
(->> rows
(filter #(contains? #{"DET" "HDR" "SUM"} (nth % 5 nil)))
(group-by #(nth % 4))
(mapv (fn [[number grp]]
(let [hdr (first (filter #(= "HDR" (nth % 5)) grp))]
{:invoice-number number
:date-str (some-> hdr (nth 10))
:rows grp})))))
(defn desired-split
"Rows of one Sysco invoice -> {account-eid -> amount-double}, using the
CURRENT (fixed) mapping. DET rows only (record-type at index 5); tax
(SUM row, TotalTaxAmount index 14) added to the same account the
importer uses for \"TAX\". Mirrors auto-ap.jobs.sysco/code-individual-items."
[rows]
(let [det (filter #(= "DET" (nth % 5)) rows)
sum-row (first (filter #(= "SUM" (nth % 5)) rows))
tax (some-> sum-row (nth 14) Double/parseDouble)
by-acct (reduce
(fn [acc row]
(update acc
(sysco/get-line-account (nth row sysco/item-name-index))
(fnil + 0.0)
(Double/parseDouble (nth row sysco/item-price-index))))
{}
det)]
(cond-> by-acct
(and tax (not (zero? tax)))
(update (sysco/get-line-account "TAX") (fnil + 0.0) tax))))
(defn resolve-eid
"Match on invoice-number AND date (belt-and-suspenders). Asserts a unique
hit so we never recode the wrong invoice."
[invoice-number date]
(let [ids (mapv first (dc/q '[:find ?i :in $ ?n ?d
:where
[?i :invoice/invoice-number ?n]
[?i :invoice/date ?d]]
(dc/db conn) invoice-number date))]
(assert (>= 1 (count ids))
(str "multiple invoices match " invoice-number " / " date ": " ids))
(first ids)))
(defn invoice-change-datoms
"Minimal tx-data to make invoice `eid`'s expense-account split equal
`desired` ({account-eid -> amount}). Returns [] when already correct."
[db eid desired]
(let [existing (:invoice/expense-accounts
(dc/pull db [{:invoice/expense-accounts
[:db/id :invoice-expense-account/amount
:invoice-expense-account/location
{:invoice-expense-account/account [:db/id]}]}]
eid))
loc (or (some :invoice-expense-account/location existing) "HQ")
;; one child per account expected; index by account, retract any dupes
by-acct (group-by #(get-in % [:invoice-expense-account/account :db/id]) existing)
one (into {} (map (fn [[a cs]] [a (first cs)])) by-acct)
dupes (mapcat (fn [[_ cs]] (map :db/id (rest cs))) by-acct)
wanted (set (keys desired))
upserts (keep (fn [[acct amt]]
(let [child (get one acct)]
(cond
;; new account -> accrete a child under the invoice
(nil? child)
{:db/id eid
:invoice/expense-accounts
[#:invoice-expense-account{:db/id (random-tempid)
:account acct
:location loc
:amount amt}]}
;; right account, wrong value -> reuse, set amount
;; (and fix location if it drifted)
(or (not (dollars= (:invoice-expense-account/amount child) amt))
(not= (:invoice-expense-account/location child) loc))
(cond-> {:db/id (:db/id child)
:invoice-expense-account/amount amt}
(not= (:invoice-expense-account/location child) loc)
(assoc :invoice-expense-account/location loc))
;; already correct -> nothing
:else nil)))
desired)
retracts (for [[acct child] one :when (not (wanted acct))]
[:db/retractEntity (:db/id child)])]
(vec (concat upserts
retracts
(map (fn [id] [:db/retractEntity id]) dupes)))))
;; ------------------------------------------------------------------
;; STEP 1 — point at the CSV. EVERY invoice in this file gets recoded.
;; Place the file under resources/ (e.g. resources/sysco_recode/bad.csv)
;; and commit it so it's on the classpath of the deployed artifact.
;; ------------------------------------------------------------------
(def csv-path "sysco_recode/bad.csv")
(def rows (read-csv-rows csv-path))
(def invoices (group-invoices rows))
(mapv (juxt :invoice-number :date-str) invoices) ;; what we found in the file
;; ------------------------------------------------------------------
;; STEP 2 — resolve each invoice (number + date) and compute its split.
;; ------------------------------------------------------------------
(def plan
(mapv (fn [{:keys [invoice-number date-str rows]}]
(let [date (parse-date date-str)]
{:invoice-number invoice-number
:date date
:eid (resolve-eid invoice-number date)
:desired (desired-split rows)}))
invoices))
;; bail if any invoice number didn't resolve
(assert (every? :eid plan)
(str "unresolved invoices: "
(mapv (juxt :invoice-number :date) (remove :eid plan))))
;; ------------------------------------------------------------------
;; STEP 3 — build the SINGLE changes-only transaction across all invoices.
;; ------------------------------------------------------------------
(def changes
(let [db (dc/db conn)]
(vec (mapcat (fn [{:keys [eid desired]}] (invoice-change-datoms db eid desired))
plan))))
(count changes) ;; how many datoms we're actually changing
changes ;; inspect the full minimal tx
;; ------------------------------------------------------------------
;; STEP 4 — validate with dc/with: apply the tx to an in-memory db value
;; and confirm every affected invoice still balances (sum of expense
;; account amounts == :invoice/total).
;; ------------------------------------------------------------------
(def preview (dc/with (dc/db conn) changes))
(def balance-report
(let [db-after (:db-after preview)]
(mapv (fn [{:keys [eid invoice-number]}]
(let [inv (dc/pull db-after
[:invoice/total
{:invoice/expense-accounts [:invoice-expense-account/amount]}]
eid)
s (reduce + 0.0 (map :invoice-expense-account/amount
(:invoice/expense-accounts inv)))]
{:invoice-number invoice-number
:total (:invoice/total inv)
:ea-sum s
:ok? (dollars= s (:invoice/total inv))}))
plan)))
balance-report
;; HARD GATE — do not continue unless every invoice balances post-change.
(assert (every? :ok? balance-report)
(str "unbalanced after change: "
(filterv (complement :ok?) balance-report)))
;; ------------------------------------------------------------------
;; STEP 5 — COMMIT the recode (gated). One transaction, changes only.
;; ------------------------------------------------------------------
#_(audit-transact changes
{:user/name "sysco recode (missing GL mappings fix)"
:user/role "admin"})
;; ------------------------------------------------------------------
;; STEP 6 — touch the ledger for every affected invoice (separate tx;
;; :upsert-invoice rebuilds the journal entry from the now-committed
;; expense-accounts). Gated.
;; ------------------------------------------------------------------
#_(audit-transact (mapv (fn [{:keys [eid]}] [:upsert-invoice {:db/id eid}]) plan)
{:user/name "sysco recode ledger touch"
:user/role "admin"})
;; ------------------------------------------------------------------
;; STEP 7 — verify committed result.
;; ------------------------------------------------------------------
#_(let [db (dc/db conn)]
(mapv (fn [{:keys [eid invoice-number]}]
{:invoice-number invoice-number
:accounts
(->> (dc/pull db
[{:invoice/expense-accounts
[:invoice-expense-account/amount
{:invoice-expense-account/account [:account/numeric-code]}]}]
eid)
:invoice/expense-accounts
(map (juxt #(get-in % [:invoice-expense-account/account :account/numeric-code])
:invoice-expense-account/amount))
(sort-by first)
vec)})
plan)))