fixes
This commit is contained in:
271
scratch-sessions/sysco_recode_scratch.clj
Normal file
271
scratch-sessions/sysco_recode_scratch.clj
Normal file
@@ -0,0 +1,271 @@
|
||||
;; =====================================================================
|
||||
;; ONE-OFF SCRATCH — re-code already-imported Sysco invoices after fixing
|
||||
;; resources/sysco_line_item_mapping.csv.
|
||||
;;
|
||||
;; Context: the Sysco importer codes each line item by EXACT description
|
||||
;; match against sysco_line_item_mapping.csv, defaulting to GL 50000 when a
|
||||
;; description is missing (auto-ap.jobs.sysco/get-line-account). Missing
|
||||
;; PAPER & DISP (and other) descriptions landed in 50000 (Food Costs)
|
||||
;; instead of their real accounts (e.g. 55000 Paper Costs). The mapping is
|
||||
;; now fixed; this re-derives the correct split from each invoice's source
|
||||
;; CSV and rewrites :invoice/expense-accounts.
|
||||
;;
|
||||
;; Design:
|
||||
;; - Recode EVERY invoice found in the CSV resource (a Sysco file may batch
|
||||
;; several invoices; they're grouped by InvoiceNumber).
|
||||
;; - Build ONE transaction covering every invoice, emitting only the datoms
|
||||
;; that actually change:
|
||||
;; * reuse an existing invoice-expense-account when its account (and
|
||||
;; location) already match, updating just :amount when it differs;
|
||||
;; * add a child for an account that has no row yet;
|
||||
;; * retract a child whose account is no longer in the corrected split;
|
||||
;; * emit nothing for rows already correct.
|
||||
;; - Validate with (dc/with db changes): apply the tx to an in-memory db
|
||||
;; value and assert every affected invoice's expense-account amounts sum
|
||||
;; to its :invoice/total BEFORE committing for real.
|
||||
;; - After committing, touch the ledger for every affected invoice. This is
|
||||
;; a SEPARATE transaction on purpose: :upsert-invoice rebuilds the journal
|
||||
;; entry from the invoice's expense-accounts as seen in db-before, so it
|
||||
;; must run after the recode is committed.
|
||||
;;
|
||||
;; DO NOT load/evaluate this whole file. Step through the (comment ...) forms
|
||||
;; one at a time in a connected REPL; the commit + ledger steps are gated #_.
|
||||
;;
|
||||
;; PRECONDITIONS
|
||||
;; - The deployed artifact ships the fixed sysco_line_item_mapping.csv AND
|
||||
;; the invoice CSV at resources/sysco_recode/<file>.csv (io/resource).
|
||||
;; - You are connected to the DB you intend to mutate (prod conn!).
|
||||
;; =====================================================================
|
||||
|
||||
(comment
|
||||
|
||||
(require '[auto-ap.jobs.sysco :as sysco]
|
||||
'[auto-ap.datomic :refer [conn audit-transact random-tempid]]
|
||||
'[auto-ap.utils :refer [dollars=]]
|
||||
'[auto-ap.time :as t]
|
||||
'[clj-time.coerce :as coerce]
|
||||
'[clojure.data.csv :as csv]
|
||||
'[clojure.java.io :as io]
|
||||
'[datomic.api :as dc])
|
||||
|
||||
;; ------------------------------------------------------------------
|
||||
;; STEP 0 — reload the mapping cache so the corrected CSV is in effect.
|
||||
;; ------------------------------------------------------------------
|
||||
(reset! sysco/sysco-name->line nil)
|
||||
(count (sysco/get-sysco->line))
|
||||
|
||||
;; sanity: a previously-missing paper description now resolves to 55000.
|
||||
(dc/pull (dc/db conn) [:account/numeric-code :account/name]
|
||||
(sysco/get-line-account "BAG PAPER 250 CT")) ; => 55000
|
||||
|
||||
;; ------------------------------------------------------------------
|
||||
;; Helpers
|
||||
;; ------------------------------------------------------------------
|
||||
(defn read-csv-rows
|
||||
"Reads the invoice CSV from the classpath (so it ships with the deploy).
|
||||
`resource-path` is relative to a resources/ root, e.g. \"sysco_recode/bad.csv\"."
|
||||
[resource-path]
|
||||
(with-open [r (io/reader (or (io/resource resource-path)
|
||||
(throw (ex-info "CSV not found on classpath"
|
||||
{:resource-path resource-path}))))]
|
||||
(doall (csv/read-csv r))))
|
||||
|
||||
(defn parse-date
|
||||
"Sysco yyMMdd string -> java.util.Date, the same way the importer stores
|
||||
:invoice/date (auto-ap.jobs.sysco/extract-invoice-details)."
|
||||
[yymmdd]
|
||||
(coerce/to-date (t/parse yymmdd "yyMMdd")))
|
||||
|
||||
(defn group-invoices
|
||||
"Split a (possibly multi-invoice) Sysco CSV into one entry per invoice.
|
||||
Groups DET/HDR/SUM rows by InvoiceNumber (index 4); date comes from the
|
||||
group's HDR row InvoiceDate (index 10)."
|
||||
[rows]
|
||||
(->> rows
|
||||
(filter #(contains? #{"DET" "HDR" "SUM"} (nth % 5 nil)))
|
||||
(group-by #(nth % 4))
|
||||
(mapv (fn [[number grp]]
|
||||
(let [hdr (first (filter #(= "HDR" (nth % 5)) grp))]
|
||||
{:invoice-number number
|
||||
:date-str (some-> hdr (nth 10))
|
||||
:rows grp})))))
|
||||
|
||||
(defn desired-split
|
||||
"Rows of one Sysco invoice -> {account-eid -> amount-double}, using the
|
||||
CURRENT (fixed) mapping. DET rows only (record-type at index 5); tax
|
||||
(SUM row, TotalTaxAmount index 14) added to the same account the
|
||||
importer uses for \"TAX\". Mirrors auto-ap.jobs.sysco/code-individual-items."
|
||||
[rows]
|
||||
(let [det (filter #(= "DET" (nth % 5)) rows)
|
||||
sum-row (first (filter #(= "SUM" (nth % 5)) rows))
|
||||
tax (some-> sum-row (nth 14) Double/parseDouble)
|
||||
by-acct (reduce
|
||||
(fn [acc row]
|
||||
(update acc
|
||||
(sysco/get-line-account (nth row sysco/item-name-index))
|
||||
(fnil + 0.0)
|
||||
(Double/parseDouble (nth row sysco/item-price-index))))
|
||||
{}
|
||||
det)]
|
||||
(cond-> by-acct
|
||||
(and tax (not (zero? tax)))
|
||||
(update (sysco/get-line-account "TAX") (fnil + 0.0) tax))))
|
||||
|
||||
(defn resolve-eid
|
||||
"Match on invoice-number AND date (belt-and-suspenders). Asserts a unique
|
||||
hit so we never recode the wrong invoice."
|
||||
[invoice-number date]
|
||||
(let [ids (mapv first (dc/q '[:find ?i :in $ ?n ?d
|
||||
:where
|
||||
[?i :invoice/invoice-number ?n]
|
||||
[?i :invoice/date ?d]]
|
||||
(dc/db conn) invoice-number date))]
|
||||
(assert (>= 1 (count ids))
|
||||
(str "multiple invoices match " invoice-number " / " date ": " ids))
|
||||
(first ids)))
|
||||
|
||||
(defn invoice-change-datoms
|
||||
"Minimal tx-data to make invoice `eid`'s expense-account split equal
|
||||
`desired` ({account-eid -> amount}). Returns [] when already correct."
|
||||
[db eid desired]
|
||||
(let [existing (:invoice/expense-accounts
|
||||
(dc/pull db [{:invoice/expense-accounts
|
||||
[:db/id :invoice-expense-account/amount
|
||||
:invoice-expense-account/location
|
||||
{:invoice-expense-account/account [:db/id]}]}]
|
||||
eid))
|
||||
loc (or (some :invoice-expense-account/location existing) "HQ")
|
||||
;; one child per account expected; index by account, retract any dupes
|
||||
by-acct (group-by #(get-in % [:invoice-expense-account/account :db/id]) existing)
|
||||
one (into {} (map (fn [[a cs]] [a (first cs)])) by-acct)
|
||||
dupes (mapcat (fn [[_ cs]] (map :db/id (rest cs))) by-acct)
|
||||
wanted (set (keys desired))
|
||||
upserts (keep (fn [[acct amt]]
|
||||
(let [child (get one acct)]
|
||||
(cond
|
||||
;; new account -> accrete a child under the invoice
|
||||
(nil? child)
|
||||
{:db/id eid
|
||||
:invoice/expense-accounts
|
||||
[#:invoice-expense-account{:db/id (random-tempid)
|
||||
:account acct
|
||||
:location loc
|
||||
:amount amt}]}
|
||||
;; right account, wrong value -> reuse, set amount
|
||||
;; (and fix location if it drifted)
|
||||
(or (not (dollars= (:invoice-expense-account/amount child) amt))
|
||||
(not= (:invoice-expense-account/location child) loc))
|
||||
(cond-> {:db/id (:db/id child)
|
||||
:invoice-expense-account/amount amt}
|
||||
(not= (:invoice-expense-account/location child) loc)
|
||||
(assoc :invoice-expense-account/location loc))
|
||||
;; already correct -> nothing
|
||||
:else nil)))
|
||||
desired)
|
||||
retracts (for [[acct child] one :when (not (wanted acct))]
|
||||
[:db/retractEntity (:db/id child)])]
|
||||
(vec (concat upserts
|
||||
retracts
|
||||
(map (fn [id] [:db/retractEntity id]) dupes)))))
|
||||
|
||||
;; ------------------------------------------------------------------
|
||||
;; STEP 1 — point at the CSV. EVERY invoice in this file gets recoded.
|
||||
;; Place the file under resources/ (e.g. resources/sysco_recode/bad.csv)
|
||||
;; and commit it so it's on the classpath of the deployed artifact.
|
||||
;; ------------------------------------------------------------------
|
||||
(def csv-path "sysco_recode/bad.csv")
|
||||
(def rows (read-csv-rows csv-path))
|
||||
|
||||
(def invoices (group-invoices rows))
|
||||
(mapv (juxt :invoice-number :date-str) invoices) ;; what we found in the file
|
||||
|
||||
;; ------------------------------------------------------------------
|
||||
;; STEP 2 — resolve each invoice (number + date) and compute its split.
|
||||
;; ------------------------------------------------------------------
|
||||
(def plan
|
||||
(mapv (fn [{:keys [invoice-number date-str rows]}]
|
||||
(let [date (parse-date date-str)]
|
||||
{:invoice-number invoice-number
|
||||
:date date
|
||||
:eid (resolve-eid invoice-number date)
|
||||
:desired (desired-split rows)}))
|
||||
invoices))
|
||||
|
||||
;; bail if any invoice number didn't resolve
|
||||
(assert (every? :eid plan)
|
||||
(str "unresolved invoices: "
|
||||
(mapv (juxt :invoice-number :date) (remove :eid plan))))
|
||||
|
||||
;; ------------------------------------------------------------------
|
||||
;; STEP 3 — build the SINGLE changes-only transaction across all invoices.
|
||||
;; ------------------------------------------------------------------
|
||||
(def changes
|
||||
(let [db (dc/db conn)]
|
||||
(vec (mapcat (fn [{:keys [eid desired]}] (invoice-change-datoms db eid desired))
|
||||
plan))))
|
||||
|
||||
(count changes) ;; how many datoms we're actually changing
|
||||
changes ;; inspect the full minimal tx
|
||||
|
||||
;; ------------------------------------------------------------------
|
||||
;; STEP 4 — validate with dc/with: apply the tx to an in-memory db value
|
||||
;; and confirm every affected invoice still balances (sum of expense
|
||||
;; account amounts == :invoice/total).
|
||||
;; ------------------------------------------------------------------
|
||||
(def preview (dc/with (dc/db conn) changes))
|
||||
|
||||
(def balance-report
|
||||
(let [db-after (:db-after preview)]
|
||||
(mapv (fn [{:keys [eid invoice-number]}]
|
||||
(let [inv (dc/pull db-after
|
||||
[:invoice/total
|
||||
{:invoice/expense-accounts [:invoice-expense-account/amount]}]
|
||||
eid)
|
||||
s (reduce + 0.0 (map :invoice-expense-account/amount
|
||||
(:invoice/expense-accounts inv)))]
|
||||
{:invoice-number invoice-number
|
||||
:total (:invoice/total inv)
|
||||
:ea-sum s
|
||||
:ok? (dollars= s (:invoice/total inv))}))
|
||||
plan)))
|
||||
balance-report
|
||||
|
||||
;; HARD GATE — do not continue unless every invoice balances post-change.
|
||||
(assert (every? :ok? balance-report)
|
||||
(str "unbalanced after change: "
|
||||
(filterv (complement :ok?) balance-report)))
|
||||
|
||||
;; ------------------------------------------------------------------
|
||||
;; STEP 5 — COMMIT the recode (gated). One transaction, changes only.
|
||||
;; ------------------------------------------------------------------
|
||||
#_(audit-transact changes
|
||||
{:user/name "sysco recode (missing GL mappings fix)"
|
||||
:user/role "admin"})
|
||||
|
||||
;; ------------------------------------------------------------------
|
||||
;; STEP 6 — touch the ledger for every affected invoice (separate tx;
|
||||
;; :upsert-invoice rebuilds the journal entry from the now-committed
|
||||
;; expense-accounts). Gated.
|
||||
;; ------------------------------------------------------------------
|
||||
#_(audit-transact (mapv (fn [{:keys [eid]}] [:upsert-invoice {:db/id eid}]) plan)
|
||||
{:user/name "sysco recode ledger touch"
|
||||
:user/role "admin"})
|
||||
|
||||
;; ------------------------------------------------------------------
|
||||
;; STEP 7 — verify committed result.
|
||||
;; ------------------------------------------------------------------
|
||||
#_(let [db (dc/db conn)]
|
||||
(mapv (fn [{:keys [eid invoice-number]}]
|
||||
{:invoice-number invoice-number
|
||||
:accounts
|
||||
(->> (dc/pull db
|
||||
[{:invoice/expense-accounts
|
||||
[:invoice-expense-account/amount
|
||||
{:invoice-expense-account/account [:account/numeric-code]}]}]
|
||||
eid)
|
||||
:invoice/expense-accounts
|
||||
(map (juxt #(get-in % [:invoice-expense-account/account :account/numeric-code])
|
||||
:invoice-expense-account/amount))
|
||||
(sort-by first)
|
||||
vec)})
|
||||
plan)))
|
||||
Reference in New Issue
Block a user