272 lines
13 KiB
Clojure
272 lines
13 KiB
Clojure
;; =====================================================================
|
|
;; ONE-OFF SCRATCH — re-code already-imported Sysco invoices after fixing
|
|
;; resources/sysco_line_item_mapping.csv.
|
|
;;
|
|
;; Context: the Sysco importer codes each line item by EXACT description
|
|
;; match against sysco_line_item_mapping.csv, defaulting to GL 50000 when a
|
|
;; description is missing (auto-ap.jobs.sysco/get-line-account). Missing
|
|
;; PAPER & DISP (and other) descriptions landed in 50000 (Food Costs)
|
|
;; instead of their real accounts (e.g. 55000 Paper Costs). The mapping is
|
|
;; now fixed; this re-derives the correct split from each invoice's source
|
|
;; CSV and rewrites :invoice/expense-accounts.
|
|
;;
|
|
;; Design:
|
|
;; - Recode EVERY invoice found in the CSV resource (a Sysco file may batch
|
|
;; several invoices; they're grouped by InvoiceNumber).
|
|
;; - Build ONE transaction covering every invoice, emitting only the datoms
|
|
;; that actually change:
|
|
;; * reuse an existing invoice-expense-account when its account (and
|
|
;; location) already match, updating just :amount when it differs;
|
|
;; * add a child for an account that has no row yet;
|
|
;; * retract a child whose account is no longer in the corrected split;
|
|
;; * emit nothing for rows already correct.
|
|
;; - Validate with (dc/with db changes): apply the tx to an in-memory db
|
|
;; value and assert every affected invoice's expense-account amounts sum
|
|
;; to its :invoice/total BEFORE committing for real.
|
|
;; - After committing, touch the ledger for every affected invoice. This is
|
|
;; a SEPARATE transaction on purpose: :upsert-invoice rebuilds the journal
|
|
;; entry from the invoice's expense-accounts as seen in db-before, so it
|
|
;; must run after the recode is committed.
|
|
;;
|
|
;; DO NOT load/evaluate this whole file. Step through the (comment ...) forms
|
|
;; one at a time in a connected REPL; the commit + ledger steps are gated #_.
|
|
;;
|
|
;; PRECONDITIONS
|
|
;; - The deployed artifact ships the fixed sysco_line_item_mapping.csv AND
|
|
;; the invoice CSV at resources/sysco_recode/<file>.csv (io/resource).
|
|
;; - You are connected to the DB you intend to mutate (prod conn!).
|
|
;; =====================================================================
|
|
|
|
(comment
|
|
|
|
(require '[auto-ap.jobs.sysco :as sysco]
|
|
'[auto-ap.datomic :refer [conn audit-transact random-tempid]]
|
|
'[auto-ap.utils :refer [dollars=]]
|
|
'[auto-ap.time :as t]
|
|
'[clj-time.coerce :as coerce]
|
|
'[clojure.data.csv :as csv]
|
|
'[clojure.java.io :as io]
|
|
'[datomic.api :as dc])
|
|
|
|
;; ------------------------------------------------------------------
|
|
;; STEP 0 — reload the mapping cache so the corrected CSV is in effect.
|
|
;; ------------------------------------------------------------------
|
|
(reset! sysco/sysco-name->line nil)
|
|
(count (sysco/get-sysco->line))
|
|
|
|
;; sanity: a previously-missing paper description now resolves to 55000.
|
|
(dc/pull (dc/db conn) [:account/numeric-code :account/name]
|
|
(sysco/get-line-account "BAG PAPER 250 CT")) ; => 55000
|
|
|
|
;; ------------------------------------------------------------------
|
|
;; Helpers
|
|
;; ------------------------------------------------------------------
|
|
(defn read-csv-rows
|
|
"Reads the invoice CSV from the classpath (so it ships with the deploy).
|
|
`resource-path` is relative to a resources/ root, e.g. \"sysco_recode/bad.csv\"."
|
|
[resource-path]
|
|
(with-open [r (io/reader (or (io/resource resource-path)
|
|
(throw (ex-info "CSV not found on classpath"
|
|
{:resource-path resource-path}))))]
|
|
(doall (csv/read-csv r))))
|
|
|
|
(defn parse-date
|
|
"Sysco yyMMdd string -> java.util.Date, the same way the importer stores
|
|
:invoice/date (auto-ap.jobs.sysco/extract-invoice-details)."
|
|
[yymmdd]
|
|
(coerce/to-date (t/parse yymmdd "yyMMdd")))
|
|
|
|
(defn group-invoices
|
|
"Split a (possibly multi-invoice) Sysco CSV into one entry per invoice.
|
|
Groups DET/HDR/SUM rows by InvoiceNumber (index 4); date comes from the
|
|
group's HDR row InvoiceDate (index 10)."
|
|
[rows]
|
|
(->> rows
|
|
(filter #(contains? #{"DET" "HDR" "SUM"} (nth % 5 nil)))
|
|
(group-by #(nth % 4))
|
|
(mapv (fn [[number grp]]
|
|
(let [hdr (first (filter #(= "HDR" (nth % 5)) grp))]
|
|
{:invoice-number number
|
|
:date-str (some-> hdr (nth 10))
|
|
:rows grp})))))
|
|
|
|
(defn desired-split
|
|
"Rows of one Sysco invoice -> {account-eid -> amount-double}, using the
|
|
CURRENT (fixed) mapping. DET rows only (record-type at index 5); tax
|
|
(SUM row, TotalTaxAmount index 14) added to the same account the
|
|
importer uses for \"TAX\". Mirrors auto-ap.jobs.sysco/code-individual-items."
|
|
[rows]
|
|
(let [det (filter #(= "DET" (nth % 5)) rows)
|
|
sum-row (first (filter #(= "SUM" (nth % 5)) rows))
|
|
tax (some-> sum-row (nth 14) Double/parseDouble)
|
|
by-acct (reduce
|
|
(fn [acc row]
|
|
(update acc
|
|
(sysco/get-line-account (nth row sysco/item-name-index))
|
|
(fnil + 0.0)
|
|
(Double/parseDouble (nth row sysco/item-price-index))))
|
|
{}
|
|
det)]
|
|
(cond-> by-acct
|
|
(and tax (not (zero? tax)))
|
|
(update (sysco/get-line-account "TAX") (fnil + 0.0) tax))))
|
|
|
|
(defn resolve-eid
|
|
"Match on invoice-number AND date (belt-and-suspenders). Asserts a unique
|
|
hit so we never recode the wrong invoice."
|
|
[invoice-number date]
|
|
(let [ids (mapv first (dc/q '[:find ?i :in $ ?n ?d
|
|
:where
|
|
[?i :invoice/invoice-number ?n]
|
|
[?i :invoice/date ?d]]
|
|
(dc/db conn) invoice-number date))]
|
|
(assert (>= 1 (count ids))
|
|
(str "multiple invoices match " invoice-number " / " date ": " ids))
|
|
(first ids)))
|
|
|
|
(defn invoice-change-datoms
|
|
"Minimal tx-data to make invoice `eid`'s expense-account split equal
|
|
`desired` ({account-eid -> amount}). Returns [] when already correct."
|
|
[db eid desired]
|
|
(let [existing (:invoice/expense-accounts
|
|
(dc/pull db [{:invoice/expense-accounts
|
|
[:db/id :invoice-expense-account/amount
|
|
:invoice-expense-account/location
|
|
{:invoice-expense-account/account [:db/id]}]}]
|
|
eid))
|
|
loc (or (some :invoice-expense-account/location existing) "HQ")
|
|
;; one child per account expected; index by account, retract any dupes
|
|
by-acct (group-by #(get-in % [:invoice-expense-account/account :db/id]) existing)
|
|
one (into {} (map (fn [[a cs]] [a (first cs)])) by-acct)
|
|
dupes (mapcat (fn [[_ cs]] (map :db/id (rest cs))) by-acct)
|
|
wanted (set (keys desired))
|
|
upserts (keep (fn [[acct amt]]
|
|
(let [child (get one acct)]
|
|
(cond
|
|
;; new account -> accrete a child under the invoice
|
|
(nil? child)
|
|
{:db/id eid
|
|
:invoice/expense-accounts
|
|
[#:invoice-expense-account{:db/id (random-tempid)
|
|
:account acct
|
|
:location loc
|
|
:amount amt}]}
|
|
;; right account, wrong value -> reuse, set amount
|
|
;; (and fix location if it drifted)
|
|
(or (not (dollars= (:invoice-expense-account/amount child) amt))
|
|
(not= (:invoice-expense-account/location child) loc))
|
|
(cond-> {:db/id (:db/id child)
|
|
:invoice-expense-account/amount amt}
|
|
(not= (:invoice-expense-account/location child) loc)
|
|
(assoc :invoice-expense-account/location loc))
|
|
;; already correct -> nothing
|
|
:else nil)))
|
|
desired)
|
|
retracts (for [[acct child] one :when (not (wanted acct))]
|
|
[:db/retractEntity (:db/id child)])]
|
|
(vec (concat upserts
|
|
retracts
|
|
(map (fn [id] [:db/retractEntity id]) dupes)))))
|
|
|
|
;; ------------------------------------------------------------------
|
|
;; STEP 1 — point at the CSV. EVERY invoice in this file gets recoded.
|
|
;; Place the file under resources/ (e.g. resources/sysco_recode/bad.csv)
|
|
;; and commit it so it's on the classpath of the deployed artifact.
|
|
;; ------------------------------------------------------------------
|
|
(def csv-path "sysco_recode/bad.csv")
|
|
(def rows (read-csv-rows csv-path))
|
|
|
|
(def invoices (group-invoices rows))
|
|
(mapv (juxt :invoice-number :date-str) invoices) ;; what we found in the file
|
|
|
|
;; ------------------------------------------------------------------
|
|
;; STEP 2 — resolve each invoice (number + date) and compute its split.
|
|
;; ------------------------------------------------------------------
|
|
(def plan
|
|
(mapv (fn [{:keys [invoice-number date-str rows]}]
|
|
(let [date (parse-date date-str)]
|
|
{:invoice-number invoice-number
|
|
:date date
|
|
:eid (resolve-eid invoice-number date)
|
|
:desired (desired-split rows)}))
|
|
invoices))
|
|
|
|
;; bail if any invoice number didn't resolve
|
|
(assert (every? :eid plan)
|
|
(str "unresolved invoices: "
|
|
(mapv (juxt :invoice-number :date) (remove :eid plan))))
|
|
|
|
;; ------------------------------------------------------------------
|
|
;; STEP 3 — build the SINGLE changes-only transaction across all invoices.
|
|
;; ------------------------------------------------------------------
|
|
(def changes
|
|
(let [db (dc/db conn)]
|
|
(vec (mapcat (fn [{:keys [eid desired]}] (invoice-change-datoms db eid desired))
|
|
plan))))
|
|
|
|
(count changes) ;; how many datoms we're actually changing
|
|
changes ;; inspect the full minimal tx
|
|
|
|
;; ------------------------------------------------------------------
|
|
;; STEP 4 — validate with dc/with: apply the tx to an in-memory db value
|
|
;; and confirm every affected invoice still balances (sum of expense
|
|
;; account amounts == :invoice/total).
|
|
;; ------------------------------------------------------------------
|
|
(def preview (dc/with (dc/db conn) changes))
|
|
|
|
(def balance-report
|
|
(let [db-after (:db-after preview)]
|
|
(mapv (fn [{:keys [eid invoice-number]}]
|
|
(let [inv (dc/pull db-after
|
|
[:invoice/total
|
|
{:invoice/expense-accounts [:invoice-expense-account/amount]}]
|
|
eid)
|
|
s (reduce + 0.0 (map :invoice-expense-account/amount
|
|
(:invoice/expense-accounts inv)))]
|
|
{:invoice-number invoice-number
|
|
:total (:invoice/total inv)
|
|
:ea-sum s
|
|
:ok? (dollars= s (:invoice/total inv))}))
|
|
plan)))
|
|
balance-report
|
|
|
|
;; HARD GATE — do not continue unless every invoice balances post-change.
|
|
(assert (every? :ok? balance-report)
|
|
(str "unbalanced after change: "
|
|
(filterv (complement :ok?) balance-report)))
|
|
|
|
;; ------------------------------------------------------------------
|
|
;; STEP 5 — COMMIT the recode (gated). One transaction, changes only.
|
|
;; ------------------------------------------------------------------
|
|
#_(audit-transact changes
|
|
{:user/name "sysco recode (missing GL mappings fix)"
|
|
:user/role "admin"})
|
|
|
|
;; ------------------------------------------------------------------
|
|
;; STEP 6 — touch the ledger for every affected invoice (separate tx;
|
|
;; :upsert-invoice rebuilds the journal entry from the now-committed
|
|
;; expense-accounts). Gated.
|
|
;; ------------------------------------------------------------------
|
|
#_(audit-transact (mapv (fn [{:keys [eid]}] [:upsert-invoice {:db/id eid}]) plan)
|
|
{:user/name "sysco recode ledger touch"
|
|
:user/role "admin"})
|
|
|
|
;; ------------------------------------------------------------------
|
|
;; STEP 7 — verify committed result.
|
|
;; ------------------------------------------------------------------
|
|
#_(let [db (dc/db conn)]
|
|
(mapv (fn [{:keys [eid invoice-number]}]
|
|
{:invoice-number invoice-number
|
|
:accounts
|
|
(->> (dc/pull db
|
|
[{:invoice/expense-accounts
|
|
[:invoice-expense-account/amount
|
|
{:invoice-expense-account/account [:account/numeric-code]}]}]
|
|
eid)
|
|
:invoice/expense-accounts
|
|
(map (juxt #(get-in % [:invoice-expense-account/account :account/numeric-code])
|
|
:invoice-expense-account/amount))
|
|
(sort-by first)
|
|
vec)})
|
|
plan)))
|