;; ===================================================================== ;; ONE-OFF SCRATCH — re-code already-imported Sysco invoices after fixing ;; resources/sysco_line_item_mapping.csv. ;; ;; Context: the Sysco importer codes each line item by EXACT description ;; match against sysco_line_item_mapping.csv, defaulting to GL 50000 when a ;; description is missing (auto-ap.jobs.sysco/get-line-account). Missing ;; PAPER & DISP (and other) descriptions landed in 50000 (Food Costs) ;; instead of their real accounts (e.g. 55000 Paper Costs). The mapping is ;; now fixed; this re-derives the correct split from each invoice's source ;; CSV and rewrites :invoice/expense-accounts. ;; ;; Design: ;; - Recode EVERY invoice found in the CSV resource (a Sysco file may batch ;; several invoices; they're grouped by InvoiceNumber). ;; - Build ONE transaction covering every invoice, emitting only the datoms ;; that actually change: ;; * reuse an existing invoice-expense-account when its account (and ;; location) already match, updating just :amount when it differs; ;; * add a child for an account that has no row yet; ;; * retract a child whose account is no longer in the corrected split; ;; * emit nothing for rows already correct. ;; - Validate with (dc/with db changes): apply the tx to an in-memory db ;; value and assert every affected invoice's expense-account amounts sum ;; to its :invoice/total BEFORE committing for real. ;; - After committing, touch the ledger for every affected invoice. This is ;; a SEPARATE transaction on purpose: :upsert-invoice rebuilds the journal ;; entry from the invoice's expense-accounts as seen in db-before, so it ;; must run after the recode is committed. ;; ;; DO NOT load/evaluate this whole file. Step through the (comment ...) forms ;; one at a time in a connected REPL; the commit + ledger steps are gated #_. ;; ;; PRECONDITIONS ;; - The deployed artifact ships the fixed sysco_line_item_mapping.csv AND ;; the invoice CSV at resources/sysco_recode/.csv (io/resource). ;; - You are connected to the DB you intend to mutate (prod conn!). ;; ===================================================================== (comment (require '[auto-ap.jobs.sysco :as sysco] '[auto-ap.datomic :refer [conn audit-transact random-tempid]] '[auto-ap.utils :refer [dollars=]] '[auto-ap.time :as t] '[clj-time.coerce :as coerce] '[clojure.data.csv :as csv] '[clojure.java.io :as io] '[datomic.api :as dc]) ;; ------------------------------------------------------------------ ;; STEP 0 — reload the mapping cache so the corrected CSV is in effect. ;; ------------------------------------------------------------------ (reset! sysco/sysco-name->line nil) (count (sysco/get-sysco->line)) ;; sanity: a previously-missing paper description now resolves to 55000. (dc/pull (dc/db conn) [:account/numeric-code :account/name] (sysco/get-line-account "BAG PAPER 250 CT")) ; => 55000 ;; ------------------------------------------------------------------ ;; Helpers ;; ------------------------------------------------------------------ (defn read-csv-rows "Reads the invoice CSV from the classpath (so it ships with the deploy). `resource-path` is relative to a resources/ root, e.g. \"sysco_recode/bad.csv\"." [resource-path] (with-open [r (io/reader (or (io/resource resource-path) (throw (ex-info "CSV not found on classpath" {:resource-path resource-path}))))] (doall (csv/read-csv r)))) (defn parse-date "Sysco yyMMdd string -> java.util.Date, the same way the importer stores :invoice/date (auto-ap.jobs.sysco/extract-invoice-details)." [yymmdd] (coerce/to-date (t/parse yymmdd "yyMMdd"))) (defn group-invoices "Split a (possibly multi-invoice) Sysco CSV into one entry per invoice. Groups DET/HDR/SUM rows by InvoiceNumber (index 4); date comes from the group's HDR row InvoiceDate (index 10)." [rows] (->> rows (filter #(contains? #{"DET" "HDR" "SUM"} (nth % 5 nil))) (group-by #(nth % 4)) (mapv (fn [[number grp]] (let [hdr (first (filter #(= "HDR" (nth % 5)) grp))] {:invoice-number number :date-str (some-> hdr (nth 10)) :rows grp}))))) (defn desired-split "Rows of one Sysco invoice -> {account-eid -> amount-double}, using the CURRENT (fixed) mapping. DET rows only (record-type at index 5); tax (SUM row, TotalTaxAmount index 14) added to the same account the importer uses for \"TAX\". Mirrors auto-ap.jobs.sysco/code-individual-items." [rows] (let [det (filter #(= "DET" (nth % 5)) rows) sum-row (first (filter #(= "SUM" (nth % 5)) rows)) tax (some-> sum-row (nth 14) Double/parseDouble) by-acct (reduce (fn [acc row] (update acc (sysco/get-line-account (nth row sysco/item-name-index)) (fnil + 0.0) (Double/parseDouble (nth row sysco/item-price-index)))) {} det)] (cond-> by-acct (and tax (not (zero? tax))) (update (sysco/get-line-account "TAX") (fnil + 0.0) tax)))) (defn resolve-eid "Match on invoice-number AND date (belt-and-suspenders). Asserts a unique hit so we never recode the wrong invoice." [invoice-number date] (let [ids (mapv first (dc/q '[:find ?i :in $ ?n ?d :where [?i :invoice/invoice-number ?n] [?i :invoice/date ?d]] (dc/db conn) invoice-number date))] (assert (>= 1 (count ids)) (str "multiple invoices match " invoice-number " / " date ": " ids)) (first ids))) (defn invoice-change-datoms "Minimal tx-data to make invoice `eid`'s expense-account split equal `desired` ({account-eid -> amount}). Returns [] when already correct." [db eid desired] (let [existing (:invoice/expense-accounts (dc/pull db [{:invoice/expense-accounts [:db/id :invoice-expense-account/amount :invoice-expense-account/location {:invoice-expense-account/account [:db/id]}]}] eid)) loc (or (some :invoice-expense-account/location existing) "HQ") ;; one child per account expected; index by account, retract any dupes by-acct (group-by #(get-in % [:invoice-expense-account/account :db/id]) existing) one (into {} (map (fn [[a cs]] [a (first cs)])) by-acct) dupes (mapcat (fn [[_ cs]] (map :db/id (rest cs))) by-acct) wanted (set (keys desired)) upserts (keep (fn [[acct amt]] (let [child (get one acct)] (cond ;; new account -> accrete a child under the invoice (nil? child) {:db/id eid :invoice/expense-accounts [#:invoice-expense-account{:db/id (random-tempid) :account acct :location loc :amount amt}]} ;; right account, wrong value -> reuse, set amount ;; (and fix location if it drifted) (or (not (dollars= (:invoice-expense-account/amount child) amt)) (not= (:invoice-expense-account/location child) loc)) (cond-> {:db/id (:db/id child) :invoice-expense-account/amount amt} (not= (:invoice-expense-account/location child) loc) (assoc :invoice-expense-account/location loc)) ;; already correct -> nothing :else nil))) desired) retracts (for [[acct child] one :when (not (wanted acct))] [:db/retractEntity (:db/id child)])] (vec (concat upserts retracts (map (fn [id] [:db/retractEntity id]) dupes))))) ;; ------------------------------------------------------------------ ;; STEP 1 — point at the CSV. EVERY invoice in this file gets recoded. ;; Place the file under resources/ (e.g. resources/sysco_recode/bad.csv) ;; and commit it so it's on the classpath of the deployed artifact. ;; ------------------------------------------------------------------ (def csv-path "sysco_recode/bad.csv") (def rows (read-csv-rows csv-path)) (def invoices (group-invoices rows)) (mapv (juxt :invoice-number :date-str) invoices) ;; what we found in the file ;; ------------------------------------------------------------------ ;; STEP 2 — resolve each invoice (number + date) and compute its split. ;; ------------------------------------------------------------------ (def plan (mapv (fn [{:keys [invoice-number date-str rows]}] (let [date (parse-date date-str)] {:invoice-number invoice-number :date date :eid (resolve-eid invoice-number date) :desired (desired-split rows)})) invoices)) ;; bail if any invoice number didn't resolve (assert (every? :eid plan) (str "unresolved invoices: " (mapv (juxt :invoice-number :date) (remove :eid plan)))) ;; ------------------------------------------------------------------ ;; STEP 3 — build the SINGLE changes-only transaction across all invoices. ;; ------------------------------------------------------------------ (def changes (let [db (dc/db conn)] (vec (mapcat (fn [{:keys [eid desired]}] (invoice-change-datoms db eid desired)) plan)))) (count changes) ;; how many datoms we're actually changing changes ;; inspect the full minimal tx ;; ------------------------------------------------------------------ ;; STEP 4 — validate with dc/with: apply the tx to an in-memory db value ;; and confirm every affected invoice still balances (sum of expense ;; account amounts == :invoice/total). ;; ------------------------------------------------------------------ (def preview (dc/with (dc/db conn) changes)) (def balance-report (let [db-after (:db-after preview)] (mapv (fn [{:keys [eid invoice-number]}] (let [inv (dc/pull db-after [:invoice/total {:invoice/expense-accounts [:invoice-expense-account/amount]}] eid) s (reduce + 0.0 (map :invoice-expense-account/amount (:invoice/expense-accounts inv)))] {:invoice-number invoice-number :total (:invoice/total inv) :ea-sum s :ok? (dollars= s (:invoice/total inv))})) plan))) balance-report ;; HARD GATE — do not continue unless every invoice balances post-change. (assert (every? :ok? balance-report) (str "unbalanced after change: " (filterv (complement :ok?) balance-report))) ;; ------------------------------------------------------------------ ;; STEP 5 — COMMIT the recode (gated). One transaction, changes only. ;; ------------------------------------------------------------------ #_(audit-transact changes {:user/name "sysco recode (missing GL mappings fix)" :user/role "admin"}) ;; ------------------------------------------------------------------ ;; STEP 6 — touch the ledger for every affected invoice (separate tx; ;; :upsert-invoice rebuilds the journal entry from the now-committed ;; expense-accounts). Gated. ;; ------------------------------------------------------------------ #_(audit-transact (mapv (fn [{:keys [eid]}] [:upsert-invoice {:db/id eid}]) plan) {:user/name "sysco recode ledger touch" :user/role "admin"}) ;; ------------------------------------------------------------------ ;; STEP 7 — verify committed result. ;; ------------------------------------------------------------------ #_(let [db (dc/db conn)] (mapv (fn [{:keys [eid invoice-number]}] {:invoice-number invoice-number :accounts (->> (dc/pull db [{:invoice/expense-accounts [:invoice-expense-account/amount {:invoice-expense-account/account [:account/numeric-code]}]}] eid) :invoice/expense-accounts (map (juxt #(get-in % [:invoice-expense-account/account :account/numeric-code]) :invoice-expense-account/amount)) (sort-by first) vec)}) plan)))