diff --git a/scratch-sessions/sysco_recode/bad.csv b/resources/sysco_recode/bad.csv similarity index 100% rename from scratch-sessions/sysco_recode/bad.csv rename to resources/sysco_recode/bad.csv diff --git a/scratch-sessions/sysco_recode_scratch.clj b/scratch-sessions/sysco_recode_scratch.clj new file mode 100644 index 00000000..3df95609 --- /dev/null +++ b/scratch-sessions/sysco_recode_scratch.clj @@ -0,0 +1,271 @@ +;; ===================================================================== +;; ONE-OFF SCRATCH — re-code already-imported Sysco invoices after fixing +;; resources/sysco_line_item_mapping.csv. +;; +;; Context: the Sysco importer codes each line item by EXACT description +;; match against sysco_line_item_mapping.csv, defaulting to GL 50000 when a +;; description is missing (auto-ap.jobs.sysco/get-line-account). Missing +;; PAPER & DISP (and other) descriptions landed in 50000 (Food Costs) +;; instead of their real accounts (e.g. 55000 Paper Costs). The mapping is +;; now fixed; this re-derives the correct split from each invoice's source +;; CSV and rewrites :invoice/expense-accounts. +;; +;; Design: +;; - Recode EVERY invoice found in the CSV resource (a Sysco file may batch +;; several invoices; they're grouped by InvoiceNumber). +;; - Build ONE transaction covering every invoice, emitting only the datoms +;; that actually change: +;; * reuse an existing invoice-expense-account when its account (and +;; location) already match, updating just :amount when it differs; +;; * add a child for an account that has no row yet; +;; * retract a child whose account is no longer in the corrected split; +;; * emit nothing for rows already correct. +;; - Validate with (dc/with db changes): apply the tx to an in-memory db +;; value and assert every affected invoice's expense-account amounts sum +;; to its :invoice/total BEFORE committing for real. +;; - After committing, touch the ledger for every affected invoice. This is +;; a SEPARATE transaction on purpose: :upsert-invoice rebuilds the journal +;; entry from the invoice's expense-accounts as seen in db-before, so it +;; must run after the recode is committed. +;; +;; DO NOT load/evaluate this whole file. Step through the (comment ...) forms +;; one at a time in a connected REPL; the commit + ledger steps are gated #_. +;; +;; PRECONDITIONS +;; - The deployed artifact ships the fixed sysco_line_item_mapping.csv AND +;; the invoice CSV at resources/sysco_recode/.csv (io/resource). +;; - You are connected to the DB you intend to mutate (prod conn!). +;; ===================================================================== + +(comment + + (require '[auto-ap.jobs.sysco :as sysco] + '[auto-ap.datomic :refer [conn audit-transact random-tempid]] + '[auto-ap.utils :refer [dollars=]] + '[auto-ap.time :as t] + '[clj-time.coerce :as coerce] + '[clojure.data.csv :as csv] + '[clojure.java.io :as io] + '[datomic.api :as dc]) + + ;; ------------------------------------------------------------------ + ;; STEP 0 — reload the mapping cache so the corrected CSV is in effect. + ;; ------------------------------------------------------------------ + (reset! sysco/sysco-name->line nil) + (count (sysco/get-sysco->line)) + + ;; sanity: a previously-missing paper description now resolves to 55000. + (dc/pull (dc/db conn) [:account/numeric-code :account/name] + (sysco/get-line-account "BAG PAPER 250 CT")) ; => 55000 + + ;; ------------------------------------------------------------------ + ;; Helpers + ;; ------------------------------------------------------------------ + (defn read-csv-rows + "Reads the invoice CSV from the classpath (so it ships with the deploy). + `resource-path` is relative to a resources/ root, e.g. \"sysco_recode/bad.csv\"." + [resource-path] + (with-open [r (io/reader (or (io/resource resource-path) + (throw (ex-info "CSV not found on classpath" + {:resource-path resource-path}))))] + (doall (csv/read-csv r)))) + + (defn parse-date + "Sysco yyMMdd string -> java.util.Date, the same way the importer stores + :invoice/date (auto-ap.jobs.sysco/extract-invoice-details)." + [yymmdd] + (coerce/to-date (t/parse yymmdd "yyMMdd"))) + + (defn group-invoices + "Split a (possibly multi-invoice) Sysco CSV into one entry per invoice. + Groups DET/HDR/SUM rows by InvoiceNumber (index 4); date comes from the + group's HDR row InvoiceDate (index 10)." + [rows] + (->> rows + (filter #(contains? #{"DET" "HDR" "SUM"} (nth % 5 nil))) + (group-by #(nth % 4)) + (mapv (fn [[number grp]] + (let [hdr (first (filter #(= "HDR" (nth % 5)) grp))] + {:invoice-number number + :date-str (some-> hdr (nth 10)) + :rows grp}))))) + + (defn desired-split + "Rows of one Sysco invoice -> {account-eid -> amount-double}, using the + CURRENT (fixed) mapping. DET rows only (record-type at index 5); tax + (SUM row, TotalTaxAmount index 14) added to the same account the + importer uses for \"TAX\". Mirrors auto-ap.jobs.sysco/code-individual-items." + [rows] + (let [det (filter #(= "DET" (nth % 5)) rows) + sum-row (first (filter #(= "SUM" (nth % 5)) rows)) + tax (some-> sum-row (nth 14) Double/parseDouble) + by-acct (reduce + (fn [acc row] + (update acc + (sysco/get-line-account (nth row sysco/item-name-index)) + (fnil + 0.0) + (Double/parseDouble (nth row sysco/item-price-index)))) + {} + det)] + (cond-> by-acct + (and tax (not (zero? tax))) + (update (sysco/get-line-account "TAX") (fnil + 0.0) tax)))) + + (defn resolve-eid + "Match on invoice-number AND date (belt-and-suspenders). Asserts a unique + hit so we never recode the wrong invoice." + [invoice-number date] + (let [ids (mapv first (dc/q '[:find ?i :in $ ?n ?d + :where + [?i :invoice/invoice-number ?n] + [?i :invoice/date ?d]] + (dc/db conn) invoice-number date))] + (assert (>= 1 (count ids)) + (str "multiple invoices match " invoice-number " / " date ": " ids)) + (first ids))) + + (defn invoice-change-datoms + "Minimal tx-data to make invoice `eid`'s expense-account split equal + `desired` ({account-eid -> amount}). Returns [] when already correct." + [db eid desired] + (let [existing (:invoice/expense-accounts + (dc/pull db [{:invoice/expense-accounts + [:db/id :invoice-expense-account/amount + :invoice-expense-account/location + {:invoice-expense-account/account [:db/id]}]}] + eid)) + loc (or (some :invoice-expense-account/location existing) "HQ") + ;; one child per account expected; index by account, retract any dupes + by-acct (group-by #(get-in % [:invoice-expense-account/account :db/id]) existing) + one (into {} (map (fn [[a cs]] [a (first cs)])) by-acct) + dupes (mapcat (fn [[_ cs]] (map :db/id (rest cs))) by-acct) + wanted (set (keys desired)) + upserts (keep (fn [[acct amt]] + (let [child (get one acct)] + (cond + ;; new account -> accrete a child under the invoice + (nil? child) + {:db/id eid + :invoice/expense-accounts + [#:invoice-expense-account{:db/id (random-tempid) + :account acct + :location loc + :amount amt}]} + ;; right account, wrong value -> reuse, set amount + ;; (and fix location if it drifted) + (or (not (dollars= (:invoice-expense-account/amount child) amt)) + (not= (:invoice-expense-account/location child) loc)) + (cond-> {:db/id (:db/id child) + :invoice-expense-account/amount amt} + (not= (:invoice-expense-account/location child) loc) + (assoc :invoice-expense-account/location loc)) + ;; already correct -> nothing + :else nil))) + desired) + retracts (for [[acct child] one :when (not (wanted acct))] + [:db/retractEntity (:db/id child)])] + (vec (concat upserts + retracts + (map (fn [id] [:db/retractEntity id]) dupes))))) + + ;; ------------------------------------------------------------------ + ;; STEP 1 — point at the CSV. EVERY invoice in this file gets recoded. + ;; Place the file under resources/ (e.g. resources/sysco_recode/bad.csv) + ;; and commit it so it's on the classpath of the deployed artifact. + ;; ------------------------------------------------------------------ + (def csv-path "sysco_recode/bad.csv") + (def rows (read-csv-rows csv-path)) + + (def invoices (group-invoices rows)) + (mapv (juxt :invoice-number :date-str) invoices) ;; what we found in the file + + ;; ------------------------------------------------------------------ + ;; STEP 2 — resolve each invoice (number + date) and compute its split. + ;; ------------------------------------------------------------------ + (def plan + (mapv (fn [{:keys [invoice-number date-str rows]}] + (let [date (parse-date date-str)] + {:invoice-number invoice-number + :date date + :eid (resolve-eid invoice-number date) + :desired (desired-split rows)})) + invoices)) + + ;; bail if any invoice number didn't resolve + (assert (every? :eid plan) + (str "unresolved invoices: " + (mapv (juxt :invoice-number :date) (remove :eid plan)))) + + ;; ------------------------------------------------------------------ + ;; STEP 3 — build the SINGLE changes-only transaction across all invoices. + ;; ------------------------------------------------------------------ + (def changes + (let [db (dc/db conn)] + (vec (mapcat (fn [{:keys [eid desired]}] (invoice-change-datoms db eid desired)) + plan)))) + + (count changes) ;; how many datoms we're actually changing + changes ;; inspect the full minimal tx + + ;; ------------------------------------------------------------------ + ;; STEP 4 — validate with dc/with: apply the tx to an in-memory db value + ;; and confirm every affected invoice still balances (sum of expense + ;; account amounts == :invoice/total). + ;; ------------------------------------------------------------------ + (def preview (dc/with (dc/db conn) changes)) + + (def balance-report + (let [db-after (:db-after preview)] + (mapv (fn [{:keys [eid invoice-number]}] + (let [inv (dc/pull db-after + [:invoice/total + {:invoice/expense-accounts [:invoice-expense-account/amount]}] + eid) + s (reduce + 0.0 (map :invoice-expense-account/amount + (:invoice/expense-accounts inv)))] + {:invoice-number invoice-number + :total (:invoice/total inv) + :ea-sum s + :ok? (dollars= s (:invoice/total inv))})) + plan))) + balance-report + + ;; HARD GATE — do not continue unless every invoice balances post-change. + (assert (every? :ok? balance-report) + (str "unbalanced after change: " + (filterv (complement :ok?) balance-report))) + + ;; ------------------------------------------------------------------ + ;; STEP 5 — COMMIT the recode (gated). One transaction, changes only. + ;; ------------------------------------------------------------------ + #_(audit-transact changes + {:user/name "sysco recode (missing GL mappings fix)" + :user/role "admin"}) + + ;; ------------------------------------------------------------------ + ;; STEP 6 — touch the ledger for every affected invoice (separate tx; + ;; :upsert-invoice rebuilds the journal entry from the now-committed + ;; expense-accounts). Gated. + ;; ------------------------------------------------------------------ + #_(audit-transact (mapv (fn [{:keys [eid]}] [:upsert-invoice {:db/id eid}]) plan) + {:user/name "sysco recode ledger touch" + :user/role "admin"}) + + ;; ------------------------------------------------------------------ + ;; STEP 7 — verify committed result. + ;; ------------------------------------------------------------------ + #_(let [db (dc/db conn)] + (mapv (fn [{:keys [eid invoice-number]}] + {:invoice-number invoice-number + :accounts + (->> (dc/pull db + [{:invoice/expense-accounts + [:invoice-expense-account/amount + {:invoice-expense-account/account [:account/numeric-code]}]}] + eid) + :invoice/expense-accounts + (map (juxt #(get-in % [:invoice-expense-account/account :account/numeric-code]) + :invoice-expense-account/amount)) + (sort-by first) + vec)}) + plan)))