diff --git a/scratch-sessions/build_ml.clj b/scratch-sessions/build_ml.clj
index 6b39cec7..e74fdc1e 100644
--- a/scratch-sessions/build_ml.clj
+++ b/scratch-sessions/build_ml.clj
@@ -79,7 +79,8 @@
                         :quote? (constantly true))))
 
 (defn write-inference []
-  (with-open [f (io/writer "/mnt/data/dev2/ml-test/input/inference.csv")]
+  ;; The JVM does not expand "~", so build the path from the user.home property.
+  (with-open [f (io/writer (io/file (System/getProperty "user.home") "dev/transaction-training/input/inference.csv"))]
     (csv/write-csv f
                    (into [["transaction" "client" "bank" "bank_type" "description" "date" "amount"]]
                          (->>
diff --git a/scratch-sessions/ingest_ml.clj b/scratch-sessions/ingest_ml.clj
index 75ac1740..07001b8a 100644
--- a/scratch-sessions/ingest_ml.clj
+++ b/scratch-sessions/ingest_ml.clj
@@ -27,18 +27,50 @@
                 (partition-all 100))]
     @(dc/transact conn p)))
 
+(defn slurp-csv
+  "Eagerly reads every row of the CSV at `c` into a vector, closing the reader afterwards."
+  [c]
+  (with-open [s (io/reader c)]
+    (into [] (csv/read-csv s))))
+
 (defn read-inference []
-  (with-open [reader (io/reader "data/inference-outcome.csv")]
-    (->> (csv/read-csv reader)
-         (into []
-               (comp
-                (drop 1)
-                (map (fn [[_ transaction best-vendor best-account account-confidence]]
-                       {:db/id (Long/parseLong transaction)
-                        :transaction/recommended-account (Long/parseLong best-account)
-                        :transaction/account-confidence (Double/parseDouble account-confidence)
-                        :transaction/recommended-vendor (Long/parseLong best-vendor)}
-                     )))))))
+  ;; Builds two lookup tables keyed by the first CSV column (parsed as a Long):
+  ;; one resolving to account entities, one resolving to vendor entities.
+  (let [numeric-code->account (into {} (dc/q '[:find ?nc ?a
+                                               :in $
+                                               :where [?a :account/numeric-code ?nc]]
+                                             (dc/db conn)))
+        inference-account->account (into {}
+                                         (comp
+                                          (drop 1)
+                                          (map (fn [[a n]]
+                                                 [(Long/parseLong a) (numeric-code->account (Long/parseLong n))])))
+                                         (slurp-csv "/home/notid/dev/transaction-training/input/accounts.csv"))
+        vendor-name->vendor (into {} (dc/q '[:find ?vn ?v
+                                             :in $
+                                             :where [?v :vendor/name ?vn]]
+                                           (dc/db conn)))
+        inference-vendor->vendor (into {}
+                                       (comp
+                                        (drop 1)
+                                        (map (fn [[a n]]
+                                               ;; vendor-name->vendor is keyed by :vendor/name, so look the
+                                               ;; name up as-is instead of parsing it as a Long.
+                                               [(Long/parseLong a) (vendor-name->vendor n)])))
+                                       (slurp-csv "/home/notid/dev/transaction-training/input/vendors.csv"))]
+    ;; NOTE(review): work in progress -- only the vendor table is returned; the
+    ;; transaction mapping below is disabled with #_ and does not use the tables yet.
+    inference-vendor->vendor
+    #_(with-open [reader (io/reader "/home/notid/dev/transaction-training/inference-outcome.csv")]
+        (->> (csv/read-csv reader)
+             (into []
+                   (comp
+                    (drop 1)
+                    (map (fn [[_ transaction best-vendor best-account account-confidence]]
+                           {:db/id (Long/parseLong transaction)
+                            :transaction/recommended-account (Long/parseLong best-account)
+                            :transaction/account-confidence (Double/parseDouble account-confidence)
+                            :transaction/recommended-vendor (Long/parseLong best-vendor)}))))))))
 
 (defn apply-inference [inference]
   (doseq [p (->> inference (partition-all 100))]
@@ -76,7 +108,9 @@
         as (into [] (map first (dc/q '[:find ?v :in $ :where [?v :account/name]]
                                      (dc/db conn))))]
     (->> (dc/qseq {:query '[:find ?t
-                            :where [?t :transaction/client]]
+                            :where
+                            [?t :transaction/approval-status :transaction-approval-status/unapproved]
+                            (not [?t :transaction/vendor])]
                    :args [(dc/db conn)]})
          (map first)
          (take n)
diff --git a/src/clj/auto_ap/parse/templates.clj b/src/clj/auto_ap/parse/templates.clj
index 7e5c93e1..18693745 100644
--- a/src/clj/auto_ap/parse/templates.clj
+++ b/src/clj/auto_ap/parse/templates.clj
@@ -605,7 +605,7 @@
     :keywords [#"reelproduce.com"]
     :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
               :customer-identifier #"Bill To(?:.*?)\n\n\s+(.*?)\s{2,}"
-              :invoice-number #"Invoice #\n.*?([\d\-]+)\n"
+              :invoice-number #"Invoice #\n.*?\n.*?([\d\-]+)\n"
               :total #"Total\s*\n\s+\$([\d\-,]+\.\d{2,2}+)"}
     :parser {:date [:clj-time "MM/dd/yy"]
              :total [:trim-commas-and-negate nil]}}])