diff --git a/scratch-sessions/build_ml.clj b/scratch-sessions/build_ml.clj
index 6b39cec7..e74fdc1e 100644
--- a/scratch-sessions/build_ml.clj
+++ b/scratch-sessions/build_ml.clj
@@ -79,7 +79,7 @@
                 :quote? (constantly true))))
 
 (defn write-inference []
-  (with-open [f (io/writer "/mnt/data/dev2/ml-test/input/inference.csv")]
+  (with-open [f (io/writer (str (System/getProperty "user.home") "/dev/transaction-training/input/inference.csv"))] ; the JVM never expands "~", so resolve the home directory explicitly
     (csv/write-csv f
                    (into [["transaction" "client" "bank" "bank_type" "description" "date" "amount"]]
                          (->>
diff --git a/scratch-sessions/ingest_ml.clj b/scratch-sessions/ingest_ml.clj
index d8247bed..07001b8a 100644
--- a/scratch-sessions/ingest_ml.clj
+++ b/scratch-sessions/ingest_ml.clj
@@ -1,8 +1,8 @@
 (ns ingest-ml
-  (:require [datomic.client.api :as dc]
+  (:require [datomic.api :as dc]
             [clojure.java.io :as io]
             [clojure.data.csv :as csv]
-            [iol-ion.tx :refer [upsert-entity]]
+            #_[iol-ion.tx :refer [upsert-entity]]
             [auto-ap.datomic :refer [conn]]))
 
 (println "hi")
@@ -13,37 +13,60 @@
 
 
 (defn reset-inference []
-    (doseq [p (->>
-               (dc/q '[:find ?t
-                       :where [?t :transaction/recommended-account]]
-                     (dc/db conn))
-               (map (fn [[t]]
-                      `(upsert-entity
-                        ~{:db/id t
-                          :transaction/recommended-account nil
-                          :tranasction/vendor-confidence nil
-                          :transaction/account-confidence nil
-                          :transaction/recommended-vendor nil})))
-
-               (partition-all 100))]
-      (dc/transact conn {:tx-data p})))
+  (doseq [p (->> ; clears the recommendation attributes of every transaction that has a recommended account
+             (dc/q '[:find ?t
+                     :where [?t :transaction/recommended-account]]
+                   (dc/db conn))
+             (map (fn [[t]]
+                    [:upsert-entity {:db/id t ; NOTE(review): presumably an installed transaction function - confirm
+                                     :transaction/recommended-account nil
+                                     :transaction/vendor-confidence nil
+                                     :transaction/account-confidence nil
+                                     :transaction/recommended-vendor nil}]))
+
+             (partition-all 100))] ; transact in batches of at most 100 operations
+    @(dc/transact conn p))) ; deref so each batch completes (or throws) before the next one starts
+
+(defn slurp-csv [c] ; reads the whole CSV at c into a vector of rows before the reader is closed
+  (with-open [s (io/reader c)]
+    (into [] (csv/read-csv s))))
 
 (defn read-inference []
-    (with-open [reader (io/reader "/mnt/data/dev2/ml-test/inference-outcome.csv")]
-      (->> (csv/read-csv reader)
-           (into []
-                 (comp
-                  (drop 1)
-                  (map (fn [[_ transaction best-vendor best-account account-confidence]]
-                         {:db/id (Long/parseLong transaction)
-                          :transaction/recommended-account (Long/parseLong best-account)
-                          :transaction/account-confidence (Double/parseDouble account-confidence)
-                          :transaction/recommended-vendor (Long/parseLong best-vendor)}
-                         ))))))) 
+  (let [numeric-code->account (into {} (dc/q '[:find ?nc ?a ; map of :account/numeric-code value -> account entity id
+                                                :in $
+                                                :where [?a :account/numeric-code ?nc]]
+                                              (dc/db conn)))
+        inference-account->account (into {} ; first CSV column (parsed as long) -> account entity id
+                                         (comp
+                                          (drop 1) ; skip the header row
+                                          (map (fn [[a n]]
+                                                 [(Long/parseLong a) (numeric-code->account (Long/parseLong n))])))
+                                         (slurp-csv "/home/notid/dev/transaction-training/input/accounts.csv"))
+        vendor-name->vendor (into {} (dc/q '[:find ?vn ?v ; map of :vendor/name value -> vendor entity id
+                                              :in $
+                                              :where [?v :vendor/name ?vn]]
+                                            (dc/db conn)))
+        inference-vendor->vendor (into {} ; first CSV column (parsed as long) -> vendor entity id
+                                       (comp
+                                        (drop 1) ; skip the header row
+                                        (map (fn [[a n]]
+                                               [(Long/parseLong a) (vendor-name->vendor n)]))) ; the lookup map is keyed by :vendor/name values, so n is used as-is
+                                       (slurp-csv "/home/notid/dev/transaction-training/input/vendors.csv"))]
+    inference-vendor->vendor ; currently the return value; the row mapping below is discarded by #_
+    #_(with-open [reader (io/reader "/home/notid/dev/transaction-training/inference-outcome.csv")]
+        (->> (csv/read-csv reader)
+             (into []
+                   (comp
+                    (drop 1)
+                    (map (fn [[_ transaction best-vendor best-account account-confidence]]
+                           {:db/id (Long/parseLong transaction)
+                            :transaction/recommended-account (Long/parseLong best-account)
+                            :transaction/account-confidence (Double/parseDouble account-confidence)
+                            :transaction/recommended-vendor (Long/parseLong best-vendor)})))))))) 
 
 (defn apply-inference [inference]
   (doseq [p (->> inference (partition-all 100))]
-    (dc/transact conn {:tx-data p})))
+    @(dc/transact conn p))) ; deref so each batch of at most 100 completes (or throws) before the next one starts
 
 
 (defn check-applied-inference []
@@ -69,3 +92,22 @@
                (dc/db conn))
          (shuffle)
          (take 10))))
+
+
+(defn random-infer [] ; builds up to n (random, below 1000) entity maps with randomly chosen recommendations
+  (let [n (rand-int 1000)
+        vs (into [] (map first (dc/q '[:find ?v :in $ :where [?v :vendor/name]] (dc/db conn)))) ; entity ids that have a :vendor/name
+        as (into [] (map first (dc/q '[:find ?v :in $ :where [?v :account/name]] (dc/db conn))))] ; entity ids that have an :account/name
+    (->>
+     (dc/qseq {:query '[:find ?t
+                        :where
+                        [?t :transaction/approval-status :transaction-approval-status/unapproved]
+                        (not [?t :transaction/vendor])]
+               :args [(dc/db conn)]})
+     (map first)
+     (take n)
+     (map (fn [t]
+            {:db/id t
+             :transaction/recommended-account (rand-nth as)
+             :transaction/account-confidence (double (/ (double (rand-int 100)) 100.0)) ; one of 0.00, 0.01, ... 0.99
+             :transaction/recommended-vendor (rand-nth vs)})))))