Files
integreat/scratch-sessions/ingest_ml.clj
2023-07-13 20:50:14 -07:00

114 lines
4.6 KiB
Clojure

(ns ingest-ml
  (:require [auto-ap.datomic :refer [conn]]
            [clojure.data.csv :as csv]
            [clojure.java.io :as io]
            [clojure.pprint]
            [datomic.api :as dc]
            #_[iol-ion.tx :refer [upsert-entity]]))
;; Load-time marker: prints "hi" whenever this namespace is evaluated.
(println "hi")
(defn boot
  "Calls `user/init-repl` followed by `user/start-db`.

  NOTE(review): both helpers live in the `user` namespace, which is not
  required here -- presumably it is already loaded by the REPL; confirm."
  []
  (user/init-repl)
  (user/start-db))
(defn reset-inference
  "Clears the recommendation attributes on every transaction that currently
  has a `:transaction/recommended-account`.

  Each matching transaction gets an `:upsert-entity` operation that sets the
  recommended account/vendor and both confidence attributes to nil. The
  operations are transacted in batches of 100, blocking on each batch.
  Returns nil.

  NOTE(review): `:upsert-entity` is presumably the transaction function from
  iol-ion.tx and is assumed to treat nil values as retractions -- confirm."
  []
  (doseq [batch (->> (dc/q '[:find ?t
                             :where [?t :transaction/recommended-account]]
                           (dc/db conn))
                     (map (fn [[t]]
                            [:upsert-entity
                             {:db/id t
                              :transaction/recommended-account nil
                              ;; Was misspelled `:tranasction/...`, so the
                              ;; vendor confidence was never cleared.
                              :transaction/vendor-confidence nil
                              :transaction/account-confidence nil
                              :transaction/recommended-vendor nil}]))
                     (partition-all 100))]
    @(dc/transact conn batch)))
(defn slurp-csv
  "Reads the CSV at `c` (anything `io/reader` accepts) and returns all of its
  rows as a vector. The rows are fully realized before the reader is closed."
  [c]
  (with-open [rdr (io/reader c)]
    (let [rows (csv/read-csv rdr)]
      ;; `vec` forces the lazy row seq while the reader is still open.
      (vec rows))))
(defn read-inference
  "Reads the model output in /mnt/data/dev2/ml-test/inference-outcome.csv and
  returns a vector of transaction maps that can be handed to
  `apply-inference`.

  The inference ids used by the model are translated to entity ids in two
  steps:
    * accounts.csv rows are read as [inference-id numeric-code] and resolved
      through `:account/numeric-code`;
    * vendors.csv rows are read as [inference-id vendor-name] and resolved
      through `:vendor/name`.
  The first row of every CSV is skipped as a header.

  A recommendation whose inference id cannot be resolved to an entity is left
  out of the resulting map instead of being emitted as nil.

  Throws NumberFormatException when an id or confidence column is not
  numeric."
  []
  (let [db (dc/db conn) ; one snapshot so both lookups see the same data
        numeric-code->account (into {} (dc/q '[:find ?nc ?a
                                               :in $
                                               :where [?a :account/numeric-code ?nc]]
                                             db))
        inference-account->account (into {}
                                         (comp
                                          (drop 1)
                                          (map (fn [[a n]]
                                                 [(Long/parseLong a)
                                                  (numeric-code->account (Long/parseLong n))])))
                                         (slurp-csv "/mnt/data/dev2/ml-test/input/accounts.csv"))
        vendor-name->vendor (into {} (dc/q '[:find ?vn ?v
                                             :in $
                                             :where [?v :vendor/name ?vn]]
                                           db))
        inference-vendor->vendor (into {}
                                       (comp
                                        (drop 1)
                                        (map (fn [[a n]]
                                               ;; The lookup map is keyed by the vendor
                                               ;; name string, so `n` must not be parsed
                                               ;; as a number.
                                               [(Long/parseLong a) (vendor-name->vendor n)])))
                                       (slurp-csv "/mnt/data/dev2/ml-test/input/vendors.csv"))]
    (with-open [reader (io/reader "/mnt/data/dev2/ml-test/inference-outcome.csv")]
      (into []
            (comp
             (drop 1)
             (map (fn [[_ transaction best-vendor best-account account-confidence]]
                    ;; Both mapping tables are keyed by Long, so the CSV
                    ;; strings have to be parsed before the lookup.
                    (let [account (inference-account->account (Long/parseLong best-account))
                          vendor (inference-vendor->vendor (Long/parseLong best-vendor))]
                      ;; Datomic rejects nil attribute values, so unresolved
                      ;; recommendations are omitted.
                      (cond-> {:db/id (Long/parseLong transaction)
                               :transaction/account-confidence (Double/parseDouble account-confidence)}
                        account (assoc :transaction/recommended-account account)
                        vendor (assoc :transaction/recommended-vendor vendor))))))
            (csv/read-csv reader)))))
(defn apply-inference
  "Transacts `inference` (a collection of transaction data) against `conn`
  in chunks of at most 100 items, waiting for each chunk to complete before
  submitting the next. Returns nil."
  [inference]
  (let [chunks (partition-all 100 inference)]
    (run! (fn [chunk]
            ;; deref blocks until this chunk has been transacted
            (deref (dc/transact conn chunk)))
          chunks)))
(defn check-applied-inference
  "Pretty-prints up to ten randomly chosen transactions that have both a
  recommended account and a recommended vendor.

  Each printed tuple is
    [client-code bank-account-code description-original amount
     vendor-confidence recommended-vendor-name
     account-confidence recommended-account-name].

  Vendor confidence is optional: \"n/a\" is shown when a transaction has no
  value for it.

  NOTE(review): assumes `:transaction/vendor-confidence` is an installed
  cardinality-one attribute -- confirm against the schema."
  []
  (clojure.pprint/pprint
   (->> (dc/q '[:find ?code ?bac ?do ?amount ?vc ?rvn ?ac ?ran
                :in $
                :where
                [?t :transaction/recommended-account ?ra]
                [?ra :account/name ?ran]
                [?t :transaction/account-confidence ?ac]
                [?t :transaction/recommended-vendor ?rv]
                [?rv :vendor/name ?rvn]
                [?t :transaction/description-original ?do]
                [?t :transaction/client ?c]
                [?c :client/code ?code]
                [?t :transaction/bank-account ?ba]
                [?ba :bank-account/code ?bac]
                [?t :transaction/amount ?amount]
                ;; ?vc was listed in :find but never bound, which makes the
                ;; query invalid. get-else binds it without filtering out
                ;; transactions that lack a vendor confidence.
                [(get-else $ ?t :transaction/vendor-confidence "n/a") ?vc]]
              (dc/db conn))
        (shuffle)
        (take 10))))
(defn random-infer
  "Builds a lazy sequence of randomly generated recommendation maps.

  A random count below 1000 is chosen; up to that many unapproved
  transactions without a `:transaction/vendor` are taken from the database,
  and each is paired with a random account, a random vendor and a random
  account confidence in [0.0, 0.99]."
  []
  (let [sample-size (rand-int 1000)
        vendor-ids (mapv first (dc/q '[:find ?v :in $ :where [?v :vendor/name]] (dc/db conn)))
        account-ids (mapv first (dc/q '[:find ?v :in $ :where [?v :account/name]] (dc/db conn)))
        candidates (dc/qseq {:query '[:find ?t
                                      :where
                                      [?t :transaction/approval-status :transaction-approval-status/unapproved]
                                      (not [?t :transaction/vendor])]
                             :args [(dc/db conn)]})]
    (for [txn-id (take sample-size (map first candidates))]
      {:db/id txn-id
       :transaction/recommended-account (rand-nth account-ids)
       ;; dividing by a double literal already yields a double
       :transaction/account-confidence (/ (rand-int 100) 100.0)
       :transaction/recommended-vendor (rand-nth vendor-ids)})))