Files
integreat/scratch-sessions/build_ml.clj
2023-08-13 15:57:15 -07:00

213 lines
9.6 KiB
Clojure

;; This buffer is for Clojure experiments and evaluation.
;; Press C-j to evaluate the last expression.
;; You can also press C-u C-j to evaluate the expression and pretty-print its result.
(ns build-ml
(:require [datomic.api :as dc]
[clojure.java.io :as io]
[clojure.data.csv :as csv]
[auto-ap.datomic :refer [conn]]
[auto-ap.datomic :refer [visible-clients]]))
;; Prints "hi" to *out* whenever this file is evaluated.
(println "hi")
(defn boot
  "Calls user/init-repl and then user/start-db, in that order.
  Returns whatever user/start-db returns."
  []
  (user/init-repl)
  (user/start-db))
(defn write-recent-transactions-to-embed
  "Returns a vector of at most 200 transaction rows for the visible clients,
  newest first.

  Each row is the query tuple
    [transaction client bank-name bank-type description date amount]
  for transactions dated on or after 2021-01-01.

  `after` (optional) is a transaction entity id, given as a string or a
  number, used as a paging cursor: rows are skipped up to and including the
  row whose transaction id equals it. When the id does not occur in the
  results the returned vector is empty.

  NOTE(review): despite the name, this function writes nothing; it only
  returns the rows. Confirm the intended sink with the author.
  NOTE(review): assumes (visible-clients identity) yields a collection of
  client entity ids suitable for the [?c ...] binding - TODO confirm."
  ([] (write-recent-transactions-to-embed nil))
  ([after]
   (let [clients  (visible-clients identity)
         after-id (when after (Long/parseLong (str after)))
         rows     (dc/qseq {:query '[:find ?t ?c ?bn ?bt2 ?do ?d ?amt
                                     :in $ ?starting [?c ...]
                                     :where
                                     [?t :transaction/client ?c]
                                     ;; Filters left disabled, as in the scratch original:
                                     ;; [?t :transaction/approval-status :transaction-approval-status/unapproved]
                                     ;; [?t :transaction/vendor] ;; should be not
                                     ;; ?d must be bound before it is compared.
                                     [?t :transaction/date ?d]
                                     [(>= ?d ?starting)]
                                     [?t :transaction/description-original ?do]
                                     [?t :transaction/amount ?amt]
                                     [?t :transaction/bank-account ?b]
                                     [?b :bank-account/name ?bn]
                                     [?b :bank-account/type ?bat]
                                     [?bat :db/ident ?bt]
                                     [(name ?bt) ?bt2]]
                            :args  [(dc/db conn)
                                    #inst "2021-01-01"
                                    clients]})
         ;; Sort on [date transaction-id], descending, so that rows sharing a
         ;; date keep a deterministic order and the cursor stays meaningful.
         newest-first (sort-by (fn [[txn _ _ _ _ txn-date]] [txn-date txn])
                               (fn [a b] (compare b a))
                               rows)
         page (if after-id
                (->> newest-first
                     (drop-while (fn [[txn]] (not= after-id txn)))
                     ;; drop the cursor row itself
                     (drop 1))
                newest-first)]
     (into [] (take 200) page))))
(defn write-vendor-training-data
  "Writes /mnt/data/dev2/ml-test/input/vendor-training.csv.

  Selects approved transactions dated on or after 2021-01-01 that have no
  :transaction/payment and whose vendor has a :vendor/name. Emits a header row
  (transaction, client, bank, bank_type, description, date, amount, vendor)
  followed by one row per query result, with every field quoted. The date
  column is rendered with auto-ap.time/unparse-local using
  auto-ap.time/iso-date."
  []
  (with-open [out (io/writer "/mnt/data/dev2/ml-test/input/vendor-training.csv")]
    (let [header     ["transaction" "client" "bank" "bank_type" "description" "date" "amount" "vendor"]
          query      '[:find ?t ?c ?bn ?bt2 ?do ?d ?amt ?v
                       :in $ ?start
                       :where
                       [?t :transaction/date ?d]
                       [(>= ?d ?start)]
                       [?t :transaction/approval-status :transaction-approval-status/approved]
                       (not [?t :transaction/payment])
                       [?t :transaction/client ?c]
                       [?t :transaction/vendor ?v]
                       [?v :vendor/name ?v-name]
                       [?t :transaction/description-original ?do]
                       [?t :transaction/amount ?amt]
                       [?t :transaction/bank-account ?b]
                       [?b :bank-account/name ?bn]
                       [?b :bank-account/type ?bat]
                       [?bat :db/ident ?bt]
                       [(name ?bt) ?bt2]]
          results    (dc/q query (dc/db auto-ap.datomic/conn) #inst "2021-01-01")
          ;; Replace the raw date (6th column) with its formatted string.
          format-row (fn [[txn client bank-name bank-type description txn-date amount vendor]]
                       [txn client bank-name bank-type description
                        (-> txn-date
                            clj-time.coerce/to-date-time
                            (auto-ap.time/unparse-local auto-ap.time/iso-date))
                        amount vendor])]
      (csv/write-csv out
                     (into [header] (map format-row) results)
                     :quote? (constantly true)))))
(defn write-account-training-data
  "Writes /mnt/data/dev2/ml-test/input/account-training.csv.

  Selects approved transactions dated on or after 2020-01-01 that have a
  vendor with a :vendor/name, joined to each of their
  :transaction/accounts entries, so a transaction with several accounts
  produces several rows. Emits a header row (transaction, client, bank,
  bank_type, description, date, amount, vendor, account) followed by one row
  per query result, with every field quoted. The date column is rendered with
  auto-ap.time/unparse-local using auto-ap.time/iso-date."
  []
  (with-open [out (io/writer "/mnt/data/dev2/ml-test/input/account-training.csv")]
    (let [header     ["transaction" "client" "bank" "bank_type" "description" "date" "amount" "vendor" "account"]
          query      '[:find ?t ?c ?bn ?bt2 ?do ?d ?amt ?v ?a
                       :in $ ?start
                       :where
                       [?t :transaction/date ?d]
                       [(>= ?d ?start)]
                       [?t :transaction/approval-status :transaction-approval-status/approved]
                       [?t :transaction/accounts ?ta]
                       [?ta :transaction-account/account ?a]
                       [?t :transaction/vendor ?v]
                       [?v :vendor/name ?v-name]
                       [?t :transaction/client ?c]
                       [?t :transaction/description-original ?do]
                       [?t :transaction/amount ?amt]
                       [?t :transaction/bank-account ?b]
                       [?b :bank-account/name ?bn]
                       [?b :bank-account/type ?bat]
                       [?bat :db/ident ?bt]
                       [(name ?bt) ?bt2]]
          results    (dc/q query (dc/db auto-ap.datomic/conn) #inst "2020-01-01")
          ;; Replace the raw date (6th column) with its formatted string.
          format-row (fn [[txn client bank-name bank-type description txn-date amount vendor account]]
                       [txn client bank-name bank-type description
                        (-> txn-date
                            clj-time.coerce/to-date-time
                            (auto-ap.time/unparse-local auto-ap.time/iso-date))
                        amount vendor account])]
      (csv/write-csv out
                     (into [header] (map format-row) results)
                     :quote? (constantly true)))))
(defn write-inference
  "Writes the inference input as CSV to *out*.

  Selects unapproved transactions dated on or after 2023-01-01 that have no
  :transaction/matched-rule, :transaction/payment, :transaction/vendor or
  :transaction/accounts, and whose client has a :client/code. Emits a header
  row (transaction, client, bank, bank_type, description, date, amount)
  followed by one row per query result, with every field quoted. The client
  column holds the ?c value from the query, not the client code. The date
  column is rendered with auto-ap.time/unparse-local using
  auto-ap.time/iso-date."
  []
  (let [header     ["transaction" "client" "bank" "bank_type" "description" "date" "amount"]
        query      '[:find ?t ?c ?bn ?bt2 ?do ?d ?amt
                     :in $ ?start
                     :where
                     [?t :transaction/date ?d]
                     [(>= ?d ?start)]
                     [?t :transaction/approval-status :transaction-approval-status/unapproved]
                     (not [?t :transaction/matched-rule])
                     (not [?t :transaction/payment])
                     [?t :transaction/client ?c]
                     [?c :client/code ?code]
                     (not [?t :transaction/vendor])
                     (not [?t :transaction/accounts])
                     [?t :transaction/description-original ?do]
                     [?t :transaction/amount ?amt]
                     [?t :transaction/bank-account ?b]
                     [?b :bank-account/name ?bn]
                     [?b :bank-account/type ?bat]
                     [?bat :db/ident ?bt]
                     [(name ?bt) ?bt2]]
        results    (dc/q query (dc/db auto-ap.datomic/conn) #inst "2023-01-01")
        ;; Replace the raw date (6th column) with its formatted string.
        format-row (fn [[txn client bank-name bank-type description txn-date amount]]
                     [txn client bank-name bank-type description
                      (-> txn-date
                          clj-time.coerce/to-date-time
                          (auto-ap.time/unparse-local auto-ap.time/iso-date))
                      amount])]
    (csv/write-csv *out*
                   (into [header] (map format-row) results)
                   :quote? (constantly true))))
(defn write-vendor-lookup
  "Writes /mnt/data/dev2/ml-test/input/vendors.csv: a header row
  (vendor, vendor_name) followed by one [vendor name] row for every vendor
  that has a :vendor/name and is referenced by at least one
  :transaction/vendor."
  []
  (with-open [out (io/writer "/mnt/data/dev2/ml-test/input/vendors.csv")]
    (let [header  ["vendor" "vendor_name"]
          vendors (dc/q '[:find ?v ?v-name
                          :where
                          [?v :vendor/name ?v-name]
                          [_ :transaction/vendor ?v]]
                        (dc/db conn))]
      (csv/write-csv out (into [header] vendors) :separator \,))))
(defn write-vendor-client-lookup
  "Writes /mnt/data/dev2/ml-test/input/vendor-client-usage.csv: a header row
  (vendor, client, vendor_client_count) followed by one row per
  vendor/client pair with the number of transactions that carry both."
  []
  (with-open [out (io/writer "/mnt/data/dev2/ml-test/input/vendor-client-usage.csv")]
    (let [header ["vendor" "client" "vendor_client_count"]
          usage  (dc/q '[:find ?v ?c (count ?t)
                         :where
                         [?t :transaction/client ?c]
                         [?t :transaction/vendor ?v]]
                       (dc/db conn))]
      (csv/write-csv out (into [header] usage) :separator \,))))
(defn write-account-lookup
  "Writes /mnt/data/dev2/ml-test/input/accounts.csv: a header row
  (account, numeric_code) followed by one [account numeric-code] row for every
  entity that has an :account/numeric-code."
  []
  (with-open [out (io/writer "/mnt/data/dev2/ml-test/input/accounts.csv")]
    (let [header   ["account" "numeric_code"]
          accounts (dc/q '[:find ?a ?nm
                           :where
                           [?a :account/numeric-code ?nm]]
                         (dc/db conn))]
      (csv/write-csv out (into [header] accounts) :separator \,))))
(defn write-account-client-lookup
  "Writes /mnt/data/dev2/ml-test/input/account-client-usage.csv: a header row
  (client, account, client_account_count) followed by one row per
  client/account pair with the number of transactions that link them through
  :transaction/accounts."
  []
  (with-open [out (io/writer "/mnt/data/dev2/ml-test/input/account-client-usage.csv")]
    (let [header ["client" "account" "client_account_count"]
          usage  (dc/q '[:find ?c ?a (count ?t)
                         :where
                         [?t :transaction/client ?c]
                         [?t :transaction/accounts ?ta]
                         [?ta :transaction-account/account ?a]]
                       (dc/db conn))]
      (csv/write-csv out (into [header] usage) :separator \,))))
(defn write-all-training
  "Runs every training export in sequence: the account and vendor lookups,
  the account/client and vendor/client usage counts, then the account and
  vendor training data. Returns nil."
  []
  (doseq [export! [write-account-lookup
                   write-vendor-lookup
                   write-account-client-lookup
                   write-vendor-client-lookup
                   write-account-training-data
                   write-vendor-training-data]]
    (export!)))