(cloud) incremental rebuild of search indexes

This commit is contained in:
2023-04-01 14:13:31 -07:00
parent 55f941e07b
commit baa1c2e001
3 changed files with 130 additions and 33 deletions

View File

@@ -16,10 +16,15 @@
[auto-ap.utils :refer [heartbeat]] [auto-ap.utils :refer [heartbeat]]
[clojure.set :as set] [clojure.set :as set]
[clojure.string :as str] [clojure.string :as str]
[manifold.executor :as ex]
[manifold.deferred :as de]
[clojure.tools.logging :as log] [clojure.tools.logging :as log]
[datomic.client.api :as dc] [datomic.client.api :as dc]
[yang.scheduler :as scheduler] [yang.scheduler :as scheduler]
[mount.core :as mount])) [mount.core :as mount]
[clj-time.core :as time]
[clj-time.coerce :as coerce]
[com.brunobonacci.mulog :as mu]))
(defn can-user-edit-vendor? [vendor-id id] (defn can-user-edit-vendor? [vendor-id id]
(if (is-admin? id) (if (is-admin? id)
@@ -182,22 +187,60 @@
(not (is-admin? (:id context))) (assoc :hidden false)) (not (is-admin? (:id context))) (assoc :hidden false))
"vendor")] "vendor")]
{:name name {:name name
:id id}) :id id})))
))
;; Executor created with a fixed pool size of 1. Both rebuild-search-index and
;; add-incremental-changes hand their work to it via de/future-with.
;; NOTE(review): presumably this is what keeps the full rebuild and the
;; incremental update from writing to the search index at the same time — confirm.
(def single-thread (ex/fixed-thread-executor 1))
(defn rebuild-search-index [] (defn rebuild-search-index []
(search/full-index-query (de/future-with
(for [result (map first (dc/qseq '[:find (pull ?v [:vendor/search-terms :db/id :vendor/name :vendor/hidden]) single-thread
:in $ (search/full-index-query
:where [?v :vendor/search-terms ]] (for [[result] (dc/qseq '[:find (pull ?v [:vendor/search-terms :db/id :vendor/name :vendor/hidden])
(dc/db conn)))] :in $
{:id (:db/id result) :where [?v :vendor/search-terms ]]
:text (or (first (:vendor/search-terms result)) (dc/db conn))]
(:vendor/name result)) {:id (:db/id result)
:hidden (boolean (:vendor/hidden result))}) :text (or (first (:vendor/search-terms result))
"vendor")) (:vendor/name result))
:hidden (boolean (:vendor/hidden result))})
"vendor")))
;; Holds the :basisT of the database value seen by the most recent
;; add-incremental-changes run. Starts as nil, which add-incremental-changes
;; treats as "no previous run": it only records the current basis and indexes nothing.
(def last-run-basis (atom nil))
(defn add-incremental-changes
  "Adds vendors touched since the previous run to the \"vendor\" search index.

  The work is submitted to the `single-thread` executor and the resulting
  deferred is returned immediately.

  First run (`last-run-basis` is nil): only records the current database basis;
  nothing is indexed. The deferred yields that basis value.

  Later runs: finds every entity that has a :vendor/name and also appears in
  the since-view of the database (changes after the recorded basis), appends
  one index document per match, advances `last-run-basis` to the basis of the
  database value that was queried, and yields the number of matches.

  NOTE(review): documents are passed to search/full-index-query with the
  delete flag set to false, so an edited vendor looks like it gets an
  additional document rather than replacing its earlier one until the next
  full rebuild — confirm this is acceptable.
  NOTE(review): this selects on :vendor/name while rebuild-search-index
  selects on :vendor/search-terms — confirm the difference is intended."
  []
  (de/future-with
   single-thread
   (let [previous-basis @last-run-basis]
     (if-not previous-basis
       ;; Nothing to diff against yet: just remember where we are.
       (reset! last-run-basis (:basisT (dc/db conn)))
       (let [db            (dc/db conn)
             changed-since (dc/since db previous-basis)]
         (mu/log ::indexing
                 :last-run previous-basis
                 :starting-from (:basisT db))
         (let [;; The :vendor/name clause runs against the full db first, so ?v
               ;; is already bound when it is matched against the since-db.
               rows (dc/qseq '[:find (pull ?v [:vendor/search-terms :db/id :vendor/name :vendor/hidden])
                               :in $ $$
                               :where [$ ?v :vendor/name]
                               [$$ ?v]]
                             db
                             changed-since)
               docs (map (fn [[vendor]]
                           {:id     (:db/id vendor)
                            ;; Prefer the first search term; fall back to the name.
                            :text   (or (first (:vendor/search-terms vendor))
                                        (:vendor/name vendor))
                            :hidden (boolean (:vendor/hidden vendor))})
                         rows)]
           (when (seq docs)
             (mu/log ::adding-to-index
                     :sample (first docs)
                     :count (count docs))
             (search/full-index-query docs "vendor" false))
           ;; Advance to the basis of the db we queried, not a fresher one,
           ;; so changes that landed during this run are picked up next time.
           (reset! last-run-basis (:basisT db))
           (count docs)))))))
#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} #_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]}
(mount/defstate indexer (mount/defstate indexer
:start (scheduler/every (* 5 60 1000) (heartbeat rebuild-search-index "rebuild-search-index")) :start (scheduler/every (* 5 60 1000) (heartbeat rebuild-search-index "rebuild-search-index"))
:stop (scheduler/stop indexer)) :stop (scheduler/stop indexer))
;; Mount state for the incremental search indexer.
;; On start it schedules add-incremental-changes, wrapped by heartbeat under the
;; name "incremental-indexing", with an interval argument of (* 5 1000).
;; NOTE(review): presumably the interval is in milliseconds, i.e. every 5 seconds,
;; versus (* 5 60 1000) for the full rebuild — confirm against yang.scheduler.
;; On stop it passes the scheduled handle to scheduler/stop.
#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]}
(mount/defstate incremental-indexer
:start (scheduler/every (* 5 1000) (heartbeat add-incremental-changes "incremental-indexing"))
:stop (scheduler/stop incremental-indexer))

View File

@@ -9,23 +9,27 @@
(org.apache.lucene.search BooleanClause$Occur BooleanQuery$Builder IndexSearcher PhraseQuery$Builder Query TermQuery) (org.apache.lucene.search BooleanClause$Occur BooleanQuery$Builder IndexSearcher PhraseQuery$Builder Query TermQuery)
(org.apache.lucene.store FSDirectory))) (org.apache.lucene.store FSDirectory)))
(defn full-index-query [results index-name] (defn full-index-query
(let [directory (FSDirectory/open (Paths/get (java.net.URI. (str "file:///tmp/search/" (:dd-env env) "/" index-name)))) ([results index-name]
analyzer (StandardAnalyzer.) (full-index-query results index-name true))
index-writer-config (IndexWriterConfig. analyzer) ([results index-name delete?]
index-writer (IndexWriter. directory index-writer-config)] (let [directory (FSDirectory/open (Paths/get (java.net.URI. (str "file:///tmp/search/" (:dd-env env) "/" index-name))))
(.deleteAll index-writer) analyzer (StandardAnalyzer.)
(try index-writer-config (IndexWriterConfig. analyzer)
(doseq [{:keys [text id] :as x} results index-writer (IndexWriter. directory index-writer-config)]
:let [doc (doto (when delete?
(Document.) (.deleteAll index-writer))
(.add (TextField. "name" text Field$Store/YES)) (try
(.add (StoredField. "id" (long id))))]] (doseq [{:keys [text id] :as x} results
(doseq [k (filter (complement #{:text :id}) (keys x))] :let [doc (doto
(.add doc (StringField. (name k) (str (get x k)) Field$Store/YES))) (Document.)
(.addDocument index-writer doc)) (.add (TextField. "name" text Field$Store/YES))
(finally (.add (StoredField. "id" (long id))))]]
(.close index-writer))))) (doseq [k (filter (complement #{:text :id}) (keys x))]
(.add doc (StringField. (name k) (str (get x k)) Field$Store/YES)))
(.addDocument index-writer doc))
(finally
(.close index-writer))))))
(defn make-query [n] (defn make-query [n]
(let [ (let [

View File

@@ -1,10 +1,8 @@
it looks like there are a bunch of orphaned customizations for accounts, breaking indexes it looks like there are a bunch of orphaned customizations for accounts, breaking indexes
upsertledger - matching transaction rule might not assign an account. Other things might not assign accounts. This is an assertion that is commented out. Determine consequence of disabling upsertledger - matching transaction rule might not assign an account. Other things might not assign accounts. This is an assertion that is commented out. Determine consequence of disabling
Double check each job still functions in the new system Double check each job still functions in the new system
Reconcile ledger. Does it work? What are the downsides? Can it be made faster now?
Make reports just be based on running-balances Make reports just be based on running-balances
When you add a vendor, it should be searchable immediately Test exports
Some jobs just aren't so big they need to be jobs anymore: Some jobs just aren't so big they need to be jobs anymore:
Refreshing running balance for journal entry lines Refreshing running balance for journal entry lines
@@ -70,3 +68,55 @@ Merge branch into master
Rename prod-cloud to prod everywhere Rename prod-cloud to prod everywhere
Release again Release again
git push deploy master git push deploy master
Fix ledger entries:
(clojure.data.csv/write-csv
*out*
(->> (auto-ap.datomic/pull-many (dc/as-of (dc/db conn) #inst "2023-03-31T00:00:00") '[:invoice/date {:invoice/client [:client/code]
:invoice/expense-accounts
[{:invoice-expense-account/account [:account/name]}]
:journal-entry/_original-entity [{:journal-entry/line-items [{:journal-entry-line/account [:account/name]}]}]}] (->> bad (map first)))
(map (fn [i]
[(-> i :invoice/client :client/code)
(-> i :invoice/date clj-time.coerce/to-date-time (auto-ap.time/unparse auto-ap.time/iso-date))
(-> i :invoice/expense-accounts first :invoice-expense-account/account :account/name)
(-> i :journal-entry/_original-entity first :journal-entry/line-items
(#(filter (fn [a] (not= "Accounts Payable" (:account/name (:journal-entry-line/account a)))) %))
first :journal-entry-line/account :account/name)]))
))
BSG,2021-11-19,Advertising,Food Sales
BSG,2021-08-26,Auto and Truck Expenses,Service or Pass Through Costs
BSG,2021-12-01,Advertising,Design
WGC,2021-03-01,Equipment 3,Kitchen Equipment
WGC,2021-02-03,Equipment 3,Kitchen Equipment
WGC,2020-12-14,Marketing Consultant,Website/ Social Media
WGC,2020-12-10,Charitable Contributions,Promotional or Donation Meal Comps
WGC,2020-12-15,Note Payable 3,Notes Payable - General
WGC,2020-11-16,Note Payable 3,Notes Payable - General
WGC,2020-11-05,Unassigned Expenses,Misc Payments
WGC,2021-01-13,Marketing Consultant,Website/ Social Media
WGC,2020-10-15,Note Payable 3,Notes Payable - General
BSG,2021-07-20,Auto and Truck Expenses,Service or Pass Through Costs
WGC,2021-02-12,Marketing Consultant,Website/ Social Media
BSG,2021-12-22,Advertising,Food Sales
BSG,2021-12-31,Advertising,Food Sales
BSG,2021-06-15,Advertising,Design
BSG,2021-12-14,Advertising,Food Sales
BSG,2021-04-15,Advertising,Design
WGC,2021-06-11,Marketing Consultant,Website/ Social Media
BSG,2021-12-14,Food Cost,Meal Comps
WGC,2021-08-05,Marketing Consultant,Website/ Social Media
BSG,2021-09-30,Auto and Truck Expenses,Service or Pass Through Costs
BSG,2021-08-25,Marketing,Marketing Consultant
BSG,2021-12-03,Food Cost,Meal Comps
BSG,2021-12-09,Food Cost,Meal Comps
WGC,2021-10-08,Advertising,Website/ Social Media
BSG,2021-12-09,Advertising,Food Sales
WGC,2021-03-26,Equipment 3,Kitchen Equipment
WGC,2021-03-26,Equipment 3,Kitchen Equipment
BSG,2021-05-26,Advertising,Design
WGC,2021-09-17,Advertising,Promotional or Donation Meal Comps