(cloud) incremental rebuild of search indexes

This commit is contained in:
2023-04-01 14:13:31 -07:00
parent 55f941e07b
commit baa1c2e001
3 changed files with 130 additions and 33 deletions

View File

@@ -16,10 +16,15 @@
[auto-ap.utils :refer [heartbeat]]
[clojure.set :as set]
[clojure.string :as str]
[manifold.executor :as ex]
[manifold.deferred :as de]
[clojure.tools.logging :as log]
[datomic.client.api :as dc]
[yang.scheduler :as scheduler]
[mount.core :as mount]))
[mount.core :as mount]
[clj-time.core :as time]
[clj-time.coerce :as coerce]
[com.brunobonacci.mulog :as mu]))
(defn can-user-edit-vendor? [vendor-id id]
(if (is-admin? id)
@@ -182,22 +187,60 @@
(not (is-admin? (:id context))) (assoc :hidden false))
"vendor")]
{:name name
:id id})
))
:id id})))
;; Executor created with a fixed thread count of 1. rebuild-search-index and
;; add-incremental-changes both hand their work to it through de/future-with,
;; presumably so that only one of them writes to the search index at a time.
(def single-thread (ex/fixed-thread-executor 1))
;; Rebuilds the "vendor" search index from every entity that has
;; :vendor/search-terms. Each entry is {:id :text :hidden}, where :text is the
;; first search term (falling back to :vendor/name) and :hidden is coerced to a
;; boolean.
;; NOTE(review): this span appears to carry two bodies for the same defn -- a
;; direct search/full-index-query call followed by the same call wrapped in
;; de/future-with on single-thread -- presumably the before/after sides of the
;; change. Confirm which body the file actually keeps.
(defn rebuild-search-index []
(search/full-index-query
(for [result (map first (dc/qseq '[:find (pull ?v [:vendor/search-terms :db/id :vendor/name :vendor/hidden])
:in $
:where [?v :vendor/search-terms ]]
(dc/db conn)))]
{:id (:db/id result)
:text (or (first (:vendor/search-terms result))
(:vendor/name result))
:hidden (boolean (:vendor/hidden result))})
"vendor"))
;; Variant that runs the same full rebuild on the single-thread executor.
(de/future-with
single-thread
(search/full-index-query
(for [[result] (dc/qseq '[:find (pull ?v [:vendor/search-terms :db/id :vendor/name :vendor/hidden])
:in $
:where [?v :vendor/search-terms ]]
(dc/db conn))]
{:id (:db/id result)
:text (or (first (:vendor/search-terms result))
(:vendor/name result))
:hidden (boolean (:vendor/hidden result))})
"vendor")))
;; Holds the :basisT value of the database seen by the most recent
;; add-incremental-changes run; nil until that function has run once.
(def last-run-basis (atom nil))
(defn add-incremental-changes
  "Adds vendors that changed since the previous run to the \"vendor\" search index.

  The work is submitted to `single-thread` through `de/future-with`. On the first
  run (no basis recorded yet) it only records the current database basis and
  indexes nothing. On later runs it queries vendors that have a :vendor/name and
  appear in the `dc/since` view of the database, appends them to the index
  without clearing it, records the new basis, and yields the number of vendors
  found.

  NOTE(review): documents are appended with delete? false, so a vendor that was
  already indexed presumably gets a second document until the next full
  rebuild -- confirm whether search results need de-duplicating.
  NOTE(review): the basis is read with :basisT; the Datomic client API documents
  :t as the basis key on db values -- confirm :basisT is non-nil in this
  deployment."
  []
  (de/future-with
   single-thread
   (let [previous-basis @last-run-basis]
     (if-not previous-basis
       ;; First run: nothing to diff against yet, just remember where we are.
       (reset! last-run-basis (:basisT (dc/db conn)))
       (let [db            (dc/db conn)
             changed-since (dc/since db previous-basis)]
         (mu/log ::indexing
                 :last-run previous-basis
                 :starting-from (:basisT db))
         (let [docs (map (fn [[vendor]]
                           {:id     (:db/id vendor)
                            :text   (or (first (:vendor/search-terms vendor))
                                        (:vendor/name vendor))
                            :hidden (boolean (:vendor/hidden vendor))})
                         ;; $ is the full db (used for the pull and the name
                         ;; check); $$ is the since-view that limits ?v to
                         ;; entities with datoms after the previous basis.
                         (dc/qseq '[:find (pull ?v [:vendor/search-terms :db/id :vendor/name :vendor/hidden])
                                    :in $ $$
                                    :where [$ ?v :vendor/name]
                                    [$$ ?v]]
                                  db
                                  changed-since))]
           (when (seq docs)
             (mu/log ::adding-to-index
                     :sample (first docs)
                     :count (count docs))
             (search/full-index-query docs "vendor" false))
           ;; Record the basis of the db that was actually queried, not a newer one.
           (reset! last-run-basis (:basisT db))
           (count docs)))))))
;; Mount state that schedules rebuild-search-index (wrapped by heartbeat under
;; the label "rebuild-search-index") at an interval of (* 5 60 1000) --
;; presumably milliseconds, i.e. every 5 minutes; confirm scheduler/every's unit.
;; Stopping the state stops the scheduled job.
#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]}
(mount/defstate indexer
:start (scheduler/every (* 5 60 1000) (heartbeat rebuild-search-index "rebuild-search-index"))
:stop (scheduler/stop indexer))
;; Mount state that schedules add-incremental-changes (wrapped by heartbeat
;; under the label "incremental-indexing") at an interval of (* 5 1000) --
;; presumably milliseconds, i.e. every 5 seconds; confirm scheduler/every's unit.
;; Stopping the state stops the scheduled job.
#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]}
(mount/defstate incremental-indexer
:start (scheduler/every (* 5 1000) (heartbeat add-incremental-changes "incremental-indexing"))
:stop (scheduler/stop incremental-indexer))

View File

@@ -9,23 +9,27 @@
(org.apache.lucene.search BooleanClause$Occur BooleanQuery$Builder IndexSearcher PhraseQuery$Builder Query TermQuery)
(org.apache.lucene.store FSDirectory)))
(defn full-index-query
  "Replaces the contents of the on-disk Lucene index named `index-name` with
  `results`.

  Each result is a map with :text (indexed and stored as the \"name\" field) and
  :id (stored as a long in the \"id\" field). Every other key is added as a
  stored string field named after the key, with its value passed through `str`.

  The directory and the writer are opened with `with-open`, so both are closed
  on exit -- including when opening the writer, clearing the index, or adding a
  document throws. Returns nil."
  [results index-name]
  (let [index-uri (java.net.URI. (str "file:///tmp/search/" (:dd-env env) "/" index-name))]
    (with-open [directory    (FSDirectory/open (Paths/get index-uri))
                index-writer (IndexWriter. directory (IndexWriterConfig. (StandardAnalyzer.)))]
      ;; Clearing happens inside with-open so a failure here still closes the
      ;; writer instead of leaving it (and its lock) open.
      (.deleteAll index-writer)
      (doseq [{:keys [text id] :as x} results
              :let [doc (doto (Document.)
                          (.add (TextField. "name" text Field$Store/YES))
                          (.add (StoredField. "id" (long id))))]]
        ;; Any extra keys on the result become stored string fields.
        (doseq [k (remove #{:text :id} (keys x))]
          (.add doc (StringField. (name k) (str (get x k)) Field$Store/YES)))
        (.addDocument index-writer doc)))))
(defn full-index-query
  "Writes `results` into the on-disk Lucene index named `index-name`.

  Each result is a map with :text (indexed and stored as the \"name\" field) and
  :id (stored as a long in the \"id\" field). Every other key is added as a
  stored string field named after the key, with its value passed through `str`.

  With two arguments the existing index contents are deleted first. Pass a
  false `delete?` to append to whatever the index already holds.

  The directory and the writer are opened with `with-open`, so both are closed
  on exit -- including when opening the writer, clearing the index, or adding a
  document throws. Returns nil."
  ([results index-name]
   (full-index-query results index-name true))
  ([results index-name delete?]
   (let [index-uri (java.net.URI. (str "file:///tmp/search/" (:dd-env env) "/" index-name))]
     (with-open [directory    (FSDirectory/open (Paths/get index-uri))
                 index-writer (IndexWriter. directory (IndexWriterConfig. (StandardAnalyzer.)))]
       ;; Clearing happens inside with-open so a failure here still closes the
       ;; writer instead of leaving it (and its lock) open.
       (when delete?
         (.deleteAll index-writer))
       (doseq [{:keys [text id] :as x} results
               :let [doc (doto (Document.)
                           (.add (TextField. "name" text Field$Store/YES))
                           (.add (StoredField. "id" (long id))))]]
         ;; Any extra keys on the result become stored string fields.
         (doseq [k (remove #{:text :id} (keys x))]
           (.add doc (StringField. (name k) (str (get x k)) Field$Store/YES)))
         (.addDocument index-writer doc))))))
(defn make-query [n]
(let [

View File

@@ -1,10 +1,8 @@
It looks like there are a bunch of orphaned customizations for accounts, breaking indexes
upsertledger - matching transaction rule might not assign an account. Other things might not assign accounts. This is an assertion that is commented out. Determine consequence of disabling
Double check each job still functions in the new system
Reconcile ledger. Does it work? What are the downsides? Can it be made faster now?
Make reports just be based on running-balances
When you add a vendor, it should be searchable immediately
Test exports
Some jobs just aren't so big they need to be jobs anymore:
Refreshing running balance for journal entry lines
@@ -70,3 +68,55 @@ Merge branch into master
Rename prod-cloud to prod everywhere
Release again
git push deploy master
Fix ledger entries:
(clojure.data.csv/write-csv
*out*
(->> (auto-ap.datomic/pull-many (dc/as-of (dc/db conn) #inst "2023-03-31T00:00:00") '[:invoice/date {:invoice/client [:client/code]
:invoice/expense-accounts
[{:invoice-expense-account/account [:account/name]}]
:journal-entry/_original-entity [{:journal-entry/line-items [{:journal-entry-line/account [:account/name]}]}]}] (->> bad (map first)))
(map (fn [i]
[(-> i :invoice/client :client/code)
(-> i :invoice/date clj-time.coerce/to-date-time (auto-ap.time/unparse auto-ap.time/iso-date))
(-> i :invoice/expense-accounts first :invoice-expense-account/account :account/name)
(-> i :journal-entry/_original-entity first :journal-entry/line-items
(#(filter (fn [a] (not= "Accounts Payable" (:account/name (:journal-entry-line/account a)))) %))
first :journal-entry-line/account :account/name)]))
))
BSG,2021-11-19,Advertising,Food Sales
BSG,2021-08-26,Auto and Truck Expenses,Service or Pass Through Costs
BSG,2021-12-01,Advertising,Design
WGC,2021-03-01,Equipment 3,Kitchen Equipment
WGC,2021-02-03,Equipment 3,Kitchen Equipment
WGC,2020-12-14,Marketing Consultant,Website/ Social Media
WGC,2020-12-10,Charitable Contributions,Promotional or Donation Meal Comps
WGC,2020-12-15,Note Payable 3,Notes Payable - General
WGC,2020-11-16,Note Payable 3,Notes Payable - General
WGC,2020-11-05,Unassigned Expenses,Misc Payments
WGC,2021-01-13,Marketing Consultant,Website/ Social Media
WGC,2020-10-15,Note Payable 3,Notes Payable - General
BSG,2021-07-20,Auto and Truck Expenses,Service or Pass Through Costs
WGC,2021-02-12,Marketing Consultant,Website/ Social Media
BSG,2021-12-22,Advertising,Food Sales
BSG,2021-12-31,Advertising,Food Sales
BSG,2021-06-15,Advertising,Design
BSG,2021-12-14,Advertising,Food Sales
BSG,2021-04-15,Advertising,Design
WGC,2021-06-11,Marketing Consultant,Website/ Social Media
BSG,2021-12-14,Food Cost,Meal Comps
WGC,2021-08-05,Marketing Consultant,Website/ Social Media
BSG,2021-09-30,Auto and Truck Expenses,Service or Pass Through Costs
BSG,2021-08-25,Marketing,Marketing Consultant
BSG,2021-12-03,Food Cost,Meal Comps
BSG,2021-12-09,Food Cost,Meal Comps
WGC,2021-10-08,Advertising,Website/ Social Media
BSG,2021-12-09,Advertising,Food Sales
WGC,2021-03-26,Equipment 3,Kitchen Equipment
WGC,2021-03-26,Equipment 3,Kitchen Equipment
BSG,2021-05-26,Advertising,Design
WGC,2021-09-17,Advertising,Promotional or Donation Meal Comps