From baa1c2e001424eb7e133cea8382d358ecba81488 Mon Sep 17 00:00:00 2001 From: Bryce Covert Date: Sat, 1 Apr 2023 14:13:31 -0700 Subject: [PATCH] (cloud) incremental rebuild of search indexes --- src/clj/auto_ap/graphql/vendors.clj | 69 +++++++++++++++++++++++------ src/clj/auto_ap/search.clj | 38 +++++++++------- things-to-search-for.txt | 56 +++++++++++++++++++++-- 3 files changed, 130 insertions(+), 33 deletions(-) diff --git a/src/clj/auto_ap/graphql/vendors.clj b/src/clj/auto_ap/graphql/vendors.clj index 011038a9..3f39a059 100644 --- a/src/clj/auto_ap/graphql/vendors.clj +++ b/src/clj/auto_ap/graphql/vendors.clj @@ -16,10 +16,15 @@ [auto-ap.utils :refer [heartbeat]] [clojure.set :as set] [clojure.string :as str] + [manifold.executor :as ex] + [manifold.deferred :as de] [clojure.tools.logging :as log] [datomic.client.api :as dc] [yang.scheduler :as scheduler] - [mount.core :as mount])) + [mount.core :as mount] + [clj-time.core :as time] + [clj-time.coerce :as coerce] + [com.brunobonacci.mulog :as mu])) (defn can-user-edit-vendor? [vendor-id id] (if (is-admin? id) @@ -182,22 +187,60 @@ (not (is-admin? (:id context))) (assoc :hidden false)) "vendor")] {:name name - :id id}) - )) + :id id}))) + +(def single-thread (ex/fixed-thread-executor 1)) (defn rebuild-search-index [] - (search/full-index-query - (for [result (map first (dc/qseq '[:find (pull ?v [:vendor/search-terms :db/id :vendor/name :vendor/hidden]) - :in $ - :where [?v :vendor/search-terms ]] - (dc/db conn)))] - {:id (:db/id result) - :text (or (first (:vendor/search-terms result)) - (:vendor/name result)) - :hidden (boolean (:vendor/hidden result))}) - "vendor")) + (de/future-with + single-thread + (search/full-index-query + (for [[result] (dc/qseq '[:find (pull ?v [:vendor/search-terms :db/id :vendor/name :vendor/hidden]) + :in $ + :where [?v :vendor/search-terms ]] + (dc/db conn))] + {:id (:db/id result) + :text (or (first (:vendor/search-terms result)) + (:vendor/name result)) + :hidden (boolean (:vendor/hidden result))}) + "vendor"))) + +(def last-run-basis (atom nil)) + +(defn add-incremental-changes [] + (de/future-with + single-thread + (if-let [last-run-basis-value @last-run-basis] + (let [db (dc/db conn) + recent (dc/since db last-run-basis-value) + _ (mu/log ::indexing + :last-run last-run-basis-value + :starting-from (:basisT db)) + results (for [[result] (dc/qseq '[:find (pull ?v [:vendor/search-terms :db/id :vendor/name :vendor/hidden]) + :in $ $$ + :where [$ ?v :vendor/name ] + [$$ ?v]] + db + recent)] + {:id (:db/id result) + :text (or (first (:vendor/search-terms result)) + (:vendor/name result)) + :hidden (boolean (:vendor/hidden result))})] + (when (seq results) + (mu/log ::adding-to-index + :sample (first results) + :count (count results)) + (search/full-index-query results "vendor" false)) + (reset! last-run-basis (:basisT db)) + (count results)) + (reset! last-run-basis (:basisT (dc/db conn)))))) #_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} (mount/defstate indexer :start (scheduler/every (* 5 60 1000) (heartbeat rebuild-search-index "rebuild-search-index")) :stop (scheduler/stop indexer)) + +#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]} +(mount/defstate incremental-indexer + :start (scheduler/every (* 5 1000) (heartbeat add-incremental-changes "incremental-indexing")) + :stop (scheduler/stop incremental-indexer)) diff --git a/src/clj/auto_ap/search.clj b/src/clj/auto_ap/search.clj index 07e57810..e0359891 100644 --- a/src/clj/auto_ap/search.clj +++ b/src/clj/auto_ap/search.clj @@ -9,23 +9,27 @@ (org.apache.lucene.search BooleanClause$Occur BooleanQuery$Builder IndexSearcher PhraseQuery$Builder Query TermQuery) (org.apache.lucene.store FSDirectory))) -(defn full-index-query [results index-name] - (let [directory (FSDirectory/open (Paths/get (java.net.URI. (str "file:///tmp/search/" (:dd-env env) "/" index-name)))) - analyzer (StandardAnalyzer.) - index-writer-config (IndexWriterConfig. analyzer) - index-writer (IndexWriter. directory index-writer-config)] - (.deleteAll index-writer) - (try - (doseq [{:keys [text id] :as x} results - :let [doc (doto - (Document.) - (.add (TextField. "name" text Field$Store/YES)) - (.add (StoredField. "id" (long id))))]] - (doseq [k (filter (complement #{:text :id}) (keys x))] - (.add doc (StringField. (name k) (str (get x k)) Field$Store/YES))) - (.addDocument index-writer doc)) - (finally - (.close index-writer))))) +(defn full-index-query + ([results index-name] + (full-index-query results index-name true)) + ([results index-name delete?] + (let [directory (FSDirectory/open (Paths/get (java.net.URI. (str "file:///tmp/search/" (:dd-env env) "/" index-name)))) + analyzer (StandardAnalyzer.) + index-writer-config (IndexWriterConfig. analyzer) + index-writer (IndexWriter. directory index-writer-config)] + (when delete? + (.deleteAll index-writer)) + (try + (doseq [{:keys [text id] :as x} results + :let [doc (doto + (Document.) + (.add (TextField. "name" text Field$Store/YES)) + (.add (StoredField. "id" (long id))))]] + (doseq [k (filter (complement #{:text :id}) (keys x))] + (.add doc (StringField. (name k) (str (get x k)) Field$Store/YES))) + (.addDocument index-writer doc)) + (finally + (.close index-writer)))))) (defn make-query [n] (let [ diff --git a/things-to-search-for.txt b/things-to-search-for.txt index a147cc46..6ee1fa6a 100644 --- a/things-to-search-for.txt +++ b/things-to-search-for.txt @@ -1,10 +1,8 @@ it looks like there are a bbunch of orrphaned customizations for accounts, breaking indexes upsertledger - matching transaction rule might not assign an account. Other things might not assign accounts. This is an assertion that is commented out. Determine consequence of disabling Double check each job still functions in the new system -Reconcile ledger. Does it work? What are the downsides? Can it be made faster now? Make reports just be based on running-balances -When you add a vendor, it should be searchable immediately - +Test exports Some jobs just aren't so big they need to be jobs anymore: Refreshing running balance for journal entry lines @@ -70,3 +68,55 @@ Merge branch into master Rename prod-cloud to prod everywhere Release again git push deploy master + + + +Fix ledger entries: +(clojure.data.csv/write-csv + *out* + (->> (auto-ap.datomic/pull-many (dc/as-of (dc/db conn) #inst "2023-03-31T00:00:00") '[:invoice/date {:invoice/client [:client/code] + :invoice/expense-accounts + [{:invoice-expense-account/account [:account/name]}] + :journal-entry/_original-entity [{:journal-entry/line-items [{:journal-entry-line/account [:account/name]}]}]}] (->> bad (map first))) + (map (fn [i] + [(-> i :invoice/client :client/code) + (-> i :invoice/date clj-time.coerce/to-date-time (auto-ap.time/unparse auto-ap.time/iso-date)) + (-> i :invoice/expense-accounts first :invoice-expense-account/account :account/name) + (-> i :journal-entry/_original-entity first :journal-entry/line-items + (#(filter (fn [a] (not= "Accounts Payable" (:account/name (:journal-entry-line/account a)))) %)) + first :journal-entry-line/account :account/name)])) + )) + + +BSG,2021-11-19,Advertising,Food Sales +BSG,2021-08-26,Auto and Truck Expenses,Service or Pass Through Costs +BSG,2021-12-01,Advertising,Design +WGC,2021-03-01,Equipment 3,Kitchen Equipment +WGC,2021-02-03,Equipment 3,Kitchen Equipment +WGC,2020-12-14,Marketing Consultant,Website/ Social Media +WGC,2020-12-10,Charitable Contributions,Promotional or Donation Meal Comps +WGC,2020-12-15,Note Payable 3,Notes Payable - General +WGC,2020-11-16,Note Payable 3,Notes Payable - General +WGC,2020-11-05,Unassigned Expenses,Misc Payments +WGC,2021-01-13,Marketing Consultant,Website/ Social Media +WGC,2020-10-15,Note Payable 3,Notes Payable - General +BSG,2021-07-20,Auto and Truck Expenses,Service or Pass Through Costs +WGC,2021-02-12,Marketing Consultant,Website/ Social Media +BSG,2021-12-22,Advertising,Food Sales +BSG,2021-12-31,Advertising,Food Sales +BSG,2021-06-15,Advertising,Design +BSG,2021-12-14,Advertising,Food Sales +BSG,2021-04-15,Advertising,Design +WGC,2021-06-11,Marketing Consultant,Website/ Social Media +BSG,2021-12-14,Food Cost,Meal Comps +WGC,2021-08-05,Marketing Consultant,Website/ Social Media +BSG,2021-09-30,Auto and Truck Expenses,Service or Pass Through Costs +BSG,2021-08-25,Marketing,Marketing Consultant +BSG,2021-12-03,Food Cost,Meal Comps +BSG,2021-12-09,Food Cost,Meal Comps +WGC,2021-10-08,Advertising,Website/ Social Media +BSG,2021-12-09,Advertising,Food Sales +WGC,2021-03-26,Equipment 3,Kitchen Equipment +WGC,2021-03-26,Equipment 3,Kitchen Equipment +BSG,2021-05-26,Advertising,Design +WGC,2021-09-17,Advertising,Promotional or Donation Meal Comps