fixes commit problems, bad vendors

This commit is contained in:
Bryce
2023-06-08 22:18:46 -07:00
parent 49a5af9409
commit 6a43ac257b
3 changed files with 45 additions and 45 deletions

View File

@@ -199,15 +199,14 @@
 (defn rebuild-search-index []
   (de/future-with
    single-thread
    (auto-ap.solr/index-documents-raw
     auto-ap.solr/impl
     "vendors"
     (for [[result] (dc/qseq {:query '[:find (pull ?v [:vendor/search-terms :db/id :vendor/name :vendor/hidden])
                                       :in $
-                                      :where [?v :vendor/search-terms ]]
+                                      :where [?v :vendor/name]]
                              :args [(dc/db conn)]})]
       {"id" (:db/id result)
-       "name" (or (first (:vendor/search-terms result))
-                  (:vendor/name result))
+       "name" (:vendor/name result)
        "hidden" (boolean (:vendor/hidden result))}))))

View File

@@ -4,8 +4,7 @@
(def pdf-templates (def pdf-templates
[ [;; CHEF's WAREHOUSE
;; CHEF's WAREHOUSE
{:vendor "CHFW" {:vendor "CHFW"
:keywords [#"CHEF'S WAREHOUSE"] :keywords [#"CHEF'S WAREHOUSE"]
:extract {:total #"2 WKS C\.C\.\s+([\d.,]+)" :extract {:total #"2 WKS C\.C\.\s+([\d.,]+)"
@@ -75,9 +74,7 @@
:parser {:date [:clj-time "MM/dd/yy"] :parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}} :total [:trim-commas-and-negate nil]}}
;; SOUTHBAY FRESH
;; SOUTHBAY FRESH
{:vendor "Southbay Fresh Produce" {:vendor "Southbay Fresh Produce"
:keywords [#"SOUTH BAY FRESH PRODUCE"] :keywords [#"SOUTH BAY FRESH PRODUCE"]
:extract {:date #"^([0-9]+/[0-9]+/[0-9]+)" :extract {:date #"^([0-9]+/[0-9]+/[0-9]+)"
@@ -173,10 +170,9 @@
:parser {:date [:clj-time "MM/dd/yy"] :parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]} :total [:trim-commas nil]}
:multi (. java.util.regex.Pattern (compile (-> \formfeed str) java.util.regex.Pattern/CASE_INSENSITIVE)) :multi (. java.util.regex.Pattern (compile (-> \formfeed str) java.util.regex.Pattern/CASE_INSENSITIVE))
:multi-match? #"(Total\s+[0-9\.]+|Total Order)" :multi-match? #"(Total\s+[0-9\.]+|Total Order)"}
}
;; AUTO-CHLOR ;; AUTO-CHLOR
{:vendor "Auto-Chlor" {:vendor "Auto-Chlor"
:keywords [#"AUTO-CHLOR"] :keywords [#"AUTO-CHLOR"]
:extract {:date #"DATE : ([0-9]+/[0-9]+/[0-9]+)" :extract {:date #"DATE : ([0-9]+/[0-9]+/[0-9]+)"
@@ -208,8 +204,7 @@
:multi #"\n" :multi #"\n"
:multi-match? #"^\s+.*?\d{6,}.*?\$"} :multi-match? #"^\s+.*?\d{6,}.*?\$"}
;; C & L
;; C & L
{:vendor "C&L Produce" {:vendor "C&L Produce"
:keywords [#"440 Franklin Street"] :keywords [#"440 Franklin Street"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)" :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
@@ -239,8 +234,7 @@
:parser {:date [:clj-time "dd-MMM-yy"] :parser {:date [:clj-time "dd-MMM-yy"]
:total [:trim-commas-and-negate nil]}} :total [:trim-commas-and-negate nil]}}
;;; credits don't have the same format
;;; credits don't have the same format
{:vendor "General Produce Company" {:vendor "General Produce Company"
:keywords [#"1330 NORTH B"] :keywords [#"1330 NORTH B"]
:extract {:date #"DATE.*\n.*\n.*?([0-9]+/[0-9]+/[0-9]+)" :extract {:date #"DATE.*\n.*\n.*?([0-9]+/[0-9]+/[0-9]+)"
@@ -362,9 +356,7 @@
:parser {:date [:clj-time "MM/dd/yyyy"] :parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}} :total [:trim-commas nil]}}
;; PACIFIC SEAFOOD
;; PACIFIC SEAFOOD
{:vendor "Pacific Seafood" {:vendor "Pacific Seafood"
:keywords [#"(pacseafood|PACIFIC FRESH)"] :keywords [#"(pacseafood|PACIFIC FRESH)"]
:extract {:date #"DATE(?:.*\n.*(?=([0-9]+/[0-9]+/[0-9]+)))([0-9]+/[0-9]+/[0-9]+)" :extract {:date #"DATE(?:.*\n.*(?=([0-9]+/[0-9]+/[0-9]+)))([0-9]+/[0-9]+/[0-9]+)"
@@ -421,13 +413,12 @@
{:vendor "Le Boulanger" {:vendor "Le Boulanger"
:keywords [#"Le Boulanger"] :keywords [#"Le Boulanger"]
:extract {:date #"Invoice Date: ([^\n]+)\n" :extract {:date #"Invoice Date: ([^\n]+)\n"
:customer-identifier #"Ship to\n+\s+([\S ]+?)(?=\s{2,})" :customer-identifier #"Ship to\n+\s+([\S ]+?)(?=\s{2,})"
:invoice-number #"Invoice No: ([^\n]+)\n" :invoice-number #"Invoice No: ([^\n]+)\n"
:total #" Total:\s+([\d\.]+)"} :total #" Total:\s+([\d\.]+)"}
:parser {:date [:clj-time "MMM dd, yyyy"]}} :parser {:date [:clj-time "MMM dd, yyyy"]}}
;; A&B
;; A&B
{:vendor "A&B Produce" {:vendor "A&B Produce"
:keywords [#"ABProduce"] :keywords [#"ABProduce"]
:extract {:date #"^\s+([0-9]+/[0-9]+/[0-9]+)" :extract {:date #"^\s+([0-9]+/[0-9]+/[0-9]+)"
@@ -530,19 +521,19 @@
{:vendor "Performance Food Group - ROMA" {:vendor "Performance Food Group - ROMA"
:keywords [#"inquiries call 1-800-233-6211"] :keywords [#"inquiries call 1-800-233-6211"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)" :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO:\s+([\S ]+?)(?=\s{2,})" :customer-identifier #"BILL TO:\s+([\S ]+?)(?=\s{2,})"
:invoice-number #"^\s+([\dA-Z]+)" :invoice-number #"^\s+([\dA-Z]+)"
:total #"([\d\.,\-]+\.[\d\-]+)"} :total #"([\d\.,\-]+\.[\d\-]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"] :parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]} :total [:trim-commas-and-negate nil]}
:multi #"\n" :multi #"\n"
:multi-match? #"^\s+[\d]{6,8}\s+\d+"} :multi-match? #"^\s+[\d]{6,8}\s+\d+"}
;; ACME BREAD ;; ACME BREAD
{:vendor "Acme Bread" {:vendor "Acme Bread"
:keywords [#"acmebread\.com"] :keywords [#"acmebread\.com"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)" :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Print Date.*\n.*\n(.*)" :customer-identifier #"Print Date.*\n.*\n(.*)"
:invoice-number #"^\s*(\d+)" :invoice-number #"^\s*(\d+)"
:total #"\s{2,}(\d+\.\d{2})\s{2,}"} :total #"\s{2,}(\d+\.\d{2})\s{2,}"}
:parser {:date [:clj-time "MM/dd/yyyy"] :parser {:date [:clj-time "MM/dd/yyyy"]
@@ -554,19 +545,17 @@
{:vendor "Performance Food Group - ROMA" {:vendor "Performance Food Group - ROMA"
:keywords [#"Performance Food Group, Inc\n\f"] :keywords [#"Performance Food Group, Inc\n\f"]
:extract {:date #"Date: ([0-9]+/[0-9]+/[0-9]+)" :extract {:date #"Date: ([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO:\s+([\S ]+?)(?=\s{2,})" :customer-identifier #"BILL TO:\s+([\S ]+?)(?=\s{2,})"
:invoice-number #"INVOICE NO.\s+ ([\d]+)" :invoice-number #"INVOICE NO.\s+ ([\d]+)"
:total #"([\d\.,]+)\s+INVOICE TOTAL"} :total #"([\d\.,]+)\s+INVOICE TOTAL"}
:parser {:date [:clj-time "MM/dd/yy"] :parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}} :total [:trim-commas nil]}}
;; JFC
;; JFC
{:vendor "JFC International" {:vendor "JFC International"
:keywords [#"48490 MILMONT DRIVE"] :keywords [#"48490 MILMONT DRIVE"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)" :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"SOLD\s+([\S ]+?)(?=(\s{2,}|\n))" :customer-identifier #"SOLD\s+([\S ]+?)(?=(\s{2,}|\n))"
:invoice-number #"(\S+)\s+(?=[0-9]+/[0-9]+/[0-9]+)" :invoice-number #"(\S+)\s+(?=[0-9]+/[0-9]+/[0-9]+)"
:total #"(?:INVOICE|TOTAL|CREDIT)\s+([\d\.,\-]+\.[\d\-]+( CR)?)"} :total #"(?:INVOICE|TOTAL|CREDIT)\s+([\d\.,\-]+\.[\d\-]+( CR)?)"}
:parser {:date [:clj-time "MM/dd/yyyy"] :parser {:date [:clj-time "MM/dd/yyyy"]
@@ -576,7 +565,7 @@
{:vendor "Roma Bakery Inc." {:vendor "Roma Bakery Inc."
:keywords [#"Roma Bakery Inc"] :keywords [#"Roma Bakery Inc"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)" :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To.*\n\s+(.*?)\s{2,}" :customer-identifier #"Bill To.*\n\s+(.*?)\s{2,}"
:invoice-number #"Invoice (\d+)" :invoice-number #"Invoice (\d+)"
:total #"Total\s+([\d\-\.]+)"} :total #"Total\s+([\d\-\.]+)"}
:parser {:date [:clj-time "MM/dd/yy"] :parser {:date [:clj-time "MM/dd/yy"]
@@ -586,7 +575,7 @@
{:vendor "Kael Foods" {:vendor "Kael Foods"
:keywords [#"kaelfoods.com"] :keywords [#"kaelfoods.com"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)" :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To.*\n\s+(.*?)\s{2,}" :customer-identifier #"Bill To.*\n\s+(.*?)\s{2,}"
:invoice-number #"INVOICE 0*(\d+)" :invoice-number #"INVOICE 0*(\d+)"
:total #"TOTAL:\s+\$([\d\-\.,]+)"} :total #"TOTAL:\s+\$([\d\-\.,]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"] :parser {:date [:clj-time "MM/dd/yyyy"]
@@ -596,7 +585,7 @@
{:vendor "Starter Bakery" {:vendor "Starter Bakery"
:keywords [#"starterbakery.com"] :keywords [#"starterbakery.com"]
:extract {:date #"INVOICE DATE:\s+(.*?)\s{2,}" :extract {:date #"INVOICE DATE:\s+(.*?)\s{2,}"
:customer-identifier #"BILL TO:.*\n\s+(.*?)\s{2,}" :customer-identifier #"BILL TO:.*\n\s+(.*?)\s{2,}"
:invoice-number #"Invoice.*?(\d+)" :invoice-number #"Invoice.*?(\d+)"
:total #"Total:.*?([\d\-,]+\.\d{2,2}+)"} :total #"Total:.*?([\d\-,]+\.\d{2,2}+)"}
:parser {:date [:clj-time "MMMM dd, yyyy"] :parser {:date [:clj-time "MMMM dd, yyyy"]
@@ -606,9 +595,18 @@
{:vendor "TriMark R.W. Smith" {:vendor "TriMark R.W. Smith"
:keywords [#"TriMark"] :keywords [#"TriMark"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)" :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To\s+(.*?)\s{2,}" :customer-identifier #"Bill To\s+(.*?)\s{2,}"
:invoice-number #"Invoice #\n.*?([\d\-]+)\n" :invoice-number #"Invoice #\n.*?([\d\-]+)\n"
:total #"Invoice Total\s+([\d\-,]+\.\d{2,2}+)"} :total #"Invoice Total\s+([\d\-,]+\.\d{2,2}+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
{:vendor "Reel Produce"
:keywords [#"reelproduce.com"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To(?:.*?)\n\n\s+(.*?)\s{2,}"
:invoice-number #"Invoice #\n.*?([\d\-]+)\n"
:total #"Total\s*\n\s+\$([\d\-,]+\.\d{2,2}+)"}
:parser {:date [:clj-time "MM/dd/yy"] :parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}]) :total [:trim-commas-and-negate nil]}}])

View File

@@ -136,7 +136,8 @@
 (index-documents-raw [this index xs]
   (client/post
    (str (assoc (url/url solr-uri "solr" index "update")
-               :query {"commitWithin" 5000}))
+               :query {"commitWithin" 5000
+                       "commit" true}))
    {:headers {"Content-Type" "application/json"}
     :socket-timeout 30000
@@ -147,7 +148,8 @@
 (index-documents [this index xs]
   (client/post
    (str (assoc (url/url solr-uri "solr" index "update")
-               :query {"commitWithin" 5000}))
+               :query {"commitWithin" 5000
+                       "commit" true}))
    {:headers {"Content-Type" "application/json"}
     :socket-timeout 30000
     :connection-timeout 30000
@@ -168,7 +170,8 @@
 (delete [this index]
   (client/post
    (str (assoc (url/url solr-uri "solr" index "update")
-               :query {"commitWithin" 15000}))
+               :query {"commitWithin" 15000
+                       "commit" true}))
    {:headers {"Content-Type" "application/json"}
     :method "POST"
     :body (json/write-str {"delete" {"query" "*:*"}})})))