fixes bugs.

This commit is contained in:
2025-09-23 22:53:29 -07:00
parent 4c2369c16c
commit 45012e85cc
4 changed files with 378 additions and 34 deletions

View File

@@ -1,7 +1,9 @@
(ns auto-ap.parse.templates
(:require [auto-ap.parse.util :as u]
[auto-ap.logging :as alog]
[clojure.string :as str]))
[clj-time.core :as time]
[clojure.string :as str]
[auto-ap.time :as atime]))
(def pdf-templates
@@ -13,7 +15,7 @@
:date #"\s+([0-9]+/[0-9]+/[0-9]+)"
:invoice-number #"\s+[0-9]+/[0-9]+/[0-9]+\s+([0-9]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]}}
{:vendor "Gstar Seafood"
:keywords [#"G Star Seafood"]
:extract {:total #"Total\s{2,}([\d\-,]+\.\d{2,2}+)"
@@ -42,10 +44,10 @@
:total #"INVOICE TOTAL\s+([0-9.]+)"}
:parser {:date [:clj-time "MM/dd/yy"]}
:multi #"\f\f"}
;; IMPACT PAPER
{:vendor "Impact Paper & Ink LTD"
{:vendor "Impact Paper & Ink LTD"
:keywords [#"650-692-5598"]
:extract {:total #"Total Amount\s+\$([\d\.\,\-]+)"
:account-number #"CUST. #\n.*?/\d{4,}\s+(.*?)\n"
@@ -94,7 +96,7 @@
:total #"Total Invoice\s+([\-]?[0-9.]+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
{:vendor "Ben E. Keith"
:keywords [#"BEN E. KEITH"]
:extract {:date #"Customer No Mo Day Yr.*?\n.*?\d{5,}\s{2,}(\d+\s+\d+\s+\d+)"
@@ -104,7 +106,7 @@
:parser {:date [:month-day-year nil]
:total [:trim-commas-and-negate nil]}}
;; SOUTHBAY FRESH
;; SOUTHBAY FRESH
{:vendor "Southbay Fresh Produce"
:keywords [#"(SOUTH BAY FRESH PRODUCE|SOUTH BAY PRODUCE)"]
:extract {:date #"^([0-9]+/[0-9]+/[0-9]+)"
@@ -115,25 +117,25 @@
:multi #"\n"
:multi-match? #"^[0-9]+/[0-9]+/[0-9]+\s+INV "}
;; DON VITO
;; DON VITO
{:vendor "Don Vito Ozuna Food Corp"
:keywords [#"408-465-2010"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To.*?\n(.*?)\s{2,}"
:invoice-number #"(?:[0-9]+/[0-9]+/[0-9]+)\s{2,}(\d+)"
:total #"Please remit payment to\s{2,}\$([\-0-9.]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]} }
:parser {:date [:clj-time "MM/dd/yyyy"]}}
;; DON VITO STATEMENT
{:vendor "Don Vito Ozuna Food Corp"
{:vendor "Don Vito Ozuna Food Corp"
:keywords [#"Don Vito Ozuna Food Corp.*?\n.*?Statement"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"To:.*?\n\s*(.*)?\s{2,}"
:invoice-number #"INV #(\d+)"
:total #"Amount \$([\d\-\.]+?)\.\s{2,}"}
:parser {:date [:clj-time "MM/dd/yyyy"]}
:multi #"\n"
:multi-match? #"\d+/\d+/\d+.*?INV"}
:multi #"\n"
:multi-match? #"\d+/\d+/\d+.*?INV"}
;; PFG - LEDYARD
{:vendor "Performance Food Group - LEDYARD"
@@ -221,8 +223,8 @@
:total [:trim-commas nil]}
:multi (. java.util.regex.Pattern (compile (-> \formfeed str) java.util.regex.Pattern/CASE_INSENSITIVE))
:multi-match? #"(Total\s+[0-9\.]+|Total Order)"}
;; AUTO-CHLOR
;; AUTO-CHLOR
{:vendor "Auto-Chlor"
:keywords [#"AUTO-CHLOR"]
:extract {:date #"DATE : ([0-9]+/[0-9]+/[0-9]+)"
@@ -254,7 +256,7 @@
:multi #"\n"
:multi-match? #"^\s+.*?\d{6,}.*?\$"}
;; C & L
;; C & L
{:vendor "C&L Produce"
:keywords [#"440 Franklin Street"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
@@ -368,7 +370,7 @@
:total [:trim-commas nil]}}
;; Breakthru Bev
;; Breakthru Bev
{:vendor "Wine Warehouse"
:keywords [#"BREAKTHRU BEVERAGE"]
:extract {:date #"Invoice Date:\s+([0-9]+/[0-9]+/[0-9]+)"
@@ -377,7 +379,7 @@
:account-number #"Customer #:\s+(\d+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; THE WATER PROS
{:vendor "The Water Pros"
:keywords [#"The Water Pros, Inc"]
@@ -418,7 +420,7 @@
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; PACIFIC SEAFOOD
;; PACIFIC SEAFOOD
{:vendor "Pacific Seafood"
:keywords [#"(pacseafood|PACIFIC FRESH)"]
:extract {:date #"DATE(?:.*\n.*(?=([0-9]+/[0-9]+/[0-9]+)))([0-9]+/[0-9]+/[0-9]+)"
@@ -490,7 +492,7 @@
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; A&B
;; A&B
{:vendor "A&B Produce"
:keywords [#"ABProduce"]
:extract {:date #"^\s+([0-9]+/[0-9]+/[0-9]+)"
@@ -623,7 +625,7 @@
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}}
;; JFC
;; JFC
{:vendor "JFC International"
:keywords [#"48490 MILMONT DRIVE"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
@@ -723,7 +725,7 @@
:total #"Invoice Total:\s+([\d\-,]+\.\d{2,2}+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
{:vendor "Mani Imports"
:keywords [#"Mani Imports"]
:extract {:date #"Order Date\s+([0-9]+/[0-9]+/[0-9]+)"
@@ -731,7 +733,7 @@
:invoice-number #"Invoice Number:\s+(.*?)\n"
:total #"Invoice Total:\s+([\d\-,]+\.\d{2,2}+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]} }
:total [:trim-commas-and-negate nil]}}
{:vendor "Reel Produce"
:keywords [#"REEL Produce, Inc" #"Statem"]
:extract {:date #"\s*([0-9]+/[0-9]+/[0-9]+)"
@@ -789,15 +791,36 @@
(and
(seq r)
(->> r first not-empty))))
(take 4)
(map
(fn [[customer-number _ _ _ invoice-number date amount :as row]]
  ;; Turns one spreadsheet row into an invoice map.
  ;;
  ;; Positional columns used: 0 = customer number, 4 = invoice number,
  ;; 5 = date, 6 = amount; columns 1-3 are ignored.
  ;;
  ;; :date is resolved in two steps:
  ;;   1. try to parse the cell as an "MM/dd/yyyy" string;
  ;;   2. if that throws or yields nil, treat the cell as a numeric day
  ;;      serial and convert it with the same arithmetic the previous code
  ;;      used (1900-01-01 plus (serial - 2) days).
  ;;      NOTE(review): presumably this is an Excel 1900-system serial date;
  ;;      confirm against the sheets being imported.
  ;; If both steps fail, :date is nil.
  (let [row-text     (str/join " " row)
        ;; Converts a day-serial cell value to a local date-time, or nil.
        ;; The value is read from the cell itself rather than a fixed
        ;; literal; going through `str` + Double/parseDouble accepts both
        ;; string cells ("45663") and numeric cells (45663.0).
        serial->date (fn [cell]
                       (try
                         (let [serial (long (Double/parseDouble (str/trim (str cell))))]
                           (atime/as-local-time
                            (time/plus (time/date-time 1900 1 1)
                                       (time/days (- serial 2)))))
                         (catch Exception _
                           nil)))
        ;; str/trim throws on non-string cells and parse-value may throw on
        ;; a format mismatch; either way fall through to the serial path.
        text-date    (try
                       (u/parse-value :clj-time "MM/dd/yyyy" (str/trim date))
                       (catch Exception _
                         nil))]
    {:customer-identifier customer-number
     :text                row-text
     :full-text           row-text
     :date                (or text-date (serial->date date))
     :invoice-number      invoice-number
     :total               (str amount)
     :vendor-code         vendor}))))
conj
[]
sheet)))}])