From aaa7c54f0c85a3a1e03577759b65c97cebdc0b17 Mon Sep 17 00:00:00 2001 From: Bryce Covert Date: Sat, 6 May 2023 06:47:44 -0700 Subject: [PATCH 1/4] fixes check margins. --- src/clj/auto_ap/graphql/checks.clj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/clj/auto_ap/graphql/checks.clj b/src/clj/auto_ap/graphql/checks.clj index b27ddc5b..c9b2fb7d 100644 --- a/src/clj/auto_ap/graphql/checks.clj +++ b/src/clj/auto_ap/graphql/checks.clj @@ -164,7 +164,7 @@ [[:cell {:colspan 12} [:spacer]]] [[:cell {:colspan 12} [:spacer]]] - [[:cell {:colspan 5}] + [[:cell {:colspan 3}] [:cell {:align :right :colspan 2} "Check:\n" "Vendor:\n" @@ -174,7 +174,7 @@ "Amount:\n" "Date:\n"] - [:cell {:colspan 5} + [:cell {:colspan 7} [:paragraph check] [:paragraph vendor-name] [:paragraph (:client/name client)] From 13b9bec09f74cdc143900e160db355ca598073d8 Mon Sep 17 00:00:00 2001 From: Bryce Covert Date: Mon, 8 May 2023 20:55:06 -0700 Subject: [PATCH 2/4] fixing minor bugs in general produce import --- src/clj/auto_ap/jobs/ntg.clj | 72 +++++++++++++++++--------------- src/clj/auto_ap/square/core3.clj | 5 ++- 2 files changed, 41 insertions(+), 36 deletions(-) diff --git a/src/clj/auto_ap/jobs/ntg.clj b/src/clj/auto_ap/jobs/ntg.clj index 7dd59dba..0cc9f3ed 100644 --- a/src/clj/auto_ap/jobs/ntg.clj +++ b/src/clj/auto_ap/jobs/ntg.clj @@ -65,40 +65,43 @@ (defmethod extract-invoice-details :general-produce [k input-stream clients] (log/info ::parsing-general-produce :key k) - (try - (->> (read-csv input-stream) - (drop 1) - (filter (fn [[_ _ _ _ _ _ _ _ _ _ _ break-flag]] - (= "Y" break-flag))) - (map (fn [[_ location-hint invoice-number ship-date invoice-total ]] - (let [matching-client (and location-hint - (parse/exact-match clients location-hint)) - location (parse/best-location-match matching-client location-hint location-hint ) - vendor (d/pull (d/db conn) '[:vendor/default-account] :vendor/general-produce)] - (when-not matching-client - (log/warn ::missing-client - :client-hint location-hint)) - {:invoice/location location - :invoice/date (coerce/to-date (atime/parse ship-date atime/normal-date)) - :invoice/invoice-number invoice-number - :invoice/total (Double/parseDouble invoice-total) - :invoice/vendor :vendor/general-produce - :invoice/outstanding-balance (Double/parseDouble invoice-total) - :invoice/client (:db/id matching-client) - :invoice/import-status :import-status/imported - :invoice/status :invoice-status/unpaid - :invoice/client-identifier location-hint - :invoice/expense-accounts [{:invoice-expense-account/account - (-> vendor :vendor/default-account :db/id) - :invoice-expense-account/location location - :invoice-expense-account/amount (Math/abs (Double/parseDouble invoice-total)) - }]}))) - (filter :invoice/client) - (into [])) - (catch Exception e - (log/error ::cant-import-general-produce - :error e) - []))) + (let [missing-client-hints (atom #{})] + (try + (->> (read-csv input-stream) + (drop 1) + #_(filter (fn [[_ _ _ _ _ _ _ _ _ _ _ break-flag]] + (= "Y" break-flag))) + (map (fn [[_ location-hint invoice-number ship-date invoice-total ]] + (let [matching-client (and location-hint + (parse/exact-match clients location-hint)) + location (parse/best-location-match matching-client location-hint location-hint ) + vendor (d/pull (d/db conn) '[:vendor/default-account] :vendor/general-produce)] + (when-not (and matching-client + (not (@missing-client-hints location-hint))) + (log/warn ::missing-client + :client-hint location-hint) + (swap! missing-client-hints conj location-hint)) + {:invoice/location location + :invoice/date (coerce/to-date (atime/parse ship-date atime/normal-date)) + :invoice/invoice-number invoice-number + :invoice/total (Double/parseDouble invoice-total) + :invoice/vendor :vendor/general-produce + :invoice/outstanding-balance (Double/parseDouble invoice-total) + :invoice/client (:db/id matching-client) + :invoice/import-status :import-status/imported + :invoice/status :invoice-status/unpaid + :invoice/client-identifier location-hint + :invoice/expense-accounts [{:invoice-expense-account/account + (-> vendor :vendor/default-account :db/id) + :invoice-expense-account/location location + :invoice-expense-account/amount (Math/abs (Double/parseDouble invoice-total)) + }]}))) + (filter :invoice/client) + (into [])) + (catch Exception e + (log/error ::cant-import-general-produce + :error e) + [])))) (defmethod extract-invoice-details :unknown [k input-stream clients] @@ -245,6 +248,7 @@ (->> (extract-invoice-details k is clients) + (set) (map (fn [i] (log/info ::importing-invoice :invoice i) diff --git a/src/clj/auto_ap/square/core3.clj b/src/clj/auto_ap/square/core3.clj index ae766f3e..d81f64db 100644 --- a/src/clj/auto_ap/square/core3.clj +++ b/src/clj/auto_ap/square/core3.clj @@ -631,7 +631,7 @@ (defn cash-drawer-shifts ([client l] - (cash-drawer-shifts client l (time/plus (time/now) (time/days -14)) (time/now))) + (cash-drawer-shifts client l (time/plus (time/now) (time/days -75)) (time/now))) ([client l start end] (de/chain (manifold-api-call {:url (str "https://connect.squareup.com/v2/cash-drawers/shifts" "?" @@ -639,7 +639,8 @@ (url/map->query {:location_id (:square-location/square-id l) :begin_time (->square-date start) - :end_time (->square-date end)})) + :end_time (->square-date end) + :limit 1000})) :method :get :headers (client-base-headers client "2023-04-19") From 05f1f009fdf9b590a89c9491bf0b6daace865c98 Mon Sep 17 00:00:00 2001 From: Bryce Covert Date: Tue, 9 May 2023 16:32:00 -0700 Subject: [PATCH 3/4] Adds cloud approach to reading excel --- project.clj | 3 + src/clj/auto_ap/routes/ezcater_xls.clj | 64 ++++++++++++------- .../integration/routes/ezcater_xls.clj | 2 +- 3 files changed, 46 insertions(+), 23 deletions(-) diff --git a/project.clj b/project.clj index 2c371aa1..1a5039ef 100644 --- a/project.clj +++ b/project.clj @@ -69,6 +69,9 @@ [com.amazonaws/aws-java-sdk-sqs "1.11.926" :exclusions [commons-codec org.apache.httpcomponents/httpclient]] + [com.amazonaws/aws-java-sdk-lambda "1.11.926" + :exclusions [commons-codec + org.apache.httpcomponents/httpclient]] [com.amazonaws/aws-java-sdk-ecs "1.11.926" :exclusions [commons-codec diff --git a/src/clj/auto_ap/routes/ezcater_xls.clj b/src/clj/auto_ap/routes/ezcater_xls.clj index a78b01c0..8bd53bc4 100644 --- a/src/clj/auto_ap/routes/ezcater_xls.clj +++ b/src/clj/auto_ap/routes/ezcater_xls.clj @@ -2,7 +2,10 @@ (:require [auto-ap.datomic :refer [audit-transact conn]] [auto-ap.logging :as alog] + [clojure.data.json :as json] [auto-ap.parse :as parse] + [amazonica.aws.lambda :as lambda] + [config.core :refer [env]] [auto-ap.shared-views.admin.side-bar :refer [admin-side-bar]] [auto-ap.ssr-routes :as ssr-routes] [auto-ap.ssr.ui :refer [base-page]] @@ -14,7 +17,8 @@ [com.brunobonacci.mulog :as mu] [datomic.api :as dc] [dk.ative.docjure.spreadsheet :as doc] - [hiccup2.core :as hiccup])) + [hiccup2.core :as hiccup] + [amazonica.aws.s3 :as s3])) (defn fmt-amount [a] (with-precision 2 @@ -23,15 +27,15 @@ (.setScale 2 java.math.RoundingMode/HALF_UP) (double)))) -(defn extract-sheet-details [f] - (into [] - (for [row (->> (doc/load-workbook f) - (doc/sheet-seq) - first - (doc/row-seq) - )] - (mapv doc/read-cell (doc/cell-seq row)) - ))) +(defn extract-sheet-details [bucket object] + (-> (lambda/invoke {:function-name "xls-extractor" :payload + (json/write-str + {"s3_url" object "s3_bucket" bucket})} + ) + :payload + slurp + json/read-str)) + (defn rows->maps [rows] (let [[headers & rows] rows] @@ -39,26 +43,37 @@ (into {} (map vector headers r))))) +(defn xls-date->date [f] + (when (not-empty f) + (let [f (Double/parseDouble f) + unix-days (- f 25569.0) + unix-secs (* unix-days 86400.0)] + (java.util.Date. (long (Math/round (* 1000.0 unix-secs))))))) + (defn map->sales-order [r clients] (let [order-number (get r "Order Number") event-date (get r "Event Date") store-name (get r "Store Name") - adjustments (get r "Adjustments") - tax (get r "Sales Tax") - food-total (get r "Food Total") - commission (get r "Commission") - fee (get r "Payment Transaction Fee") - tip (get r "Tip") + adjustments (some-> (get r "Adjustments") not-empty (Double/parseDouble)) + tax (some-> (get r "Sales Tax") not-empty (Double/parseDouble)) + food-total (some-> (get r "Food Total") not-empty (Double/parseDouble)) + commission (some-> (get r "Commission") not-empty (Double/parseDouble)) + fee (some-> (get r "Payment Transaction Fee") not-empty (Double/parseDouble)) + tip (some-> (get r "Tip") not-empty (Double/parseDouble)) caterer-name (get r "Caterer Name") client (some->> caterer-name not-empty (parse/exact-match clients)) client-id (:db/id client) - location (first (:client/locations client))] + location (first (:client/locations client)) + event-date (some-> (xls-date->date event-date) + coerce/to-date-time + atime/as-local-time + coerce/to-date )] (cond (and event-date client-id location ) [:order #:sales-order - {:date (coerce/to-date (atime/localize (coerce/to-date-time event-date))) + {:date event-date :external-id (str "ezcater/order/" client-id "-" location "-" order-number) :client client-id :location location @@ -75,7 +90,7 @@ :charges [#:charge {:type-name "CARD" - :date (coerce/to-date (atime/localize (coerce/to-date-time event-date))) + :date event-date :client client-id :location location :external-id (str "ezcater/charge/" client-id "-" location "-" order-number "-" 0) @@ -114,10 +129,15 @@ :client/matches :client/locations]) :where [?c :client/code]] - (dc/db conn)))] + (dc/db conn))) + object (str "/ezcater-xls/" (str (java.util.UUID/randomUUID)))] + (mu/log ::writing-temp-xls + :location object) + (s3/put-object {:bucket-name (:data-bucket env) + :key object + :input-stream s}) (into [] - (->> s - extract-sheet-details + (->> (extract-sheet-details (:data-bucket env) object) rows->maps (map #(map->sales-order % clients)) (filter identity))))) diff --git a/test/clj/auto_ap/integration/routes/ezcater_xls.clj b/test/clj/auto_ap/integration/routes/ezcater_xls.clj index 0e4eecbf..4559a217 100644 --- a/test/clj/auto_ap/integration/routes/ezcater_xls.clj +++ b/test/clj/auto_ap/integration/routes/ezcater_xls.clj @@ -8,7 +8,7 @@ (use-fixtures :each wrap-setup) -(deftest stream->sales-ordersx +(deftest stream->sales-orders (testing "Should import nothing when there are no clients" (with-open [s (io/input-stream (io/resource "sample-ezcater.xlsx"))] (is (= [:missing "Nick The Greek (Santa Cruz)"] (first (sut/stream->sales-orders s)))))) From 8bd73b8a9862f076344e6aad31d4792243c94254 Mon Sep 17 00:00:00 2001 From: Bryce Covert Date: Wed, 10 May 2023 10:39:58 -0700 Subject: [PATCH 4/4] Removes docjure, freeing space --- project.clj | 2 - src/clj/auto_ap/parse/excel.clj | 46 +++++-- src/clj/auto_ap/parse/templates.clj | 112 ++++-------------- src/clj/auto_ap/routes/ezcater_xls.clj | 22 +--- .../integration/routes/ezcater_xls.clj | 3 +- 5 files changed, 60 insertions(+), 125 deletions(-) diff --git a/project.clj b/project.clj index 1a5039ef..69a5f7a2 100644 --- a/project.clj +++ b/project.clj @@ -29,8 +29,6 @@ [ring/ring-jetty-adapter "1.9.6" :exclusions [org.eclipse.jetty/jetty-server]] [yogthos/config "1.1.7"] - [dk.ative/docjure "1.14.0"] - [clj-fuzzy "0.4.1"] [com.walmartlabs/lacinia "0.37.0"] [vincit/venia "0.2.5"] diff --git a/src/clj/auto_ap/parse/excel.clj b/src/clj/auto_ap/parse/excel.clj index 4130dfeb..1f3b21f7 100644 --- a/src/clj/auto_ap/parse/excel.clj +++ b/src/clj/auto_ap/parse/excel.clj @@ -2,18 +2,23 @@ (:require [auto-ap.parse.templates :as t] [auto-ap.parse.util :as u] [clojure.string :as str] - [dk.ative.docjure.spreadsheet :as d]) - (:import (org.apache.poi.ss.util CellAddress))) + [amazonica.aws.lambda :as lambda] + [clojure.data.json :as json] + [config.core :refer [env]] + [clojure.java.io :as io] + [amazonica.aws.s3 :as s3]) + ) (defn template-applies? [text {:keys [keywords]}] + (every? #(re-find % text) keywords)) (defn extract [wb {:keys [extract vendor parser]}] (if (fn? extract) (extract wb vendor) - [(reduce-kv + #_[(reduce-kv (fn [invoice k [regex offset-row offset-column extract-regex]] (assoc invoice k (->> wb @@ -38,17 +43,34 @@ {:vendor-code vendor} extract)])) +(defn extract-sheet-details [bucket object] + (-> (lambda/invoke {:function-name "xls-extractor" :payload + (json/write-str + {"s3_url" object "s3_bucket" bucket})}) + :payload + slurp + json/read-str)) + (defn parse-file [file _] - (let [wb (d/load-workbook file) - text (->> wb - (d/sheet-seq) - first - (d/cell-seq) - (map d/read-cell) - (str/join " "))] + (let [tmp-key (str "xls-invoice/import/" (java.util.UUID/randomUUID)) + _ (with-open [f (io/input-stream file)] + (s3/put-object {:bucket-name (:data-bucket env) + :key tmp-key + :input-stream f})) + sheet (extract-sheet-details (:data-bucket env) tmp-key) + text (str/join " " (mapcat seq sheet))] (->> t/excel-templates (filter (partial template-applies? text)) first - (extract wb) - ))) + (extract sheet)))) + + + + +(defn xls-date->date [f] + (when (not-empty f) + (let [f (Double/parseDouble f) + unix-days (- f 25569.0) + unix-secs (* unix-days 86400.0)] + (java.util.Date. (long (Math/round (* 1000.0 unix-secs))))))) diff --git a/src/clj/auto_ap/parse/templates.clj b/src/clj/auto_ap/parse/templates.clj index e05614c4..d3b270cc 100644 --- a/src/clj/auto_ap/parse/templates.clj +++ b/src/clj/auto_ap/parse/templates.clj @@ -1,8 +1,6 @@ (ns auto-ap.parse.templates - (:require [dk.ative.docjure.spreadsheet :as d] - [auto-ap.parse.util :as u] - [clojure.string :as str]) - (:import (org.apache.poi.ss.util CellAddress))) + (:require [auto-ap.parse.util :as u] + [clojure.string :as str])) (def pdf-templates @@ -614,97 +612,27 @@ :parser {:date [:clj-time "MM/dd/yy"] :total [:trim-commas-and-negate nil]}}]) -(defn offset [c x y] - (.toString (CellAddress. (+ y (.getRow (.getAddress c))) (+ x (.getColumn (.getAddress c))) ))) - (def excel-templates - [{:vendor "Isp Productions" - :keywords [#"ISP PRODUCTIONS"] - :extract {:customer-identifier [#"SERVICES PROVIDED TO" 1 0] - :total [#"PAY THIS" -1 0] - :date [#"INVOICE DATE" 0 1] - :invoice-number [#"INVOICE NUMBER" 0 1]}} - {:vendor "Southern Glazers" - :keywords [#"Please note that the total invoice amount may"] - :extract {:customer-identifier [#"Customer #" 1 0] - :total [#"Subtotal" 0 16 ] - :date [#"Date" 0 0 #"Date: (.*)"] - :invoice-number [#"Invoice #" 0 0 #"Invoice #: (.*)"] - :account-number [#"Customer #" 0 0 #"Customer #: (.*)"]} - :parser { :total [:trim-commas-and-remove-dollars-and-invert-parentheses nil] - :date [:clj-time "MM/dd/yyyy"]}} - {:vendor "Mama Lu's Foods" + [{:vendor "Mama Lu's Foods" :keywords [#"Mama Lu's Foods"] - :extract (fn [wb vendor] - (let [[sheet] (d/sheet-seq wb)] - (transduce (comp - (drop 5) - (filter + :extract (fn [sheet vendor] + (transduce (comp + (drop 5) + (filter (fn [r] (and - r - (->> r d/cell-seq second d/read-cell)))) - (map + (seq r) + (->> r second not-empty)))) + (map (fn [r] - (let [[_ customer-order-number num date name amount] (map d/read-cell (d/cell-seq r))] + (let [[_ customer-order-number num date name amount] r] {:customer-identifier (second (re-find #"([^:]*):" name)) - :text name - :full-text name - :date (u/parse-value :clj-time "MM/dd/yyyy" (str/trim date)) - :invoice-number (str customer-order-number "-" (int num)) - :total (str amount) - :vendor-code vendor})))) - conj - [] - (d/row-seq sheet))))} - {:vendor "DVW Commercial" - :keywords [#"Total for" #"Num"] - :extract (fn [wb vendor] - (let [[sheet] (d/sheet-seq wb)] - (transduce (comp (filter (fn [c] - (re-find #"Invoice" (str (d/read-cell c))))) - (map (fn [c] - (let [customer-identifier (d/read-cell (->> (d/select-cell (offset c -3 0) sheet) - (iterate (fn [c] - (d/select-cell (offset c 0 -1) sheet))) - (filter (fn [c] - (not (str/blank? (d/read-cell c))))) - first))] - {:customer-identifier customer-identifier - :text customer-identifier - :full-text customer-identifier - :date (d/read-cell (d/select-cell (offset c 2 0) sheet)) - :invoice-number (d/read-cell (d/select-cell (offset c 4 0) sheet)) - :total (str (d/read-cell (d/select-cell (offset c 8 0) sheet))) - :vendor-code vendor})))) - conj - [] - (d/cell-seq sheet))))} - {:vendor "Chef's Choice Produce Co" - :keywords [#"Alt_invoice_number"] - :extract (fn [wb vendor] - (let [[sheet] (d/sheet-seq wb)] - (transduce (comp - (drop-while (fn [c] - (not (re-find #"Customer_id" (str (d/read-cell c)))))) - (drop 9) - (filter (fn [c] - (= 0 (.getColumnIndex c)))) - (filter (fn [c] - (not (str/blank? (str/trim (or (d/read-cell (d/select-cell (offset c 1 0) sheet)) "")))))) - (map (fn [c] - {:customer-identifier (str/trim (d/read-cell (d/select-cell (offset c 1 0) sheet))) - :text (d/read-cell (d/select-cell (offset c 1 0) sheet)) - :full-text (d/read-cell (d/select-cell (offset c 1 0) sheet)) - :date (u/parse-value :clj-time "MM/dd/yyyy" (str/trim (d/read-cell (d/select-cell (offset c 5 0) sheet)))) - :invoice-number (->> - (re-find #"^(?:0+([A-Z0-9]+))|([A-Z]+[A-Z0-9]+)" (str/trim (d/read-cell (d/select-cell (offset c 2 0) sheet)))) - (drop 1 ) - (filter identity) - first) - :total (str (d/read-cell (d/select-cell (offset c 7 0) sheet))) - :vendor-code vendor})) - (filter :customer-identifier)) - conj - [] - (d/cell-seq sheet))))}]) + :text name + :full-text name + :date (u/parse-value :clj-time "MM/dd/yyyy" (str/trim date)) + :invoice-number (str customer-order-number "-" (Integer/parseInt num)) + :total (str amount) + :vendor-code vendor})))) + conj + [] + sheet))}]) diff --git a/src/clj/auto_ap/routes/ezcater_xls.clj b/src/clj/auto_ap/routes/ezcater_xls.clj index 8bd53bc4..dc45f3cf 100644 --- a/src/clj/auto_ap/routes/ezcater_xls.clj +++ b/src/clj/auto_ap/routes/ezcater_xls.clj @@ -3,6 +3,7 @@ [auto-ap.datomic :refer [audit-transact conn]] [auto-ap.logging :as alog] [clojure.data.json :as json] + [auto-ap.parse.excel :as excel] [auto-ap.parse :as parse] [amazonica.aws.lambda :as lambda] [config.core :refer [env]] @@ -16,7 +17,6 @@ [clojure.java.io :as io] [com.brunobonacci.mulog :as mu] [datomic.api :as dc] - [dk.ative.docjure.spreadsheet :as doc] [hiccup2.core :as hiccup] [amazonica.aws.s3 :as s3])) @@ -27,14 +27,7 @@ (.setScale 2 java.math.RoundingMode/HALF_UP) (double)))) -(defn extract-sheet-details [bucket object] - (-> (lambda/invoke {:function-name "xls-extractor" :payload - (json/write-str - {"s3_url" object "s3_bucket" bucket})} - ) - :payload - slurp - json/read-str)) + (defn rows->maps [rows] @@ -43,12 +36,7 @@ (into {} (map vector headers r))))) -(defn xls-date->date [f] - (when (not-empty f) - (let [f (Double/parseDouble f) - unix-days (- f 25569.0) - unix-secs (* unix-days 86400.0)] - (java.util.Date. (long (Math/round (* 1000.0 unix-secs))))))) + (defn map->sales-order [r clients] @@ -67,7 +55,7 @@ (parse/exact-match clients)) client-id (:db/id client) location (first (:client/locations client)) - event-date (some-> (xls-date->date event-date) + event-date (some-> (excel/xls-date->date event-date) coerce/to-date-time atime/as-local-time coerce/to-date )] @@ -137,7 +125,7 @@ :key object :input-stream s}) (into [] - (->> (extract-sheet-details (:data-bucket env) object) + (->> (excel/extract-sheet-details (:data-bucket env) object) rows->maps (map #(map->sales-order % clients)) (filter identity))))) diff --git a/test/clj/auto_ap/integration/routes/ezcater_xls.clj b/test/clj/auto_ap/integration/routes/ezcater_xls.clj index 4559a217..366c6208 100644 --- a/test/clj/auto_ap/integration/routes/ezcater_xls.clj +++ b/test/clj/auto_ap/integration/routes/ezcater_xls.clj @@ -20,8 +20,7 @@ :client/name "The client" :client/matches ["Nick the Greek (Elk Grove)"])])] (with-open [s (io/input-stream (io/resource "sample-ezcater.xlsx"))] - (is (seq (sut/stream->sales-orders s))) - ) + (is (seq (sut/stream->sales-orders s)))) (with-open [s (io/input-stream (io/resource "sample-ezcater.xlsx"))] (is (= #:sales-order {:vendor :vendor/ccp-ezcater