Removes docjure, freeing space

This commit is contained in:
2023-05-10 10:39:58 -07:00
parent 05f1f009fd
commit 8bd73b8a98
5 changed files with 60 additions and 125 deletions

View File

@@ -2,18 +2,23 @@
(:require [auto-ap.parse.templates :as t]
[auto-ap.parse.util :as u]
[clojure.string :as str]
[dk.ative.docjure.spreadsheet :as d])
(:import (org.apache.poi.ss.util CellAddress)))
[amazonica.aws.lambda :as lambda]
[clojure.data.json :as json]
[config.core :refer [env]]
[clojure.java.io :as io]
[amazonica.aws.s3 :as s3])
)
(defn template-applies?
  "Returns true when every regex in the template's :keywords finds a match
  somewhere in `text`. A template with no keywords applies to any text."
  [text {:keys [keywords]}]
  (not-any? (fn [pattern]
              ;; re-find yields nil only when the pattern does not match at all
              (nil? (re-find pattern text)))
            keywords))
(defn extract [wb {:keys [extract vendor parser]}]
(if (fn? extract)
(extract wb vendor)
[(reduce-kv
#_[(reduce-kv
(fn [invoice k [regex offset-row offset-column extract-regex]]
(assoc invoice k
(->> wb
@@ -38,17 +43,34 @@
{:vendor-code vendor}
extract)]))
(defn extract-sheet-details
  "Invokes the \"xls-extractor\" Lambda function with a JSON payload holding
  `object` under \"s3_url\" and `bucket` under \"s3_bucket\", then slurps the
  response's :payload and parses it as JSON.

  Returns the parsed JSON value with string keys (json/read-str is called
  without a key-fn).
  NOTE(review): callers appear to treat the result as a sequence of rows;
  confirm against the extractor's actual output."
  [bucket object]
  (let [request  (json/write-str {"s3_url" object "s3_bucket" bucket})
        response (lambda/invoke {:function-name "xls-extractor"
                                 :payload       request})
        body     (slurp (:payload response))]
    (json/read-str body)))
(defn parse-file
[file _]
(let [wb (d/load-workbook file)
text (->> wb
(d/sheet-seq)
first
(d/cell-seq)
(map d/read-cell)
(str/join " "))]
(let [tmp-key (str "xls-invoice/import/" (java.util.UUID/randomUUID))
_ (with-open [f (io/input-stream file)]
(s3/put-object {:bucket-name (:data-bucket env)
:key tmp-key
:input-stream f}))
sheet (extract-sheet-details (:data-bucket env) tmp-key)
text (str/join " " (mapcat seq sheet))]
(->> t/excel-templates
(filter (partial template-applies? text))
first
(extract wb)
)))
(extract sheet))))
(defn xls-date->date
  "Converts a spreadsheet date serial, given as a string, to a java.util.Date.

  `f` is the textual serial number (days, with an optional fractional part
  for the time of day). Returns nil when `f` is nil, empty, or contains only
  whitespace. Throws NumberFormatException when `f` holds non-numeric text.

  The serial is shifted by 25569 days, which this code uses as the serial of
  the Unix epoch, and then scaled to milliseconds. No time-zone adjustment is
  applied here."
  [f]
  ;; Trim first so a whitespace-only cell is treated as empty instead of
  ;; reaching Double/parseDouble and raising NumberFormatException.
  (let [serial (if (string? f) (.trim ^String f) f)]
    (when (not-empty serial)
      (let [days      (Double/parseDouble serial)
            unix-days (- days 25569.0)
            unix-secs (* unix-days 86400.0)]
        (java.util.Date. (long (Math/round (* 1000.0 unix-secs))))))))

View File

@@ -1,8 +1,6 @@
(ns auto-ap.parse.templates
(:require [dk.ative.docjure.spreadsheet :as d]
[auto-ap.parse.util :as u]
[clojure.string :as str])
(:import (org.apache.poi.ss.util CellAddress)))
(:require [auto-ap.parse.util :as u]
[clojure.string :as str]))
(def pdf-templates
@@ -614,97 +612,27 @@
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}])
(defn offset
  "Returns the string form of the cell address obtained by shifting cell `c`'s
  address by `x` columns and `y` rows."
  [c x y]
  (let [address (.getAddress c)
        row     (+ y (.getRow address))
        column  (+ x (.getColumn address))]
    (.toString (CellAddress. row column))))
(def excel-templates
[{:vendor "Isp Productions"
:keywords [#"ISP PRODUCTIONS"]
:extract {:customer-identifier [#"SERVICES PROVIDED TO" 1 0]
:total [#"PAY THIS" -1 0]
:date [#"INVOICE DATE" 0 1]
:invoice-number [#"INVOICE NUMBER" 0 1]}}
{:vendor "Southern Glazers"
:keywords [#"Please note that the total invoice amount may"]
:extract {:customer-identifier [#"Customer #" 1 0]
:total [#"Subtotal" 0 16 ]
:date [#"Date" 0 0 #"Date: (.*)"]
:invoice-number [#"Invoice #" 0 0 #"Invoice #: (.*)"]
:account-number [#"Customer #" 0 0 #"Customer #: (.*)"]}
:parser { :total [:trim-commas-and-remove-dollars-and-invert-parentheses nil]
:date [:clj-time "MM/dd/yyyy"]}}
{:vendor "Mama Lu's Foods"
[{:vendor "Mama Lu's Foods"
:keywords [#"Mama Lu's Foods"]
:extract (fn [wb vendor]
(let [[sheet] (d/sheet-seq wb)]
(transduce (comp
(drop 5)
(filter
:extract (fn [sheet vendor]
(transduce (comp
(drop 5)
(filter
(fn [r]
(and
r
(->> r d/cell-seq second d/read-cell))))
(map
(seq r)
(->> r second not-empty))))
(map
(fn [r]
(let [[_ customer-order-number num date name amount] (map d/read-cell (d/cell-seq r))]
(let [[_ customer-order-number num date name amount] r]
{:customer-identifier (second (re-find #"([^:]*):" name))
:text name
:full-text name
:date (u/parse-value :clj-time "MM/dd/yyyy" (str/trim date))
:invoice-number (str customer-order-number "-" (int num))
:total (str amount)
:vendor-code vendor}))))
conj
[]
(d/row-seq sheet))))}
{:vendor "DVW Commercial"
:keywords [#"Total for" #"Num"]
:extract (fn [wb vendor]
(let [[sheet] (d/sheet-seq wb)]
(transduce (comp (filter (fn [c]
(re-find #"Invoice" (str (d/read-cell c)))))
(map (fn [c]
(let [customer-identifier (d/read-cell (->> (d/select-cell (offset c -3 0) sheet)
(iterate (fn [c]
(d/select-cell (offset c 0 -1) sheet)))
(filter (fn [c]
(not (str/blank? (d/read-cell c)))))
first))]
{:customer-identifier customer-identifier
:text customer-identifier
:full-text customer-identifier
:date (d/read-cell (d/select-cell (offset c 2 0) sheet))
:invoice-number (d/read-cell (d/select-cell (offset c 4 0) sheet))
:total (str (d/read-cell (d/select-cell (offset c 8 0) sheet)))
:vendor-code vendor}))))
conj
[]
(d/cell-seq sheet))))}
{:vendor "Chef's Choice Produce Co"
:keywords [#"Alt_invoice_number"]
:extract (fn [wb vendor]
(let [[sheet] (d/sheet-seq wb)]
(transduce (comp
(drop-while (fn [c]
(not (re-find #"Customer_id" (str (d/read-cell c))))))
(drop 9)
(filter (fn [c]
(= 0 (.getColumnIndex c))))
(filter (fn [c]
(not (str/blank? (str/trim (or (d/read-cell (d/select-cell (offset c 1 0) sheet)) ""))))))
(map (fn [c]
{:customer-identifier (str/trim (d/read-cell (d/select-cell (offset c 1 0) sheet)))
:text (d/read-cell (d/select-cell (offset c 1 0) sheet))
:full-text (d/read-cell (d/select-cell (offset c 1 0) sheet))
:date (u/parse-value :clj-time "MM/dd/yyyy" (str/trim (d/read-cell (d/select-cell (offset c 5 0) sheet))))
:invoice-number (->>
(re-find #"^(?:0+([A-Z0-9]+))|([A-Z]+[A-Z0-9]+)" (str/trim (d/read-cell (d/select-cell (offset c 2 0) sheet))))
(drop 1 )
(filter identity)
first)
:total (str (d/read-cell (d/select-cell (offset c 7 0) sheet)))
:vendor-code vendor}))
(filter :customer-identifier))
conj
[]
(d/cell-seq sheet))))}])
:text name
:full-text name
:date (u/parse-value :clj-time "MM/dd/yyyy" (str/trim date))
:invoice-number (str customer-order-number "-" (Integer/parseInt num))
:total (str amount)
:vendor-code vendor}))))
conj
[]
sheet))}])

View File

@@ -3,6 +3,7 @@
[auto-ap.datomic :refer [audit-transact conn]]
[auto-ap.logging :as alog]
[clojure.data.json :as json]
[auto-ap.parse.excel :as excel]
[auto-ap.parse :as parse]
[amazonica.aws.lambda :as lambda]
[config.core :refer [env]]
@@ -16,7 +17,6 @@
[clojure.java.io :as io]
[com.brunobonacci.mulog :as mu]
[datomic.api :as dc]
[dk.ative.docjure.spreadsheet :as doc]
[hiccup2.core :as hiccup]
[amazonica.aws.s3 :as s3]))
@@ -27,14 +27,7 @@
(.setScale 2 java.math.RoundingMode/HALF_UP)
(double))))
(defn extract-sheet-details
  "Invokes the \"xls-extractor\" Lambda function with a JSON payload holding
  `object` under \"s3_url\" and `bucket` under \"s3_bucket\", then slurps the
  response's :payload and parses it as JSON.

  Returns the parsed JSON value with string keys (json/read-str is called
  without a key-fn)."
  [bucket object]
  (let [payload (json/write-str {"s3_url" object "s3_bucket" bucket})
        result  (lambda/invoke {:function-name "xls-extractor"
                                :payload       payload})]
    (json/read-str (slurp (:payload result)))))
(defn rows->maps [rows]
@@ -43,12 +36,7 @@
(into {}
(map vector headers r)))))
(defn xls-date->date
  "Converts a spreadsheet date serial, given as a string, to a java.util.Date.

  `f` is the textual serial number (days, with an optional fractional part
  for the time of day). Returns nil when `f` is nil, empty, or contains only
  whitespace. Throws NumberFormatException when `f` holds non-numeric text.

  The serial is shifted by 25569 days, which this code uses as the serial of
  the Unix epoch, and then scaled to milliseconds. No time-zone adjustment is
  applied here."
  [f]
  ;; Trim first so a whitespace-only cell is treated as empty instead of
  ;; reaching Double/parseDouble and raising NumberFormatException.
  (let [serial (if (string? f) (.trim ^String f) f)]
    (when (not-empty serial)
      (let [days      (Double/parseDouble serial)
            unix-days (- days 25569.0)
            unix-secs (* unix-days 86400.0)]
        (java.util.Date. (long (Math/round (* 1000.0 unix-secs))))))))
(defn map->sales-order [r clients]
@@ -67,7 +55,7 @@
(parse/exact-match clients))
client-id (:db/id client)
location (first (:client/locations client))
event-date (some-> (xls-date->date event-date)
event-date (some-> (excel/xls-date->date event-date)
coerce/to-date-time
atime/as-local-time
coerce/to-date )]
@@ -137,7 +125,7 @@
:key object
:input-stream s})
(into []
(->> (extract-sheet-details (:data-bucket env) object)
(->> (excel/extract-sheet-details (:data-bucket env) object)
rows->maps
(map #(map->sales-order % clients))
(filter identity)))))