Removes docjure, freeing space
This commit is contained in:
@@ -2,18 +2,23 @@
|
||||
(:require [auto-ap.parse.templates :as t]
|
||||
[auto-ap.parse.util :as u]
|
||||
[clojure.string :as str]
|
||||
[dk.ative.docjure.spreadsheet :as d])
|
||||
(:import (org.apache.poi.ss.util CellAddress)))
|
||||
[amazonica.aws.lambda :as lambda]
|
||||
[clojure.data.json :as json]
|
||||
[config.core :refer [env]]
|
||||
[clojure.java.io :as io]
|
||||
[amazonica.aws.s3 :as s3])
|
||||
)
|
||||
|
||||
|
||||
|
||||
(defn template-applies?
  "Returns true when every regex in the template's :keywords finds a match
  somewhere in text, and false as soon as one of them does not. A template
  with no keywords therefore applies to any text."
  [text {:keys [keywords]}]
  (not-any? (fn [pattern]
              (nil? (re-find pattern text)))
            keywords))
|
||||
|
||||
(defn extract [wb {:keys [extract vendor parser]}]
|
||||
(if (fn? extract)
|
||||
(extract wb vendor)
|
||||
[(reduce-kv
|
||||
#_[(reduce-kv
|
||||
(fn [invoice k [regex offset-row offset-column extract-regex]]
|
||||
(assoc invoice k
|
||||
(->> wb
|
||||
@@ -38,17 +43,34 @@
|
||||
{:vendor-code vendor}
|
||||
extract)]))
|
||||
|
||||
(defn extract-sheet-details
  "Invokes the \"xls-extractor\" Lambda function with a JSON payload naming
  the S3 bucket and object, then slurps the response's :payload and parses
  it as JSON. Returns the parsed JSON value."
  [bucket object]
  (let [request  (json/write-str {"s3_url" object "s3_bucket" bucket})
        response (lambda/invoke {:function-name "xls-extractor"
                                 :payload request})
        body     (slurp (:payload response))]
    (json/read-str body)))
|
||||
|
||||
(defn parse-file
|
||||
[file _]
|
||||
(let [wb (d/load-workbook file)
|
||||
text (->> wb
|
||||
(d/sheet-seq)
|
||||
first
|
||||
(d/cell-seq)
|
||||
(map d/read-cell)
|
||||
(str/join " "))]
|
||||
(let [tmp-key (str "xls-invoice/import/" (java.util.UUID/randomUUID))
|
||||
_ (with-open [f (io/input-stream file)]
|
||||
(s3/put-object {:bucket-name (:data-bucket env)
|
||||
:key tmp-key
|
||||
:input-stream f}))
|
||||
sheet (extract-sheet-details (:data-bucket env) tmp-key)
|
||||
text (str/join " " (mapcat seq sheet))]
|
||||
(->> t/excel-templates
|
||||
(filter (partial template-applies? text))
|
||||
first
|
||||
(extract wb)
|
||||
)))
|
||||
(extract sheet))))
|
||||
|
||||
|
||||
|
||||
|
||||
(defn xls-date->date
  "Converts a spreadsheet serial date, supplied as a numeric string, into a
  java.util.Date.

  The serial value has 25569 days subtracted (the code treats that serial as
  the Unix epoch day), is scaled to milliseconds, and is rounded to the
  nearest millisecond.

  Returns nil when f is nil, empty, or whitespace-only. Non-numeric text
  still throws NumberFormatException from Double/parseDouble."
  [f]
  ;; blank? covers nil, "" and whitespace-only cells; the last of these used
  ;; to slip past not-empty and blow up inside Double/parseDouble.
  (when-not (str/blank? f)
    (let [serial    (Double/parseDouble f)
          unix-days (- serial 25569.0)
          unix-secs (* unix-days 86400.0)
          unix-ms   (* 1000.0 unix-secs)]
      (java.util.Date. (long (Math/round unix-ms))))))
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
(ns auto-ap.parse.templates
|
||||
(:require [dk.ative.docjure.spreadsheet :as d]
|
||||
[auto-ap.parse.util :as u]
|
||||
[clojure.string :as str])
|
||||
(:import (org.apache.poi.ss.util CellAddress)))
|
||||
(:require [auto-ap.parse.util :as u]
|
||||
[clojure.string :as str]))
|
||||
|
||||
|
||||
(def pdf-templates
|
||||
@@ -614,97 +612,27 @@
|
||||
:parser {:date [:clj-time "MM/dd/yy"]
|
||||
:total [:trim-commas-and-negate nil]}}])
|
||||
|
||||
(defn offset
  "Returns the string form of the CellAddress that lies x columns and y rows
  away from the address of cell c."
  [c x y]
  (let [address (.getAddress c)
        row     (+ y (.getRow address))
        column  (+ x (.getColumn address))]
    (.toString (CellAddress. row column))))
|
||||
|
||||
(def excel-templates
|
||||
[{:vendor "Isp Productions"
|
||||
:keywords [#"ISP PRODUCTIONS"]
|
||||
:extract {:customer-identifier [#"SERVICES PROVIDED TO" 1 0]
|
||||
:total [#"PAY THIS" -1 0]
|
||||
:date [#"INVOICE DATE" 0 1]
|
||||
:invoice-number [#"INVOICE NUMBER" 0 1]}}
|
||||
{:vendor "Southern Glazers"
|
||||
:keywords [#"Please note that the total invoice amount may"]
|
||||
:extract {:customer-identifier [#"Customer #" 1 0]
|
||||
:total [#"Subtotal" 0 16 ]
|
||||
:date [#"Date" 0 0 #"Date: (.*)"]
|
||||
:invoice-number [#"Invoice #" 0 0 #"Invoice #: (.*)"]
|
||||
:account-number [#"Customer #" 0 0 #"Customer #: (.*)"]}
|
||||
:parser { :total [:trim-commas-and-remove-dollars-and-invert-parentheses nil]
|
||||
:date [:clj-time "MM/dd/yyyy"]}}
|
||||
{:vendor "Mama Lu's Foods"
|
||||
[{:vendor "Mama Lu's Foods"
|
||||
:keywords [#"Mama Lu's Foods"]
|
||||
:extract (fn [wb vendor]
|
||||
(let [[sheet] (d/sheet-seq wb)]
|
||||
(transduce (comp
|
||||
(drop 5)
|
||||
(filter
|
||||
:extract (fn [sheet vendor]
|
||||
(transduce (comp
|
||||
(drop 5)
|
||||
(filter
|
||||
(fn [r]
|
||||
(and
|
||||
r
|
||||
(->> r d/cell-seq second d/read-cell))))
|
||||
(map
|
||||
(seq r)
|
||||
(->> r second not-empty))))
|
||||
(map
|
||||
(fn [r]
|
||||
(let [[_ customer-order-number num date name amount] (map d/read-cell (d/cell-seq r))]
|
||||
(let [[_ customer-order-number num date name amount] r]
|
||||
{:customer-identifier (second (re-find #"([^:]*):" name))
|
||||
:text name
|
||||
:full-text name
|
||||
:date (u/parse-value :clj-time "MM/dd/yyyy" (str/trim date))
|
||||
:invoice-number (str customer-order-number "-" (int num))
|
||||
:total (str amount)
|
||||
:vendor-code vendor}))))
|
||||
conj
|
||||
[]
|
||||
(d/row-seq sheet))))}
|
||||
{:vendor "DVW Commercial"
|
||||
:keywords [#"Total for" #"Num"]
|
||||
:extract (fn [wb vendor]
|
||||
(let [[sheet] (d/sheet-seq wb)]
|
||||
(transduce (comp (filter (fn [c]
|
||||
(re-find #"Invoice" (str (d/read-cell c)))))
|
||||
(map (fn [c]
|
||||
(let [customer-identifier (d/read-cell (->> (d/select-cell (offset c -3 0) sheet)
|
||||
(iterate (fn [c]
|
||||
(d/select-cell (offset c 0 -1) sheet)))
|
||||
(filter (fn [c]
|
||||
(not (str/blank? (d/read-cell c)))))
|
||||
first))]
|
||||
{:customer-identifier customer-identifier
|
||||
:text customer-identifier
|
||||
:full-text customer-identifier
|
||||
:date (d/read-cell (d/select-cell (offset c 2 0) sheet))
|
||||
:invoice-number (d/read-cell (d/select-cell (offset c 4 0) sheet))
|
||||
:total (str (d/read-cell (d/select-cell (offset c 8 0) sheet)))
|
||||
:vendor-code vendor}))))
|
||||
conj
|
||||
[]
|
||||
(d/cell-seq sheet))))}
|
||||
{:vendor "Chef's Choice Produce Co"
|
||||
:keywords [#"Alt_invoice_number"]
|
||||
:extract (fn [wb vendor]
|
||||
(let [[sheet] (d/sheet-seq wb)]
|
||||
(transduce (comp
|
||||
(drop-while (fn [c]
|
||||
(not (re-find #"Customer_id" (str (d/read-cell c))))))
|
||||
(drop 9)
|
||||
(filter (fn [c]
|
||||
(= 0 (.getColumnIndex c))))
|
||||
(filter (fn [c]
|
||||
(not (str/blank? (str/trim (or (d/read-cell (d/select-cell (offset c 1 0) sheet)) ""))))))
|
||||
(map (fn [c]
|
||||
{:customer-identifier (str/trim (d/read-cell (d/select-cell (offset c 1 0) sheet)))
|
||||
:text (d/read-cell (d/select-cell (offset c 1 0) sheet))
|
||||
:full-text (d/read-cell (d/select-cell (offset c 1 0) sheet))
|
||||
:date (u/parse-value :clj-time "MM/dd/yyyy" (str/trim (d/read-cell (d/select-cell (offset c 5 0) sheet))))
|
||||
:invoice-number (->>
|
||||
(re-find #"^(?:0+([A-Z0-9]+))|([A-Z]+[A-Z0-9]+)" (str/trim (d/read-cell (d/select-cell (offset c 2 0) sheet))))
|
||||
(drop 1 )
|
||||
(filter identity)
|
||||
first)
|
||||
:total (str (d/read-cell (d/select-cell (offset c 7 0) sheet)))
|
||||
:vendor-code vendor}))
|
||||
(filter :customer-identifier))
|
||||
conj
|
||||
[]
|
||||
(d/cell-seq sheet))))}])
|
||||
:text name
|
||||
:full-text name
|
||||
:date (u/parse-value :clj-time "MM/dd/yyyy" (str/trim date))
|
||||
:invoice-number (str customer-order-number "-" (Integer/parseInt num))
|
||||
:total (str amount)
|
||||
:vendor-code vendor}))))
|
||||
conj
|
||||
[]
|
||||
sheet))}])
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
[auto-ap.datomic :refer [audit-transact conn]]
|
||||
[auto-ap.logging :as alog]
|
||||
[clojure.data.json :as json]
|
||||
[auto-ap.parse.excel :as excel]
|
||||
[auto-ap.parse :as parse]
|
||||
[amazonica.aws.lambda :as lambda]
|
||||
[config.core :refer [env]]
|
||||
@@ -16,7 +17,6 @@
|
||||
[clojure.java.io :as io]
|
||||
[com.brunobonacci.mulog :as mu]
|
||||
[datomic.api :as dc]
|
||||
[dk.ative.docjure.spreadsheet :as doc]
|
||||
[hiccup2.core :as hiccup]
|
||||
[amazonica.aws.s3 :as s3]))
|
||||
|
||||
@@ -27,14 +27,7 @@
|
||||
(.setScale 2 java.math.RoundingMode/HALF_UP)
|
||||
(double))))
|
||||
|
||||
(defn extract-sheet-details
  "Calls the \"xls-extractor\" Lambda function, passing the S3 bucket and
  object as a JSON payload, and returns the response's :payload after it has
  been slurped and parsed as JSON."
  [bucket object]
  (let [payload-json (json/write-str {"s3_url" object "s3_bucket" bucket})
        result       (lambda/invoke {:function-name "xls-extractor"
                                     :payload payload-json})]
    (json/read-str (slurp (:payload result)))))
|
||||
|
||||
|
||||
|
||||
(defn rows->maps [rows]
|
||||
@@ -43,12 +36,7 @@
|
||||
(into {}
|
||||
(map vector headers r)))))
|
||||
|
||||
(defn xls-date->date
  "Turns a spreadsheet serial date, given as a numeric string, into a
  java.util.Date. 25569 days are subtracted from the serial value, the
  remainder is scaled to milliseconds and rounded. Returns nil when f is nil
  or empty."
  [f]
  (if (not-empty f)
    (let [serial      (Double/parseDouble f)
          epoch-secs  (* (- serial 25569.0) 86400.0)
          epoch-ms    (Math/round (* 1000.0 epoch-secs))]
      (java.util.Date. (long epoch-ms)))
    nil))
|
||||
|
||||
|
||||
|
||||
(defn map->sales-order [r clients]
|
||||
@@ -67,7 +55,7 @@
|
||||
(parse/exact-match clients))
|
||||
client-id (:db/id client)
|
||||
location (first (:client/locations client))
|
||||
event-date (some-> (xls-date->date event-date)
|
||||
event-date (some-> (excel/xls-date->date event-date)
|
||||
coerce/to-date-time
|
||||
atime/as-local-time
|
||||
coerce/to-date )]
|
||||
@@ -137,7 +125,7 @@
|
||||
:key object
|
||||
:input-stream s})
|
||||
(into []
|
||||
(->> (extract-sheet-details (:data-bucket env) object)
|
||||
(->> (excel/extract-sheet-details (:data-bucket env) object)
|
||||
rows->maps
|
||||
(map #(map->sales-order % clients))
|
||||
(filter identity)))))
|
||||
|
||||
Reference in New Issue
Block a user