added excel.
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -10,3 +10,4 @@ pom.xml.asc
|
|||||||
/.nrepl-port
|
/.nrepl-port
|
||||||
/resources/public/js/compiled
|
/resources/public/js/compiled
|
||||||
*.log
|
*.log
|
||||||
|
examples/
|
||||||
|
|||||||
@@ -13,6 +13,7 @@
|
|||||||
[ring/ring-json "0.4.0"]
|
[ring/ring-json "0.4.0"]
|
||||||
[ring "1.4.0"]
|
[ring "1.4.0"]
|
||||||
[yogthos/config "0.8"]
|
[yogthos/config "0.8"]
|
||||||
|
[dk.ative/docjure "1.12.0"]
|
||||||
[org.clojure/java.jdbc "0.7.3"]
|
[org.clojure/java.jdbc "0.7.3"]
|
||||||
[cljsjs/dropzone "4.3.0-0"]
|
[cljsjs/dropzone "4.3.0-0"]
|
||||||
;; https://mvnrepository.com/artifact/postgresql/postgresql
|
;; https://mvnrepository.com/artifact/postgresql/postgresql
|
||||||
|
|||||||
@@ -54,7 +54,7 @@
|
|||||||
(println existing-invoices)
|
(println existing-invoices)
|
||||||
(invoices/insert-multi!
|
(invoices/insert-multi!
|
||||||
(for [{:keys [total date invoice-number customer-identifier vendor] :as row}
|
(for [{:keys [total date invoice-number customer-identifier vendor] :as row}
|
||||||
(parse/parse-file (.getPath tempfile))]
|
(parse/parse-file (.getPath tempfile) filename)]
|
||||||
(assoc row
|
(assoc row
|
||||||
:imported false
|
:imported false
|
||||||
:potential-duplicate (boolean (seq (filter #(and (= vendor (:vendor %))
|
:potential-duplicate (boolean (seq (filter #(and (= vendor (:vendor %))
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
(ns auto-ap.parse
|
(ns auto-ap.parse
|
||||||
(:require [clojure.java.io :as io]
|
(:require [clojure.java.io :as io]
|
||||||
[clojure.string :as str]
|
[clojure.string :as str]
|
||||||
[clojure.java.shell :as sh]))
|
[clojure.java.shell :as sh]
|
||||||
|
[auto-ap.parse.excel :as excel]))
|
||||||
|
|
||||||
(def templates
|
(def templates
|
||||||
[{:vendor "CHFW"
|
[{:vendor "CHFW"
|
||||||
@@ -49,8 +50,23 @@
|
|||||||
first
|
first
|
||||||
(extract-template text)))
|
(extract-template text)))
|
||||||
|
|
||||||
(defn parse-file
|
|
||||||
[file]
|
;; Dispatches on the file extension of the ORIGINAL upload name (`filename`),
;; not on `file`, which is whatever path the caller passes for reading.
;; The extension is lower-cased so that "INVOICE.PDF" and "report.XLSX" reach
;; the same methods as their lower-case spellings instead of failing with
;; "No method in multimethod".
(defmulti parse-file
  (fn [file filename]
    (str/lower-case (last (str/split filename #"\.")))))
|
||||||
|
|
||||||
|
;; PDF uploads: run the external `pdftotext -layout` command on `file`,
;; capture what it prints on stdout and hand that text to `parse`.
;; `filename` is only used by the multimethod's dispatch function.
(defmethod parse-file "pdf"
  [file filename]
  (let [{text :out} (sh/sh "pdftotext" "-layout" file "-")]
    (parse text)))
|
||||||
|
|
||||||
|
;; ".xls" uploads are delegated unchanged to the spreadsheet parser in
;; auto-ap.parse.excel.
(defmethod parse-file "xls"
  [file filename]
  (excel/parse-file file filename))
|
||||||
|
|
||||||
|
|
||||||
|
;; ".xlsx" uploads are delegated unchanged to the spreadsheet parser in
;; auto-ap.parse.excel.
(defmethod parse-file "xlsx"
  [file filename]
  (excel/parse-file file filename))
|
||||||
|
|||||||
61
src/clj/auto_ap/parse/excel.clj
Normal file
61
src/clj/auto_ap/parse/excel.clj
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
(ns auto-ap.parse.excel
|
||||||
|
(:import [org.apache.poi.ss.util CellAddress])
|
||||||
|
(:require [dk.ative.docjure.spreadsheet :as d]
|
||||||
|
|
||||||
|
[clojure.string :as str]))
|
||||||
|
|
||||||
|
(def templates
  "Spreadsheet invoice templates, tried in order by `parse-file`.

  Each template is a map with:
    :vendor   - vendor name copied onto the extracted invoice.
    :keywords - regexes that must ALL be found in the sheet text for the
                template to apply.
    :extract  - map of invoice field -> rule vector
                [label-regex row-offset column-offset value-regex?].
                The rule locates a cell whose text matches `label-regex`,
                reads the cell `row-offset` rows and `column-offset` columns
                away from it, and, when `value-regex` is present, keeps only
                its first capture group."
  [;; ISP Productions
   {:vendor   "Isp Productions"
    :keywords [#"ISP PRODUCTIONS"]
    :extract  {:customer-identifier [#"SERVICES PROVIDED TO" 1 0]
               :total               [#"PAY THIS" -1 0]
               :date                [#"INVOICE DATE" 0 1]
               :invoice-number      [#"INVOICE NUMBER" 0 1]}}

   ;; Southern Wine Online
   {:vendor   "Southern Wine Online"
    :keywords [#"Please note that the total invoice amount may"]
    :extract  {:customer-identifier [#"Customer #" 1 0]
               :total               [#"Total Invoice" 0 5]
               :date                [#"Date" 0 0 #"Date: (.*)"]
               :invoice-number      [#"Invoice #" 0 0 #"Invoice #: (.*)"]}}])
|
||||||
|
|
||||||
|
(defn template-applies?
  "Returns true when every regex in the template's :keywords is found
  somewhere in `text` (vacuously true when there are no keywords)."
  [text {:keys [keywords]}]
  (every? (fn [pattern]
            (some? (re-find pattern text)))
          keywords))
|
||||||
|
|
||||||
|
(defn extract
  "Builds an invoice map from the first sheet of workbook `wb` using a
  template's :extract rules.

  The result starts as {:vendor vendor}. For every [field rule] pair in the
  template's :extract map, where rule is
  [label-regex row-offset column-offset value-regex?]:

    1. find the first cell whose stringified value matches `label-regex`;
    2. read the cell `row-offset` rows / `column-offset` columns away from it;
    3. if `value-regex` is given, keep only its first capture group,
       otherwise keep the whole stringified value.

  The field is nil when no label cell matches, when `value-regex` does not
  match, or when the offsets point before the first row/column of the sheet.
  Called with a nil template this returns {:vendor nil} without touching
  `wb`."
  [wb {rules :extract vendor :vendor}]
  (let [;; Resolved lazily and only once; the original re-ran
        ;; (first (d/sheet-seq wb)) for every matching cell.
        sheet    (delay (first (d/sheet-seq wb)))
        value-at (fn [row column]
                   ;; A negative coordinate cannot name a cell (e.g. a -1 row
                   ;; offset with the label on the first row): yield nil
                   ;; instead of building an invalid cell address.
                   (when-not (or (neg? row) (neg? column))
                     (str (d/read-cell
                           (d/select-cell (str (CellAddress. row column))
                                          @sheet)))))]
    (reduce-kv
     (fn [invoice field [label-regex offset-row offset-column value-regex]]
       (assoc invoice field
              (->> (d/cell-seq @sheet)
                   (filter (fn [cell]
                             (re-find label-regex (str (d/read-cell cell)))))
                   (map (fn [cell]
                          (let [address    (.getAddress cell)
                                cell-value (value-at (+ offset-row (.getRow address))
                                                     (+ offset-column (.getColumn address)))]
                            (cond
                              (nil? cell-value) nil
                              value-regex       (second (re-find value-regex cell-value))
                              :else             cell-value))))
                   first)))
     {:vendor vendor}
     rules)))
|
||||||
|
|
||||||
|
(defn parse-file
  "Parses the spreadsheet at `file` into a vector of invoice maps.

  The text of every cell on the first sheet is joined with spaces and matched
  against `templates`; the first template that applies is used to `extract`
  a single invoice, returned as a one-element vector.

  Returns an empty vector when no template recognises the workbook, rather
  than a placeholder invoice whose only entry is a nil :vendor.

  `filename` (the original upload name) is accepted so the signature matches
  the `auto-ap.parse/parse-file` multimethod; it is not used here."
  [file filename]
  (let [wb   (d/load-workbook file)
        text (->> wb
                  (d/sheet-seq)
                  first
                  (d/cell-seq)
                  (map d/read-cell)
                  (str/join " "))]
    (if-let [template (->> templates
                           (filter (partial template-applies? text))
                           first)]
      [(extract wb template)]
      [])))
|
||||||
Reference in New Issue
Block a user