Added Excel (.xls/.xlsx) invoice parsing.
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -10,3 +10,4 @@ pom.xml.asc
|
||||
/.nrepl-port
|
||||
/resources/public/js/compiled
|
||||
*.log
|
||||
examples/
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
[ring/ring-json "0.4.0"]
|
||||
[ring "1.4.0"]
|
||||
[yogthos/config "0.8"]
|
||||
[dk.ative/docjure "1.12.0"]
|
||||
[org.clojure/java.jdbc "0.7.3"]
|
||||
[cljsjs/dropzone "4.3.0-0"]
|
||||
;; https://mvnrepository.com/artifact/postgresql/postgresql
|
||||
|
||||
@@ -54,7 +54,7 @@
|
||||
(println existing-invoices)
|
||||
(invoices/insert-multi!
|
||||
(for [{:keys [total date invoice-number customer-identifier vendor] :as row}
|
||||
(parse/parse-file (.getPath tempfile))]
|
||||
(parse/parse-file (.getPath tempfile) filename)]
|
||||
(assoc row
|
||||
:imported false
|
||||
:potential-duplicate (boolean (seq (filter #(and (= vendor (:vendor %))
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
(ns auto-ap.parse
|
||||
(:require [clojure.java.io :as io]
|
||||
[clojure.string :as str]
|
||||
[clojure.java.shell :as sh]))
|
||||
[clojure.java.shell :as sh]
|
||||
[auto-ap.parse.excel :as excel]))
|
||||
|
||||
(def templates
|
||||
[{:vendor "CHFW"
|
||||
@@ -49,8 +50,23 @@
|
||||
first
|
||||
(extract-template text)))
|
||||
|
||||
(defn parse-file
|
||||
[file]
|
||||
|
||||
;; Dispatches on the extension of `filename` (the text after the last "."),
;; lower-cased so that "INVOICE.PDF" and "invoice.pdf" reach the same method.
;; `file` is passed through to the selected method and is not inspected here.
;; A filename that yields no segment at all (e.g. ".") dispatches on nil.
(defmulti parse-file
  (fn [file filename]
    (some-> (last (str/split filename #"\."))
            str/lower-case)))
|
||||
|
||||
;; PDF files: run `pdftotext -layout <file> -` through the shell, take what it
;; wrote to stdout, and hand that text to `parse`. The filename argument is
;; only needed for dispatch and is ignored here.
(defmethod parse-file "pdf"
  [file _filename]
  (let [{text :out} (sh/sh "pdftotext" "-layout" file "-")]
    (parse text)))
|
||||
|
||||
;; Files with the "xls" extension are delegated unchanged to the Excel parser
;; in auto-ap.parse.excel.
(defmethod parse-file "xls"
  [workbook-file original-name]
  (excel/parse-file workbook-file original-name))
|
||||
|
||||
|
||||
;; Files with the "xlsx" extension are delegated unchanged to the Excel parser
;; in auto-ap.parse.excel.
(defmethod parse-file "xlsx"
  [workbook-file original-name]
  (excel/parse-file workbook-file original-name))
|
||||
|
||||
61
src/clj/auto_ap/parse/excel.clj
Normal file
61
src/clj/auto_ap/parse/excel.clj
Normal file
@@ -0,0 +1,61 @@
|
||||
(ns auto-ap.parse.excel
|
||||
(:import [org.apache.poi.ss.util CellAddress])
|
||||
(:require [dk.ative.docjure.spreadsheet :as d]
|
||||
|
||||
[clojure.string :as str]))
|
||||
|
||||
(def templates
  "Spreadsheet invoice templates, tried in order.

  :vendor   - value stored under :vendor in the extracted invoice.
  :keywords - regexes that must all be found in the sheet text for the
              template to apply (see `template-applies?`).
  :extract  - map of invoice field -> [label-regex row-offset column-offset]
              with an optional fourth capture-regex. `extract` finds the
              first cell matching label-regex, reads the cell at the given
              offset from it, and, when capture-regex is present, keeps only
              its first capture group."
  [{:vendor   "Isp Productions"
    :keywords [#"ISP PRODUCTIONS"]
    :extract  {:customer-identifier [#"SERVICES PROVIDED TO" 1 0]
               :total               [#"PAY THIS" -1 0]
               :date                [#"INVOICE DATE" 0 1]
               :invoice-number      [#"INVOICE NUMBER" 0 1]}}

   {:vendor   "Southern Wine Online"
    :keywords [#"Please note that the total invoice amount may"]
    :extract  {:customer-identifier [#"Customer #" 1 0]
               :total               [#"Total Invoice" 0 5]
               :date                [#"Date" 0 0 #"Date: (.*)"]
               :invoice-number      [#"Invoice #" 0 0 #"Invoice #: (.*)"]}}])
|
||||
|
||||
(defn template-applies?
  "Returns true when every regex under the template's :keywords is found
  somewhere in `text`, and false otherwise. A template with no keywords
  applies to any text."
  [text template]
  (let [patterns (:keywords template)]
    (every? (fn [pattern] (re-find pattern text)) patterns)))
|
||||
|
||||
(defn extract
  "Builds an invoice map for workbook `wb` from a template.

  Starts from {:vendor vendor} and, for every entry of the template's
  :extract map, associates the field key with text read from the first
  sheet of `wb`. Each entry is
  [label-regex row-offset column-offset capture-regex] (capture-regex is
  optional): the first cell whose stringified value matches label-regex is
  located, the cell at the given row/column offset from it is read and
  stringified, and when capture-regex is present the first capture group
  of its match is used instead of the whole text (nil if it does not
  match).

  A field is nil when no cell matches label-regex. When the offset points
  at negative coordinates or at a cell that does not exist, the target is
  treated as empty text rather than raising."
  [wb {:keys [vendor] fields :extract}]
  (reduce-kv
   (fn [invoice field [label-regex row-offset column-offset capture-regex]]
     (let [;; Looked up once per field; every label search and every offset
           ;; read below works against this same first sheet.
           sheet (first (d/sheet-seq wb))
           read-target
           (fn [cell]
             (let [address (.getAddress cell)
                   row     (+ row-offset (.getRow address))
                   column  (+ column-offset (.getColumn address))
                   ;; A negative offset (e.g. row-offset -1 on the first
                   ;; row) cannot name a real cell, so skip the lookup.
                   target  (when-not (or (neg? row) (neg? column))
                             (d/select-cell (.toString (CellAddress. row column))
                                            sheet))
                   ;; NOTE(review): select-cell appears to yield nil for an
                   ;; undefined cell - confirm against docjure 1.12.0; the
                   ;; guard keeps that case from reaching read-cell.
                   cell-value (str (when target (d/read-cell target)))]
               (if capture-regex
                 (second (re-find capture-regex cell-value))
                 cell-value)))]
       (assoc invoice field
              (->> (d/cell-seq sheet)
                   (filter (fn [cell]
                             (re-find label-regex (str (d/read-cell cell)))))
                   (map read-target)
                   first))))
   {:vendor vendor}
   fields))
|
||||
|
||||
(defn parse-file
  "Loads the workbook at `file` and returns a one-element vector holding the
  invoice map produced by `extract`.

  The values of all cells on the first sheet are joined with single spaces,
  and the first entry of `templates` whose keywords all occur in that text
  is handed to `extract`; when no template applies, `extract` receives nil.
  `filename` is accepted but not used here."
  [file filename]
  (let [wb          (d/load-workbook file)
        first-sheet (first (d/sheet-seq wb))
        cell-values (map d/read-cell (d/cell-seq first-sheet))
        text        (str/join " " cell-values)
        template    (first (filter #(template-applies? text %) templates))]
    [(extract wb template)]))
|
||||
Reference in New Issue
Block a user