added excel.

This commit is contained in:
Bryce Covert
2017-12-12 09:59:24 -08:00
parent 448749b6b4
commit c8bcf2aa02
5 changed files with 83 additions and 4 deletions

View File

@@ -0,0 +1,61 @@
(ns auto-ap.parse.excel
(:import [org.apache.poi.ss.util CellAddress])
(:require [dk.ative.docjure.spreadsheet :as d]
[clojure.string :as str]))
(def templates
  "Vendor-specific recipes for reading invoices out of a spreadsheet.

  Each template is a map with:
    :vendor   - vendor name copied onto the extracted invoice.
    :keywords - regexes that must all match the sheet text for the template
                to apply.
    :extract  - field key -> [anchor-regex row-offset column-offset capture-regex?].
                The anchor regex locates a cell; the offsets point from that
                cell to the cell holding the value; the optional fourth regex
                pulls the value out of that cell's text via its first capture
                group."
  [{:vendor   "Isp Productions"
    :keywords [#"ISP PRODUCTIONS"]
    :extract  {:customer-identifier [#"SERVICES PROVIDED TO" 1 0]
               :total               [#"PAY THIS" -1 0]
               :date                [#"INVOICE DATE" 0 1]
               :invoice-number      [#"INVOICE NUMBER" 0 1]}}

   {:vendor   "Southern Wine Online"
    :keywords [#"Please note that the total invoice amount may"]
    :extract  {:customer-identifier [#"Customer #" 1 0]
               :total               [#"Total Invoice" 0 5]
               ;; Label and value share one cell here, so the offsets are 0/0
               ;; and the capture regex strips the label.
               :date                [#"Date" 0 0 #"Date: (.*)"]
               :invoice-number      [#"Invoice #" 0 0 #"Invoice #: (.*)"]}}])
(defn template-applies?
  "Returns true when every regex in the template's :keywords finds a match
  somewhere in `text`, false otherwise. A template with no keywords applies
  to any text, because `every?` over an empty collection is true."
  [text {:keys [keywords]}]
  (every? (fn [pattern]
            (re-find pattern text))
          keywords))
(defn extract
  "Pulls invoice fields out of the first sheet of workbook `wb` using a template.

  The template's :extract map goes from a field key to a spec
  [regex offset-row offset-column extract-regex?]. For each field:
    1. The anchor is the first cell (in `d/cell-seq` order) whose stringified
       value matches `regex`.
    2. The value cell sits `offset-row` rows and `offset-column` columns away
       from the anchor; its value is read and stringified.
    3. When `extract-regex` is given, the field becomes the first capture
       group of its match against that string (nil when it does not match);
       otherwise the field is the whole string.
  A field whose anchor cannot be found is nil.

  Returns a map containing :vendor from the template plus one entry per key
  of :extract. A nil template yields {:vendor nil}."
  [wb {:keys [extract vendor]}]
  (reduce-kv
   (fn [invoice k [regex offset-row offset-column extract-regex]]
     (let [;; Resolve the sheet once per field instead of once per matching cell.
           sheet (first (d/sheet-seq wb))
           ;; Only the first matching cell is ever used, so the offset lookup
           ;; is done for that cell alone rather than for every match.
           anchor (->> (d/cell-seq sheet)
                       (filter (fn [cell]
                                 (re-find regex (str (d/read-cell cell)))))
                       first)
           value (when anchor
                   (let [address (.getAddress anchor)
                         target (CellAddress. (+ offset-row (.getRow address))
                                              (+ offset-column (.getColumn address)))
                         ;; select-cell takes an A1-style reference, which is
                         ;; what CellAddress renders via toString.
                         cell-value (str (d/read-cell (d/select-cell (.toString target) sheet)))]
                     (if extract-regex
                       (second (re-find extract-regex cell-value))
                       cell-value)))]
       (assoc invoice k value)))
   {:vendor vendor}
   extract))
(defn parse-file
  "Loads `file` as a workbook and extracts an invoice from it.

  The text used for template matching is every cell value of the first sheet
  joined with single spaces. The first entry of `templates` whose keywords all
  match that text is handed to `extract`; when none matches, `extract` is
  called with nil. `filename` is accepted but not used.

  Returns a one-element vector holding the map produced by `extract`."
  [file filename]
  (let [wb (d/load-workbook file)
        first-sheet (first (d/sheet-seq wb))
        cell-values (map d/read-cell (d/cell-seq first-sheet))
        text (str/join " " cell-values)
        template (first (filter #(template-applies? text %) templates))]
    [(extract wb template)]))