(ns auto-ap.parse.excel
  "Excel invoice parsing: uploads a spreadsheet to S3, has the
  \"xls-extractor\" Lambda function turn it into JSON, and applies the first
  matching template from `auto-ap.parse.templates/excel-templates`."
  (:require [auto-ap.parse.templates :as t]
            [auto-ap.parse.util :as u]
            [clojure.string :as str]
            [amazonica.aws.lambda :as lambda]
            [clojure.data.json :as json]
            [config.core :refer [env]]
            [clojure.java.io :as io]
            [amazonica.aws.s3 :as s3]))

(defn template-applies?
  "Returns true when every regex in the template's `:keywords` is found in
  `text`. A template without keywords vacuously applies."
  [text {:keys [keywords]}]
  (every? #(re-find % text) keywords))

(defn extract
  "Runs the template's `:extract` function as `(extract wb vendor)` and
  returns its result.

  Only function extractors are supported. When `:extract` is anything else
  (including when the template itself is nil) the result is nil."
  [wb {:keys [extract vendor]}]
  ;; A former declarative extractor (regex + row/column offsets per field)
  ;; was already disabled with a reader discard, so this branch never
  ;; produced a value; the dead form has been dropped.
  (when (fn? extract)
    (extract wb vendor)))

(defn extract-sheet-details
  "Invokes the \"xls-extractor\" Lambda function with a JSON payload naming
  the S3 `bucket` and `object`, reads the response payload, decodes it as
  JSON, prints the decoded value, and returns it."
  [bucket object]
  (let [request  (json/write-str {"s3_url"    object
                                  "s3_bucket" bucket})
        response (lambda/invoke {:function-name "xls-extractor"
                                 :payload       request})
        details  (-> response :payload slurp json/read-str)]
    ;; The decoded payload is echoed to stdout, as the original code did.
    (println details)
    details))

(defn parse-file
  "Uploads `file` to the configured data bucket under a random
  \"xls-invoice/import/\" key, extracts its contents via
  `extract-sheet-details`, and runs the first template in
  `t/excel-templates` whose keywords all occur in the sheet text.

  The second argument is ignored. Returns the extractor's result, or nil
  when no template applies."
  [file _]
  (let [bucket  (:data-bucket env)
        tmp-key (str "xls-invoice/import/" (java.util.UUID/randomUUID))
        ;; NOTE(review): the uploaded object is not deleted here -- confirm
        ;; that cleanup happens elsewhere (e.g. a bucket lifecycle rule).
        _       (with-open [f (io/input-stream file)]
                  (s3/put-object {:bucket-name  bucket
                                  :key          tmp-key
                                  :input-stream f}))
        sheet   (extract-sheet-details bucket tmp-key)
        ;; Presumably `sheet` is a collection of rows, each a collection of
        ;; cell values; every cell is joined into one searchable string.
        text    (str/join " " (mapcat seq sheet))]
    (->> t/excel-templates
         (filter (partial template-applies? text))
         first
         (extract sheet))))

(defn xls-date->date
  "Converts an Excel date serial number to a `java.util.Date`.

  `f` may be a number or a numeric string. The serial is interpreted as
  days, with 25569 corresponding to the Unix epoch (1970-01-01T00:00:00Z);
  the fractional part is the time of day.

  Returns nil for nil, empty or whitespace-only strings, and for any other
  non-numeric, non-string input. Throws NumberFormatException for a
  non-blank string that is not a valid number."
  [f]
  (when-let [raw (cond
                   ;; Cells decoded from JSON may already be numbers.
                   (number? f) f
                   (and (string? f) (not (str/blank? f))) (Double/parseDouble f))]
    (let [serial    (double raw)
          unix-days (- serial 25569.0)
          unix-secs (* unix-days 86400.0)]
      (java.util.Date. (long (Math/round (* 1000.0 unix-secs)))))))