Adds Cheetah

This commit is contained in:
2022-01-11 11:11:36 -08:00
parent 318ae3024c
commit 55450a3c32
3 changed files with 82 additions and 80 deletions

View File

@@ -1,30 +1,29 @@
(ns auto-ap.handler (ns auto-ap.handler
(:require [amazonica.core :refer [defcredential]] (:require
[amazonica.core :refer [defcredential]]
[auto-ap.client-routes :as client-routes]
[auto-ap.routes.auth :as auth] [auto-ap.routes.auth :as auth]
[auto-ap.routes.events :as events]
[auto-ap.routes.exports :as exports] [auto-ap.routes.exports :as exports]
[auto-ap.routes.queries :as queries]
[auto-ap.routes.graphql :as graphql] [auto-ap.routes.graphql :as graphql]
[auto-ap.routes.invoices :as invoices] [auto-ap.routes.invoices :as invoices]
[auto-ap.routes.queries :as queries]
[auto-ap.routes.yodlee :as yodlee] [auto-ap.routes.yodlee :as yodlee]
[auto-ap.routes.yodlee2 :as yodlee2] [bidi.bidi :as bidi]
[buddy.auth.backends.token :refer [jws-backend]] [buddy.auth.backends.token :refer [jws-backend]]
[buddy.auth.middleware :refer [wrap-authentication wrap-authorization]] [buddy.auth.middleware :refer [wrap-authentication wrap-authorization]]
[clojure.tools.logging :as log]
[clojure.string :as str] [clojure.string :as str]
[clojure.tools.logging :as log]
#_{:clj-kondo/ignore [:refer-all]}
[compojure.core :refer :all] [compojure.core :refer :all]
[compojure.route :as route] [compojure.route :as route]
[config.core :refer [env]] [config.core :refer [env]]
[mount.core :as mount] [mount.core :as mount]
[ring.middleware.edn :refer [wrap-edn-params]] [ring.middleware.edn :refer [wrap-edn-params]]
[ring.middleware.gzip :refer [wrap-gzip]]
[ring.middleware.multipart-params :as mp] [ring.middleware.multipart-params :as mp]
[ring.middleware.params :refer [wrap-params]] [ring.middleware.params :refer [wrap-params]]
[ring.middleware.reload :refer [wrap-reload]] [ring.middleware.reload :refer [wrap-reload]]
[ring.util.response :as response] [ring.util.response :as response]
[unilog.context :as lc] [unilog.context :as lc]))
[auto-ap.client-routes :as client-routes]
[bidi.bidi :as bidi]))
(when (:aws-access-key-id env) (when (:aws-access-key-id env)
(defcredential (:aws-access-key-id env) (:aws-secret-access-key env) (:aws-region env))) (defcredential (:aws-access-key-id env) (:aws-secret-access-key env) (:aws-region env)))
@@ -38,8 +37,8 @@
(defroutes static-routes (defroutes static-routes
(GET "/" [] (response/resource-response "index.html" {:root "public"})) (GET "/" [] (response/resource-response "index.html" {:root "public"}))
(route/resources "/") (route/resources "/")
(routes (ANY "*" {:keys [path] :as r} (routes (ANY "*" {:keys [uri]}
(if (bidi/match-route client-routes/routes (:uri r)) (if (bidi/match-route client-routes/routes uri)
(response/resource-response "index.html" {:root "public"}) (response/resource-response "index.html" {:root "public"})
{:status 404 {:status 404
:body "Not found"})))) :body "Not found"}))))
@@ -56,14 +55,12 @@
(context "/api" [] (context "/api" []
exports/export-routes exports/export-routes
yodlee/routes yodlee/routes
yodlee2/routes
queries/query2-routes queries/query2-routes
invoices/routes invoices/routes
graphql/routes graphql/routes
auth/routes auth/routes
health-check)) health-check))
(def auth-backend (jws-backend {:secret (:jwt-secret env) :options {:alg :hs512}})) (def auth-backend (jws-backend {:secret (:jwt-secret env) :options {:alg :hs512}}))
(defn wrap-transaction [handler] (defn wrap-transaction [handler]
@@ -86,8 +83,6 @@
(log/info "Beginning request" (:uri request))) (log/info "Beginning request" (:uri request)))
(handler request)))) (handler request))))
(def app (def app
(-> #'app-routes (-> #'app-routes
(wrap-logging) (wrap-logging)
@@ -96,5 +91,4 @@
(wrap-reload) (wrap-reload)
(wrap-params) (wrap-params)
(mp/wrap-multipart-params) (mp/wrap-multipart-params)
(wrap-edn-params) (wrap-edn-params)))
#_(wrap-gzip)))

View File

@@ -1,16 +1,15 @@
(ns auto-ap.parse (ns auto-ap.parse
(:require [auto-ap.parse.csv :as csv] (:require
[auto-ap.logging :refer [info-event]]
[auto-ap.parse.csv :as csv]
[auto-ap.parse.excel :as excel] [auto-ap.parse.excel :as excel]
[auto-ap.parse.templates :as t] [auto-ap.parse.templates :as t]
[auto-ap.parse.util :as u] [auto-ap.parse.util :as u]
[clj-fuzzy.metrics :as m] [clj-fuzzy.metrics :as m]
[clj-time.core :as time]
[clj-time.format :as f]
[clojure.java.shell :as sh] [clojure.java.shell :as sh]
[clojure.set :as set] [clojure.set :as set]
[clojure.string :as str] [clojure.string :as str]
[clojure.tools.logging :as log] [clojure.tools.logging :as log]))
[auto-ap.logging :refer [info-event]]))
(def last-text (atom nil)) (def last-text (atom nil))
@@ -57,11 +56,11 @@
(extract-template text))) (extract-template text)))
(defmulti parse-file (fn [file filename] (.toLowerCase (last (str/split filename #"\." ))))) (defmulti parse-file (fn [_ filename] (.toLowerCase (last (str/split filename #"\." )))))
(defmethod parse-file (defmethod parse-file
"pdf" "pdf"
[file filename] [file _]
(-> (sh/sh "pdftotext" "-layout" file "-") (-> (sh/sh "pdftotext" "-layout" file "-")
:out :out
parse)) parse))
@@ -87,7 +86,7 @@
(best-match clients invoice-client-name 0.25)) (best-match clients invoice-client-name 0.25))
([clients invoice-client-name threshold] ([clients invoice-client-name threshold]
(let [fuzzy-match (->> clients (let [fuzzy-match (->> clients
(mapcat (fn [{:keys [:db/id :client/matches :client/name] :as client :or {matches []}}] (mapcat (fn [{:keys [:client/matches :client/name] :as client :or {matches []}}]
(map (fn [m] (map (fn [m]
[client (m/jaccard (.toLowerCase invoice-client-name) (.toLowerCase m))]) [client (m/jaccard (.toLowerCase invoice-client-name) (.toLowerCase m))])
(conj matches name)))) (conj matches name))))
@@ -98,7 +97,7 @@
word-set (set (filter (complement str/blank?) (str/split (.toLowerCase invoice-client-name) #"[\s:\-]" ))) word-set (set (filter (complement str/blank?) (str/split (.toLowerCase invoice-client-name) #"[\s:\-]" )))
client-word-match (->> clients client-word-match (->> clients
(map (map
(fn [{:keys [:db/id :client/matches :client/name] :as client :or {matches []}}] (fn [{:keys [:client/matches :client/name] :as client :or {matches []}}]
(let [client-words (-> #{} (let [client-words (-> #{}
(into (into
(mapcat (mapcat
@@ -119,7 +118,7 @@
(mapcat (fn [{:keys [:location-match/location :location-match/matches]}] (mapcat (fn [{:keys [:location-match/location :location-match/matches]}]
(map (fn [match] [location match]) matches))) (map (fn [match] [location match]) matches)))
(filter (fn [[location match]] (filter (fn [[_ match]]
(re-find (re-pattern (str "(?i)" match)) text)) ) (re-find (re-pattern (str "(?i)" match)) text)) )
first first
first) first)
@@ -127,15 +126,14 @@
:client/location-matches :client/location-matches
(mapcat (fn [{:keys [:location-match/location :location-match/matches]}] (mapcat (fn [{:keys [:location-match/location :location-match/matches]}]
(map (fn [match] [location match]) matches))) (map (fn [match] [location match]) matches)))
(filter (fn [[location match]] (re-find (re-pattern (str "(?i)" match)) full-text)) ) (filter (fn [[_ match]] (re-find (re-pattern (str "(?i)" match)) full-text)) )
first first
first) first)
(:client/default-location client) (:client/default-location client)
(first (:client/locations client)))) (first (:client/locations client))))
#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]}
(defn dbg-parse [v] (defn dbg-parse [v]
(doto
(map (map
(fn [x] (dissoc x :full-text :text)) (fn [x] (dissoc x :full-text :text))
(parse v)) (parse v)))
clojure.pprint/pprint ))

View File

@@ -182,6 +182,16 @@
:parser {:date [:clj-time "MM/dd/yyyy"] :parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}} :total [:trim-commas nil]}}
;; Cheetah
;; Invoice template entry for the vendor Cheetah.
;; NOTE(review): the code that consumes :keywords, :extract and :parser is not
;; visible in this view; remarks about how they are applied are assumptions.
{:vendor "Cheetah"
;; presumably these patterns identify a document as a Cheetah invoice - TODO confirm
:keywords [#"Truck name" #"Stop number"]
;; group 1: the run of digits and hyphens right after the Delivery date label
:extract {:date #"Delivery date: ([0-9\-]+)"
;; group 1: the whole line immediately following a line that contains Shipping
:customer-identifier #"Shipping.*\n(.*)"
;; group 1: the digits right after the Invoice # label
:invoice-number #"Invoice #: (\d+)"
;; group 1: the rest of the line after the first dollar sign that follows TOTAL:
;; (may include thousands separators or trailing whitespace)
:total #"TOTAL:.*?\$(.*)"}
;; the yyyy-MM-dd format string lines up with the digits-and-hyphens date captured above
:parser {:date [:clj-time "yyyy-MM-dd"]
;; presumably strips commas from the captured total before it is read as a number - TODO confirm
:total [:trim-commas nil]}}
;; Classic Wines ;; Classic Wines
{:vendor "Classic Wines" {:vendor "Classic Wines"
:keywords [#"585-9463"] :keywords [#"585-9463"]