(in-package :cl-user) (defpackage address-formatting (:use :cl :cl-ppcre) (:export :format-address)) (in-package :address-formatting) (defun sexp (file) (with-open-file (s (merge-pathnames file (asdf:component-pathname (asdf:find-system '#:address-formatting)))) (read s))) (eval-when (:load-toplevel :execute) (defparameter +worldwide+ (sexp "worldwide.sexp")) (defparameter +components+ (sexp "components.sexp")) (defparameter +country-langs+ (sexp "country2lang.sexp")) (defparameter +abbrevs+ (sexp "abbrevs.sexp")) (defparameter +county-codes+ (sexp "county_codes.sexp")) (defparameter +state-codes+ (sexp "state_codes.sexp")) (defparameter +known-components+ (loop for comp in +components+ collect (aget "name" comp) append (aget "aliases" comp)))) (defmacro aget (key alist) `(cdr (assoc ,key ,alist :test #'equal))) (defun agets (alist &rest keys) (reduce #'(lambda (a k) (aget k a)) keys :initial-value alist)) (defun aset (alist key value) (if (aget key alist) (setf (aget key alist) value) (setf (cdr (last alist)) (list (cons key value))))) (defun iscan (regex target) (scan (create-scanner regex :case-insensitive-mode t) target)) (defun unify-address (address) (loop for comp in +components+ for name = (agets comp "name") for aliases = (agets comp "aliases") for value = (loop for key in (cons name aliases) for val = (aget key address) when val do (return val)) when value collect (cons name value))) (defun has-minimum-components (addr) (< (loop for comp in '("road" "postcode") unless (aget comp addr) count 1) 2)) (defun get-template (addr country-code) (let* ((template (or (agets +worldwide+ country-code) (agets +worldwide+ "default"))) (text (or (agets template "address_template") ""))) (values template (if (has-minimum-components addr) text (or (agets template "fallback_template") (agets +worldwide+ "default" "fallback_template") text))))) (defparameter +replacements+ (loop for (re repl) on '("[\\},\\s]+$" "" "^[,\\s]+" "" "^- " "" ;; line starting with dash due to a parameter missing ",\\s*," ", " ;; multiple commas to one "\\h+,\\h+" ", " ;; one horiz whitespace behind comma "\\h\\h+" " " ;; multiple horiz whitespace to one "\\h\\n" " " ;; horiz whitespace, newline to newline "\\n," " " ;; newline comma to just newline ",,+" "," ;; multiple commas to one ",\\n" " " ;; comma newline to just newline "\\n\\h+" " " ;; newline plus space to newline "\\n\\n+" " " ;; multiple newline to one ) by #'cddr collect (cons (create-scanner re :multi-line-mode t) repl))) (defparameter +re-newline+ (create-scanner "\\n" :multi-line-mode t)) (defparameter +re-comma+ (create-scanner ", ")) (defparameter +re-line-horiz+ (loop for re in '("^\\h+" "\\h+$") collect (create-scanner re :multi-line-mode t))) (defparameter +re-line-vert+ (loop for re in '("^\\s+" "\\s+$") collect (create-scanner re :multi-line-mode t))) (defun trim (txt rexes) (reduce (lambda (txt re) (regex-replace re txt "")) rexes :initial-value txt)) (defun dedup (txt re fmt) (format nil fmt (loop with seen = '() for piece in (split re txt) for clean-piece = (trim piece +re-line-vert+) unless (member clean-piece seen :test #'equal) collect clean-piece do (push clean-piece seen)))) (defun cleanup-rendered (text &optional one-line) (let* ((processed (reduce (lambda (txt p) (regex-replace (car p) txt (cdr p))) +replacements+ :initial-value text)) (lines (loop with seen-lines = '() for line in (split +re-newline+ processed) for clean-line = (trim line +re-line-horiz+) unless (member clean-line seen-lines :test #'equal) collect (dedup clean-line +re-comma+ "~{~a~^, ~}") do (push clean-line seen-lines)))) (if one-line (trim (format nil "~{~a~^, ~}" lines) +re-line-vert+) (format nil "~a~%" (trim (format nil "~{~a~^~%~}" lines) +re-line-vert+))))) (defparameter +re-first-split+ (create-scanner "\\s*\\|\\|\\s*")) (defparameter +re-any-symbol+ (create-scanner "\\w" :multi-line-mode t)) (defun render (template address &optional one-line) (labels ((fn-first (text) (or (car (remove "" (split +re-first-split+ (mustache:render* text address)) :test #'equal)) ""))) (let* ((context (cons (cons "first" #'fn-first) address)) (result (cleanup-rendered (mustache:render* template context) one-line))) (if (scan +re-any-symbol+ result) result (cleanup-rendered (format nil "~{~a~^, ~}" (remove "" (mapcar #'cdr address) :test #'equal)) one-line))))) (defun fix-country (addr) addr) (defun apply-replace (addr replace) addr) (defun add-code (addr type) addr) (defun set-attention (addr) (let ((unknown (loop for (k . v) in addr unless (member k +known-components+ :test #'equal) collect v))) (when unknown (aset addr "attention" (format nil "~{~a~^, ~}" unknown))) addr)) (defun abbreviate (addr) (loop for lang in (split "," (agets +country-langs+ (string-upcase (aget "country_code" addr)))) do (loop for (k . abbrevs) in (agets +abbrevs+ lang) do (when (agets addr k) (loop with txt = (aget k addr) for (long . short) in abbrevs do (setf txt (regex-replace (create-scanner (format nil "\\b~a\\b" long) :multi-line-mode t) txt short)) finally (aset addr k txt))))) addr) (defun post-replace (text replace) (loop with txt = (dedup text +re-comma+ "~{~a~^, ~}") for (from to) in replace do (setf txt (regex-replace (create-scanner from :multi-line-mode t) txt to)) finally (return txt))) (defun format-address (address &key country-code abbreviate set-attention one-line) (let* ((uaddr (sanity-clean-address (unify-address address))) (country-code (or country-code (determine-country-code uaddr)))) (when country-code (aset uaddr "country_code" country-code)) (multiple-value-bind (tpl tpl-text) (get-template uaddr country-code) (let* ((pre-processors (remove nil (list #'fix-country (when (agets tpl "replace") (lambda (a) (apply-replace a (agets tpl "replace")))) (lambda (a) (add-code a "state")) (lambda (a) (add-code a "county")) (when set-attention #'set-attention) (when abbreviate #'abbreviate)))) (post-processors (when (agets tpl "postformat_replace") (list (lambda (txt) (post-replace txt (agets tpl "postformat_replace"))) (lambda (txt) (cleanup-rendered txt one-line))))) (address (reduce (lambda (a f) (funcall f a)) pre-processors :initial-value uaddr)) (text (render tpl-text address one-line)) (result (reduce (lambda (a f) (funcall f a)) post-processors :initial-value text))) result)))) (defparameter +re-is-url+ (create-scanner "https?://")) (defparameter +re-postcode-range+ (create-scanner "(\\d+)[,;]\\d+")) (defun sanity-clean-address (addr) (labels ((sanitize (k v) (unless (scan +re-is-url+ v) (if (equal k "postcode") (unless (> (length v) 20) (multiple-value-bind (m start) (scan-to-strings +re-postcode-range+ v) (if m (elt start 0) v))) v)))) (loop for (k . v) in addr for val = (sanitize k v) when val collect (cons k val)))) (defun determine-country-code (addr) (labels ((comp (key) (aget key addr))) (when (comp "country_code") (let* ((country-code (string-upcase (aget "country_code" addr))) (use-country (agets +worldwide+ country-code "use_country")) (change-country (agets +worldwide+ country-code "change_country")) (add-component (agets +worldwide+ country-code "add_component"))) (when (= 2 (length country-code)) (when (equal country-code "UK") (setf country-code "GB")) (when use-country (when change-country (register-groups-bind (component) ("\\$(\\w+)" change-country) (setf change-country (regex-replace (format nil "\\$~a" component) change-country (or (comp component) "")))) (aset addr "country" change-country)) (when (and add-component (position #\= add-component)) (destructuring-bind (k v) (split "=" add-component) (aset addr k v))) (setf country-code use-country)) (when (equal "NL" country-code) (setf country-code (cond ((equal "Curaçao" (comp "state")) (aset addr "country" "Curaçao") "CW") ((iscan "^sint maarten" (comp "state")) (aset addr "country" "Sint Maarten") "SX") ((iscan "^Aruba" (comp "state")) (aset addr "country" "Aruba") "AW") (t country-code))))) country-code))))