| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237 |
- (in-package :cl-user)
- (defpackage address-formatting
- (:use :cl :cl-ppcre)
- (:export :format-address))
- (in-package :address-formatting)
- (defun sexp (file)
- (with-open-file (s (merge-pathnames file (asdf:component-pathname
- (asdf:find-system '#:address-formatting))))
- (read s)))
- (eval-when (:load-toplevel :execute)
- (defparameter +worldwide+ (sexp "worldwide.sexp"))
- (defparameter +components+ (sexp "components.sexp"))
- (defparameter +country-langs+ (sexp "country2lang.sexp"))
- (defparameter +abbrevs+ (sexp "abbrevs.sexp"))
- (defparameter +county-codes+ (sexp "county_codes.sexp"))
- (defparameter +state-codes+ (sexp "state_codes.sexp"))
- (defparameter +known-components+ (loop for comp in +components+
- collect (aget "name" comp)
- append (aget "aliases" comp))))
- (defmacro aget (key alist)
- `(cdr (assoc ,key ,alist :test #'equal)))
- (defun agets (alist &rest keys)
- (reduce #'(lambda (a k) (aget k a)) keys :initial-value alist))
- (defun aset (alist key value)
- (if (aget key alist)
- (setf (aget key alist) value)
- (setf (cdr (last alist)) (list (cons key value)))))
- (defun iscan (regex target)
- (scan (create-scanner regex :case-insensitive-mode t) target))
- (defun unify-address (address)
- (loop for comp in +components+
- for name = (agets comp "name")
- for aliases = (agets comp "aliases")
- for value = (loop for key in (cons name aliases)
- for val = (aget key address)
- when val do (return val))
- when value collect (cons name value)))
- (defun has-minimum-components (addr)
- (< (loop for comp in '("road" "postcode")
- unless (aget comp addr) count 1)
- 2))
- (defun get-template (addr country-code)
- (let* ((template (or (agets +worldwide+ country-code)
- (agets +worldwide+ "default")))
- (text (or (agets template "address_template") "")))
- (values template
- (if (has-minimum-components addr) text
- (or (agets template "fallback_template")
- (agets +worldwide+ "default" "fallback_template")
- text)))))
- (defparameter +replacements+
- (loop for (re repl) on '("[\\},\\s]+$" ""
- "^[,\\s]+" ""
- "^- " "" ;; line starting with dash due to a parameter missing
- ",\\s*," ", " ;; multiple commas to one
- "\\h+,\\h+" ", " ;; one horiz whitespace behind comma
- "\\h\\h+" " " ;; multiple horiz whitespace to one
- "\\h\\n" "
- " ;; horiz whitespace, newline to newline
- "\\n," "
- " ;; newline comma to just newline
- ",,+" "," ;; multiple commas to one
- ",\\n" "
- " ;; comma newline to just newline
- "\\n\\h+" "
- " ;; newline plus space to newline
- "\\n\\n+" "
- " ;; multiple newline to one
- ) by #'cddr
- collect (cons (create-scanner re :multi-line-mode t) repl)))
- (defparameter +re-newline+ (create-scanner "\\n" :multi-line-mode t))
- (defparameter +re-comma+ (create-scanner ", "))
- (defparameter +re-line-horiz+ (loop for re in '("^\\h+" "\\h+$")
- collect (create-scanner re :multi-line-mode t)))
- (defparameter +re-line-vert+ (loop for re in '("^\\s+" "\\s+$")
- collect (create-scanner re :multi-line-mode t)))
- (defun trim (txt rexes)
- (reduce (lambda (txt re) (regex-replace re txt ""))
- rexes :initial-value txt))
- (defun dedup (txt re fmt)
- (format nil fmt
- (loop with seen = '()
- for piece in (split re txt)
- for clean-piece = (trim piece +re-line-vert+)
- unless (member clean-piece seen :test #'equal)
- collect clean-piece do (push clean-piece seen))))
- (defun cleanup-rendered (text &optional one-line)
- (let* ((processed (reduce (lambda (txt p) (regex-replace (car p) txt (cdr p)))
- +replacements+ :initial-value text))
- (lines (loop with seen-lines = '()
- for line in (split +re-newline+ processed)
- for clean-line = (trim line +re-line-horiz+)
- unless (member clean-line seen-lines :test #'equal)
- collect (dedup clean-line +re-comma+ "~{~a~^, ~}")
- do (push clean-line seen-lines))))
- (if one-line (trim (format nil "~{~a~^, ~}" lines) +re-line-vert+)
- (format nil "~a~%"
- (trim (format nil "~{~a~^~%~}" lines) +re-line-vert+)))))
- (defparameter +re-first-split+ (create-scanner "\\s*\\|\\|\\s*"))
- (defparameter +re-any-symbol+ (create-scanner "\\w" :multi-line-mode t))
- (defun render (template address &optional one-line)
- (labels ((fn-first (text)
- (or (car (remove "" (split +re-first-split+ (mustache:render* text address))
- :test #'equal)) "")))
- (let* ((context (cons (cons "first" #'fn-first) address))
- (result (cleanup-rendered (mustache:render* template context) one-line)))
- (if (scan +re-any-symbol+ result) result
- (cleanup-rendered (format nil "~{~a~^, ~}"
- (remove "" (mapcar #'cdr address) :test #'equal))
- one-line)))))
- (defun fix-country (addr)
-
- addr)
- (defun apply-replace (addr replace) addr)
- (defun add-code (addr type) addr)
- (defun set-attention (addr)
- (let ((unknown (loop for (k . v) in addr
- unless (member k +known-components+ :test #'equal)
- collect v)))
- (when unknown (aset addr "attention" (format nil "~{~a~^, ~}" unknown)))
- addr))
- (defun abbreviate (addr)
- (loop for lang in (split "," (agets +country-langs+
- (string-upcase (aget "country_code" addr))))
- do (loop for (k . abbrevs) in (agets +abbrevs+ lang)
- do (when (agets addr k)
- (loop with txt = (aget k addr)
- for (long . short) in abbrevs
- do (setf txt (regex-replace
- (create-scanner (format nil "\\b~a\\b" long)
- :multi-line-mode t)
- txt short))
- finally (aset addr k txt)))))
- addr)
- (defun post-replace (text replace)
- (loop with txt = (dedup text +re-comma+ "~{~a~^, ~}")
- for (from to) in replace
- do (setf txt (regex-replace (create-scanner from :multi-line-mode t)
- txt to))
- finally (return txt)))
- (defun format-address (address &key country-code abbreviate set-attention one-line)
- (let* ((uaddr (sanity-clean-address (unify-address address)))
- (country-code (or country-code (determine-country-code uaddr))))
- (when country-code
- (aset uaddr "country_code" country-code))
- (multiple-value-bind (tpl tpl-text)
- (get-template uaddr country-code)
- (let* ((pre-processors (remove nil (list #'fix-country
- (when (agets tpl "replace")
- (lambda (a)
- (apply-replace a (agets tpl "replace"))))
- (lambda (a) (add-code a "state"))
- (lambda (a) (add-code a "county"))
- (when set-attention #'set-attention)
- (when abbreviate #'abbreviate))))
- (post-processors (when (agets tpl "postformat_replace")
- (list (lambda (txt)
- (post-replace txt (agets tpl "postformat_replace")))
- (lambda (txt) (cleanup-rendered txt one-line)))))
- (address (reduce (lambda (a f) (funcall f a)) pre-processors :initial-value uaddr))
- (text (render tpl-text address one-line))
- (result (reduce (lambda (a f) (funcall f a)) post-processors :initial-value text)))
- result))))
- (defparameter +re-is-url+ (create-scanner "https?://"))
- (defparameter +re-postcode-range+ (create-scanner "(\\d+)[,;]\\d+"))
- (defun sanity-clean-address (addr)
- (labels ((sanitize (k v)
- (unless (scan +re-is-url+ v)
- (if (equal k "postcode")
- (unless (> (length v) 20)
- (multiple-value-bind (m start)
- (scan-to-strings +re-postcode-range+ v)
- (if m (elt start 0) v)))
- v))))
- (loop for (k . v) in addr
- for val = (sanitize k v)
- when val collect (cons k val))))
- (defun determine-country-code (addr)
- (labels ((comp (key) (aget key addr)))
- (when (comp "country_code")
- (let* ((country-code (string-upcase (aget "country_code" addr)))
- (use-country (agets +worldwide+ country-code "use_country"))
- (change-country (agets +worldwide+ country-code "change_country"))
- (add-component (agets +worldwide+ country-code "add_component")))
- (when (= 2 (length country-code))
- (when (equal country-code "UK")
- (setf country-code "GB"))
- (when use-country
- (when change-country
- (register-groups-bind (component)
- ("\\$(\\w+)" change-country)
- (setf change-country (regex-replace (format nil "\\$~a" component)
- change-country
- (or (comp component) ""))))
- (aset addr "country" change-country))
- (when (and add-component (position #\= add-component))
- (destructuring-bind (k v) (split "=" add-component)
- (aset addr k v)))
- (setf country-code use-country))
-
- (when (equal "NL" country-code)
- (setf country-code
- (cond
- ((equal "Curaçao" (comp "state"))
- (aset addr "country" "Curaçao")
- "CW")
- ((iscan "^sint maarten" (comp "state"))
- (aset addr "country" "Sint Maarten")
- "SX")
- ((iscan "^Aruba" (comp "state"))
- (aset addr "country" "Aruba")
- "AW")
- (t country-code)))))
- country-code))))
|