format.lisp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. (in-package :cl-user)
  2. (defpackage address-formatting
  3. (:use :cl :cl-ppcre)
  4. (:export :format-address))
  5. (in-package :address-formatting)
  6. (defun sexp (file)
  7. (with-open-file (s (merge-pathnames file (asdf:component-pathname
  8. (asdf:find-system '#:address-formatting))))
  9. (read s)))
  10. (eval-when (:load-toplevel :execute)
  11. (defparameter +worldwide+ (sexp "worldwide.sexp"))
  12. (defparameter +components+ (sexp "components.sexp"))
  13. (defparameter +country-langs+ (sexp "country2lang.sexp"))
  14. (defparameter +abbrevs+ (sexp "abbrevs.sexp"))
  15. (defparameter +county-codes+ (sexp "county_codes.sexp"))
  16. (defparameter +state-codes+ (sexp "state_codes.sexp"))
  17. (defparameter +known-components+ (loop for comp in +components+
  18. collect (aget "name" comp)
  19. append (aget "aliases" comp))))
  20. (defmacro aget (key alist)
  21. `(cdr (assoc ,key ,alist :test #'equal)))
  22. (defun agets (alist &rest keys)
  23. (reduce #'(lambda (a k) (aget k a)) keys :initial-value alist))
  24. (defun aset (alist key value)
  25. (if (aget key alist)
  26. (setf (aget key alist) value)
  27. (setf (cdr (last alist)) (list (cons key value)))))
  28. (defun iscan (regex target)
  29. (scan (create-scanner regex :case-insensitive-mode t) target))
  30. (defun unify-address (address)
  31. (loop for comp in +components+
  32. for name = (agets comp "name")
  33. for aliases = (agets comp "aliases")
  34. for value = (loop for key in (cons name aliases)
  35. for val = (aget key address)
  36. when val do (return val))
  37. when value collect (cons name value)))
  38. (defun has-minimum-components (addr)
  39. (< (loop for comp in '("road" "postcode")
  40. unless (aget comp addr) count 1)
  41. 2))
  42. (defun get-template (addr country-code)
  43. (let* ((template (or (agets +worldwide+ country-code)
  44. (agets +worldwide+ "default")))
  45. (text (or (agets template "address_template") "")))
  46. (values template
  47. (if (has-minimum-components addr) text
  48. (or (agets template "fallback_template")
  49. (agets +worldwide+ "default" "fallback_template")
  50. text)))))
  51. (defparameter +replacements+
  52. (loop for (re repl) on '("[\\},\\s]+$" ""
  53. "^[,\\s]+" ""
  54. "^- " "" ;; line starting with dash due to a parameter missing
  55. ",\\s*," ", " ;; multiple commas to one
  56. "\\h+,\\h+" ", " ;; one horiz whitespace behind comma
  57. "\\h\\h+" " " ;; multiple horiz whitespace to one
  58. "\\h\\n" "
  59. " ;; horiz whitespace, newline to newline
  60. "\\n," "
  61. " ;; newline comma to just newline
  62. ",,+" "," ;; multiple commas to one
  63. ",\\n" "
  64. " ;; comma newline to just newline
  65. "\\n\\h+" "
  66. " ;; newline plus space to newline
  67. "\\n\\n+" "
  68. " ;; multiple newline to one
  69. ) by #'cddr
  70. collect (cons (create-scanner re :multi-line-mode t) repl)))
  71. (defparameter +re-newline+ (create-scanner "\\n" :multi-line-mode t))
  72. (defparameter +re-comma+ (create-scanner ", "))
  73. (defparameter +re-line-horiz+ (loop for re in '("^\\h+" "\\h+$")
  74. collect (create-scanner re :multi-line-mode t)))
  75. (defparameter +re-line-vert+ (loop for re in '("^\\s+" "\\s+$")
  76. collect (create-scanner re :multi-line-mode t)))
  77. (defun trim (txt rexes)
  78. (reduce (lambda (txt re) (regex-replace re txt ""))
  79. rexes :initial-value txt))
  80. (defun dedup (txt re fmt)
  81. (format nil fmt
  82. (loop with seen = '()
  83. for piece in (split re txt)
  84. for clean-piece = (trim piece +re-line-vert+)
  85. unless (member clean-piece seen :test #'equal)
  86. collect clean-piece do (push clean-piece seen))))
  87. (defun cleanup-rendered (text &optional one-line)
  88. (let* ((processed (reduce (lambda (txt p) (regex-replace (car p) txt (cdr p)))
  89. +replacements+ :initial-value text))
  90. (lines (loop with seen-lines = '()
  91. for line in (split +re-newline+ processed)
  92. for clean-line = (trim line +re-line-horiz+)
  93. unless (member clean-line seen-lines :test #'equal)
  94. collect (dedup clean-line +re-comma+ "~{~a~^, ~}")
  95. do (push clean-line seen-lines))))
  96. (if one-line (trim (format nil "~{~a~^, ~}" lines) +re-line-vert+)
  97. (format nil "~a~%"
  98. (trim (format nil "~{~a~^~%~}" lines) +re-line-vert+)))))
  99. (defparameter +re-first-split+ (create-scanner "\\s*\\|\\|\\s*"))
  100. (defparameter +re-any-symbol+ (create-scanner "\\w" :multi-line-mode t))
  101. (defun render (template address &optional one-line)
  102. (labels ((fn-first (text)
  103. (or (car (remove "" (split +re-first-split+ (mustache:render* text address))
  104. :test #'equal)) "")))
  105. (let* ((context (cons (cons "first" #'fn-first) address))
  106. (result (cleanup-rendered (mustache:render* template context) one-line)))
  107. (if (scan +re-any-symbol+ result) result
  108. (cleanup-rendered (format nil "~{~a~^, ~}"
  109. (remove "" (mapcar #'cdr address) :test #'equal))
  110. one-line)))))
  111. (defun fix-country (addr)
  112. addr)
  113. (defun apply-replace (addr replace) addr)
  114. (defun add-code (addr type) addr)
  115. (defun set-attention (addr)
  116. (let ((unknown (loop for (k . v) in addr
  117. unless (member k +known-components+ :test #'equal)
  118. collect v)))
  119. (when unknown (aset addr "attention" (format nil "~{~a~^, ~}" unknown)))
  120. addr))
  121. (defun abbreviate (addr)
  122. (loop for lang in (split "," (agets +country-langs+
  123. (string-upcase (aget "country_code" addr))))
  124. do (loop for (k . abbrevs) in (agets +abbrevs+ lang)
  125. do (when (agets addr k)
  126. (loop with txt = (aget k addr)
  127. for (long . short) in abbrevs
  128. do (setf txt (regex-replace
  129. (create-scanner (format nil "\\b~a\\b" long)
  130. :multi-line-mode t)
  131. txt short))
  132. finally (aset addr k txt)))))
  133. addr)
  134. (defun post-replace (text replace)
  135. (loop with txt = (dedup text +re-comma+ "~{~a~^, ~}")
  136. for (from to) in replace
  137. do (setf txt (regex-replace (create-scanner from :multi-line-mode t)
  138. txt to))
  139. finally (return txt)))
  140. (defun format-address (address &key country-code abbreviate set-attention one-line)
  141. (let* ((uaddr (sanity-clean-address (unify-address address)))
  142. (country-code (or country-code (determine-country-code uaddr))))
  143. (when country-code
  144. (aset uaddr "country_code" country-code))
  145. (multiple-value-bind (tpl tpl-text)
  146. (get-template uaddr country-code)
  147. (let* ((pre-processors (remove nil (list #'fix-country
  148. (when (agets tpl "replace")
  149. (lambda (a)
  150. (apply-replace a (agets tpl "replace"))))
  151. (lambda (a) (add-code a "state"))
  152. (lambda (a) (add-code a "county"))
  153. (when set-attention #'set-attention)
  154. (when abbreviate #'abbreviate))))
  155. (post-processors (when (agets tpl "postformat_replace")
  156. (list (lambda (txt)
  157. (post-replace txt (agets tpl "postformat_replace")))
  158. (lambda (txt) (cleanup-rendered txt one-line)))))
  159. (address (reduce (lambda (a f) (funcall f a)) pre-processors :initial-value uaddr))
  160. (text (render tpl-text address one-line))
  161. (result (reduce (lambda (a f) (funcall f a)) post-processors :initial-value text)))
  162. result))))
  163. (defparameter +re-is-url+ (create-scanner "https?://"))
  164. (defparameter +re-postcode-range+ (create-scanner "(\\d+)[,;]\\d+"))
  165. (defun sanity-clean-address (addr)
  166. (labels ((sanitize (k v)
  167. (unless (scan +re-is-url+ v)
  168. (if (equal k "postcode")
  169. (unless (> (length v) 20)
  170. (multiple-value-bind (m start)
  171. (scan-to-strings +re-postcode-range+ v)
  172. (if m (elt start 0) v)))
  173. v))))
  174. (loop for (k . v) in addr
  175. for val = (sanitize k v)
  176. when val collect (cons k val))))
  177. (defun determine-country-code (addr)
  178. (labels ((comp (key) (aget key addr)))
  179. (when (comp "country_code")
  180. (let* ((country-code (string-upcase (aget "country_code" addr)))
  181. (use-country (agets +worldwide+ country-code "use_country"))
  182. (change-country (agets +worldwide+ country-code "change_country"))
  183. (add-component (agets +worldwide+ country-code "add_component")))
  184. (when (= 2 (length country-code))
  185. (when (equal country-code "UK")
  186. (setf country-code "GB"))
  187. (when use-country
  188. (when change-country
  189. (register-groups-bind (component)
  190. ("\\$(\\w+)" change-country)
  191. (setf change-country (regex-replace (format nil "\\$~a" component)
  192. change-country
  193. (or (comp component) ""))))
  194. (aset addr "country" change-country))
  195. (when (and add-component (position #\= add-component))
  196. (destructuring-bind (k v) (split "=" add-component)
  197. (aset addr k v)))
  198. (setf country-code use-country))
  199. (when (equal "NL" country-code)
  200. (setf country-code
  201. (cond
  202. ((equal "Curaçao" (comp "state"))
  203. (aset addr "country" "Curaçao")
  204. "CW")
  205. ((iscan "^sint maarten" (comp "state"))
  206. (aset addr "country" "Sint Maarten")
  207. "SX")
  208. ((iscan "^Aruba" (comp "state"))
  209. (aset addr "country" "Aruba")
  210. "AW")
  211. (t country-code)))))
  212. country-code))))