s2.lisp 36 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929
  1. (in-package :cl-user)
  2. (defpackage #:ocm.s2
  3. (:use :cl))
  4. (in-package :ocm.s2)
  5. ;; In the process of converting a latitude-longitude pair to a 64-bit cell
  6. ;; id, the following coordinate systems are used:
  7. ;;
  8. ;; (id)
  9. ;; An S2CellId is a 64-bit encoding of a face and a Hilbert curve position
  10. ;; on that face. The Hilbert curve position implicitly encodes both the
  11. ;; position of a cell and its subdivision level (see s2cell_id.h).
  12. ;;
  13. ;; (face, i, j)
  14. ;; Leaf-cell coordinates. "i" and "j" are integers in the range
  15. ;; [0,(2**30)-1] that identify a particular leaf cell on the given face.
  16. ;; The (i, j) coordinate system is right-handed on each face, and the
  17. ;; faces are oriented such that Hilbert curves connect continuously from
  18. ;; one face to the next.
  19. ;;
  20. ;; (face, s, t)
  21. ;; Cell-space coordinates. "s" and "t" are real numbers in the range
  22. ;; [0,1] that identify a point on the given face. For example, the point
  23. ;; (s, t) = (0.5, 0.5) corresponds to the center of the top-level face
  24. ;; cell. This point is also a vertex of exactly four cells at each
  25. ;; subdivision level greater than zero.
  26. ;;
  27. ;; (face, si, ti)
  28. ;; Discrete cell-space coordinates. These are obtained by multiplying
  29. ;; "s" and "t" by 2**31 and rounding to the nearest unsigned integer.
  30. ;; Discrete coordinates lie in the range [0,2**31]. This coordinate
  31. ;; system can represent the edge and center positions of all cells with
  32. ;; no loss of precision (including non-leaf cells). In binary, each
  33. ;; coordinate of a level-k cell center ends with a 1 followed by
  34. ;; (30 - k) 0s. The coordinates of its edges end with (at least)
  35. ;; (31 - k) 0s.
  36. ;;
  37. ;; (face, u, v)
  38. ;; Cube-space coordinates in the range [-1,1]. To make the cells at each
  39. ;; level more uniform in size after they are projected onto the sphere,
  40. ;; we apply a nonlinear transformation of the form u=f(s), v=f(t).
  41. ;; The (u, v) coordinates after this transformation give the actual
  42. ;; coordinates on the cube face (modulo some 90 degree rotations) before
  43. ;; it is projected onto the unit sphere.
  44. ;;
  45. ;; (face, u, v, w)
  46. ;; Per-face coordinate frame. This is an extension of the (face, u, v)
  47. ;; cube-space coordinates that adds a third axis "w" in the direction of
  48. ;; the face normal. It is always a right-handed 3D coordinate system.
  49. ;; Cube-space coordinates can be converted to this frame by setting w=1,
  50. ;; while (u,v,w) coordinates can be projected onto the cube face by
  51. ;; dividing by w, i.e. (face, u/w, v/w).
  52. ;;
  53. ;; (x, y, z)
  54. ;; Direction vector (S2Point). Direction vectors are not necessarily unit
  55. ;; length, and are often chosen to be points on the biunit cube
  56. ;; [-1,+1]x[-1,+1]x[-1,+1]. They can be be normalized to obtain the
  57. ;; corresponding point on the unit sphere.
  58. ;;
  59. ;; (lat, lng)
  60. ;; Latitude and longitude (S2LatLng). Latitudes must be between -90 and
  61. ;; 90 degrees inclusive, and longitudes must be between -180 and 180
  62. ;; degrees inclusive.
  63. ;;
  64. ;; Note that the (i, j), (s, t), (si, ti), and (u, v) coordinate systems are
  65. ;; right-handed on all six faces.
  66. (defparameter +swap-mask+ 1)
  67. (defparameter +invert-mask+ 2)
  68. (defparameter +max-cell-level+ 30 "This is the number of levels needed to specify a leaf cell.")
  69. (defparameter +limit-ij+ (ash 1 +max-cell-level+) "The maximum index of a valid leaf cell plus one. The range of valid leaf cell indices is [0..+limit-ij+-1].")
  70. (defparameter +max-si-ti+ (ash 1 (+ 1 +max-cell-level+)) "The maximum value of an si- or ti-coordinate. The range of valid (si,ti) values is [0..+max-si-ti+].")
  71. ;; An S2CellId is a 64-bit unsigned integer that uniquely identifies a
  72. ;; cell in the S2 cell decomposition. It has the following format:
  73. ;;
  74. ;; id = [face][face_pos]
  75. ;;
  76. ;; face: a 3-bit number (range 0..5) encoding the cube face.
  77. ;;
  78. ;; face_pos: a 61-bit number encoding the position of the center of this
  79. ;; cell along the Hilbert curve over this face (see the Wiki
  80. ;; pages for details).
  81. ;;
  82. ;; Sequentially increasing cell ids follow a continuous space-filling curve
  83. ;; over the entire sphere. They have the following properties:
  84. ;;
  85. ;; - The id of a cell at level k consists of a 3-bit face number followed
  86. ;; by k bit pairs that recursively select one of the four children of
  87. ;; each cell. The next bit is always 1, and all other bits are 0.
  88. ;; Therefore, the level of a cell is determined by the position of its
  89. ;; lowest-numbered bit that is turned on (for a cell at level k, this
  90. ;; position is 2 * (kMaxLevel - k).)
  91. ;;
  92. ;; - The id of a parent cell is at the midpoint of the range of ids spanned
  93. ;; by its children (or by its descendants at any level).
  94. (defparameter +face-bits+ 3)
  95. (defparameter +num-faces+ 6)
  96. (defparameter +max-level+ +max-cell-level+)
  97. (defparameter +pos-bits+ (1+ (* 2 +max-level+)))
  98. (defparameter +max-size+ (ash 1 +max-level+))
  99. (defun st-to-ij (s)
  100. (max 0 (min (- +limit-ij+ 1) (round (- (* s +limit-ij+) 0.5d0)))))
  101. ;; Use quadratic (s,t) -> (u,v) projection. See S2 sources for explanation.
  102. (defun st-to-uv (s)
  103. (* (/ 1 3)
  104. (if (>= s 0.5d0)
  105. (- (* 4 s s) 1)
  106. (- 1 (* 4 (- s 1) (- s 1))))))
  107. (defun uv-to-st (u)
  108. (if (>= u 0)
  109. (* 0.5d0 (sqrt (+ 1 (* 3 u))))
  110. (- 1 (* 0.5d0 (sqrt (- 1 (* 3 u)))))))
  111. (defstruct point
  112. (x 0.0d0 :type double-float)
  113. (y 0.0d0 :type double-float)
  114. (z 0.0d0 :type double-float))
  115. (defstruct latlng
  116. (lat 0d0 :type double-float)
  117. (lng 0d0 :type double-float))
  118. (defun deg-to-rad (deg)
  119. (* (/ pi 180d0) deg))
  120. (defun rad-to-deg (rad)
  121. (* (/ 180d0 pi) rad))
  122. (defun from-radians (lat-rad lng-rad)
  123. (make-latlng :lat lat-rad :lng lng-rad))
  124. (defun from-degrees (lat-deg lng-deg)
  125. (make-latlng :lat (deg-to-rad lat-deg) :lng (deg-to-rad lng-deg)))
  126. (defun to-degrees (latlng)
  127. (declare (type latlng latlng))
  128. (with-slots (lat lng) latlng
  129. (cons (rad-to-deg lat) (rad-to-deg lng))))
  130. (defun latlng-to-point (latlng)
  131. (declare (type latlng latlng))
  132. (with-slots (lat lng) latlng
  133. (let* ((phi lat)
  134. (theta lng)
  135. (cos-phi (cos phi)))
  136. (make-point :x (* (cos theta) cos-phi)
  137. :y (* (sin theta) cos-phi)
  138. :z (sin phi)))))
  139. (defun point-lat (p)
  140. (declare (type point p))
  141. (with-slots (x y z) p
  142. (atan z (sqrt (+ (* x x) (* y y))))))
  143. (defun point-lng (p)
  144. (declare (type point p))
  145. (with-slots (x y z) p
  146. (atan y x)))
  147. (defun point-to-lat-lng (p)
  148. (declare (type point p))
  149. (from-radians (point-lat p) (point-lng p)))
  150. (defun dot (p1 p2)
  151. (declare (type point p1 p2))
  152. (+ (* (point-x p1) (point-x p2))
  153. (* (point-y p1) (point-y p2))
  154. (* (point-z p1) (point-z p2))))
  155. (defparameter +face-uvw-axes+
  156. (make-array '(6 3) :element-type 'point :initial-contents
  157. (list
  158. (list (make-point :y 1d0) (make-point :z 1d0) (make-point :x 1d0))
  159. (list (make-point :x -1d0) (make-point :z 1d0) (make-point :y 1d0))
  160. (list (make-point :x -1d0) (make-point :y -1d0) (make-point :z 1d0))
  161. (list (make-point :z -1d0) (make-point :y -1d0) (make-point :x -1d0))
  162. (list (make-point :z -1d0) (make-point :x 1d0) (make-point :y -1d0))
  163. (list (make-point :y 1d0) (make-point :x 1d0) (make-point :z -1d0))))
  164. "The U,V,W axes for each face.")
  165. (defun get-xyz-face (p)
  166. (declare (type point p))
  167. (with-slots (x y z) p
  168. (cond
  169. ((and (> (abs x) (abs y))
  170. (> (abs x) (abs z)))
  171. (if (>= x 0) 0 3))
  172. ((and (> (abs y) (abs x))
  173. (> (abs y) (abs z)))
  174. (if (>= y 0) 1 4))
  175. (t
  176. (if (>= z 0) 2 5)))))
  177. (defun get-uvw-axis (face axis)
  178. (aref +face-uvw-axes+ face axis))
  179. (defun get-norm (face)
  180. (get-uvw-axis face 2))
  181. (defun valid-face-xyz-to-uv (face p)
  182. (declare (type point p))
  183. (assert (> (dot (get-norm face) p) 0))
  184. (with-slots (x y z) p
  185. (ecase face
  186. (0 (values (/ y x) (/ z x)))
  187. (1 (values (/ (- x) y) (/ z y)))
  188. (2 (values (/ (- x) z) (/ (- y) z)))
  189. (3 (values (/ z x) (/ y x)))
  190. (4 (values (/ z y) (/ (- x) y)))
  191. (5 (values (/ (- y) z) (/ (- x) z))))))
  192. (defun xyz-to-face-uv (p)
  193. (declare (type point p))
  194. (let ((face (get-xyz-face p)))
  195. (multiple-value-bind (u v)
  196. (valid-face-xyz-to-uv face p)
  197. (values face u v))))
  198. (defparameter +lookup-bits+ 4)
  199. (defvar *lookup-pos* (make-array (ash 1 (+ 2 (* +lookup-bits+ 2))) :element-type '(unsigned-byte 16)))
  200. (defvar *lookup-ij* (make-array (ash 1 (+ 2 (* +lookup-bits+ 2))) :element-type '(unsigned-byte 16)))
  201. (defparameter +ij-to-pos+
  202. #(#(0 1 3 2) ; canonical order
  203. #(0 3 1 2) ; axes swapped
  204. #(2 3 1 0) ; bits inverted
  205. #(2 1 3 0)); swapped & inverted
  206. "IJtoPos[orientation][ij] -> pos")
  207. (defparameter +pos-to-ij+
  208. #(#(0 1 3 2) ; canonical order: (0,0), (0,1), (1,1), (1,0)
  209. #(0 2 3 1) ; axes swapped: (0,0), (1,0), (1,1), (0,1)
  210. #(3 2 0 1) ; bits inverted: (1,1), (1,0), (0,0), (0,1)
  211. #(3 1 0 2)); swapped & inverted: (1,1), (0,1), (0,0), (1,0)
  212. "PosToIJ[orientation][pos] -> ij")
  213. (defparameter +pos-to-orient+
  214. (make-array 4 :element-type '(unsigned-byte 8)
  215. :initial-contents (list +swap-mask+ 0 0 (logior +swap-mask+ +invert-mask+)))
  216. "PosToOrientation[pos] -> orientation_modifier")
  217. (defun init-lookup ()
  218. (labels ((init-lookup-cell (level i j orig-orient pos orient)
  219. (if (= level +lookup-bits+)
  220. (let ((ij (+ (ash i +lookup-bits+) j)))
  221. (setf (aref *lookup-pos* (+ (ash ij 2) orig-orient)) (+ (ash pos 2) orient))
  222. (setf (aref *lookup-ij* (+ (ash pos 2) orig-orient)) (+ (ash ij 2) orient)))
  223. (let ((level (1+ level))
  224. (i (ash i 1))
  225. (j (ash j 1))
  226. (pos (ash pos 2))
  227. (r (aref +pos-to-ij+ orient)))
  228. (init-lookup-cell level (+ i (ash (aref r 0) -1)) (+ j (logand (aref r 0) 1))
  229. orig-orient pos (logxor orient (aref +pos-to-orient+ 0)))
  230. (init-lookup-cell level (+ i (ash (aref r 1) -1)) (+ j (logand (aref r 1) 1))
  231. orig-orient (+ pos 1) (logxor orient (aref +pos-to-orient+ 1)))
  232. (init-lookup-cell level (+ i (ash (aref r 2) -1)) (+ j (logand (aref r 2) 1))
  233. orig-orient (+ pos 2) (logxor orient (aref +pos-to-orient+ 2)))
  234. (init-lookup-cell level (+ i (ash (aref r 3) -1)) (+ j (logand (aref r 3) 1))
  235. orig-orient (+ pos 3) (logxor orient (aref +pos-to-orient+ 3)))))))
  236. (init-lookup-cell 0 0 0 0 0 0)
  237. (init-lookup-cell 0 0 0 +swap-mask+ 0 +swap-mask+)
  238. (init-lookup-cell 0 0 0 +invert-mask+ 0 +invert-mask+)
  239. (init-lookup-cell 0 0 0 (logior +swap-mask+ +invert-mask+) 0 (logior +swap-mask+ +invert-mask+))))
  240. (eval-when (:execute :compile-toplevel :load-toplevel)
  241. (init-lookup))
  242. (defun from-face-ij (face i j)
  243. (let ((n (ash face (1- +pos-bits+)))
  244. (bits (logand face +swap-mask+)))
  245. (macrolet ((get-bits (k)
  246. `(let ((mask (1- (ash 1 +lookup-bits+))))
  247. (setf bits
  248. (aref *lookup-pos*
  249. (+ bits
  250. (ash (logand mask (ash i (- (* ,k +lookup-bits+)))) (+ +lookup-bits+ 2))
  251. (ash (logand mask (ash j (- (* ,k +lookup-bits+)))) 2))))
  252. (setf n (logior n (ash (ash bits -2)
  253. (* ,k 2 +lookup-bits+)))
  254. bits (logand bits (logior +swap-mask+ +invert-mask+))))))
  255. (get-bits 7)
  256. (get-bits 6)
  257. (get-bits 5)
  258. (get-bits 4)
  259. (get-bits 3)
  260. (get-bits 2)
  261. (get-bits 1)
  262. (get-bits 0))
  263. (1+ (* 2 n))))
  264. (defun point-to-cell (p)
  265. (multiple-value-bind (face u v)
  266. (xyz-to-face-uv p)
  267. (from-face-ij face
  268. (st-to-ij (uv-to-st u))
  269. (st-to-ij (uv-to-st v)))))
  270. (defun cell-to-face-ij-orientation (id)
  271. (let* ((i 0)
  272. (j 0)
  273. (face (cell-face id))
  274. (bits (logand face +swap-mask+)))
  275. (macrolet ((get-bits (k)
  276. `(let ((nbits (if (= ,k 7) (- +max-level+ (* 7 +lookup-bits+)) +lookup-bits+)))
  277. (setf bits (aref *lookup-ij*
  278. (+ bits
  279. (ash (logand (ash id (- (1+ (* ,k 2 +lookup-bits+))))
  280. (1- (ash 1 (* 2 nbits)))) 2))))
  281. (setf i (+ i (ash (ash bits (- (+ +lookup-bits+ 2)))
  282. (* ,k +lookup-bits+)))
  283. j (+ j (ash (logand (ash bits -2)
  284. (1- (ash 1 +lookup-bits+)))
  285. (* ,k +lookup-bits+)))
  286. bits (logand bits (logior +swap-mask+ +invert-mask+))))))
  287. (get-bits 7)
  288. (get-bits 6)
  289. (get-bits 5)
  290. (get-bits 4)
  291. (get-bits 3)
  292. (get-bits 2)
  293. (get-bits 1)
  294. (get-bits 0)
  295. (values face i j (logxor bits (if (zerop (logand (cell-lsb id) #x1111111111111110)) 0 +swap-mask+))))))
  296. (defun cell-get-center-face-si-ti (id)
  297. (multiple-value-bind (face i j orient) (cell-to-face-ij-orientation id)
  298. (declare (ignore orient))
  299. (let ((delta (if (cell-is-leaf id) 1 (if (zerop (logand 1 (logxor i (ash id -2)))) 0 2))))
  300. (values face (+ delta (* 2 i)) (+ delta (* 2 j))))))
  301. (defun face-si-ti-to-xyz (face si ti)
  302. (face-uv-ti-xyz face
  303. (st-to-uv (si-ti-to-st si))
  304. (st-to-uv (si-ti-to-st ti))))
  305. (defun face-uv-ti-xyz (face u v)
  306. (declare (type double-float u v))
  307. (ecase face
  308. (0 (make-point :x 1d0 :y u :z v))
  309. (1 (make-point :x (- u) :y 1d0 :z v))
  310. (2 (make-point :x (- u) :y (- v) :z 1d0))
  311. (3 (make-point :x -1d0 :y (- v) :z (- u)))
  312. (4 (make-point :x v :y -1d0 :z (- u)))
  313. (5 (make-point :x v :y u :z -1d0))))
  314. (defun si-ti-to-st (si)
  315. (assert (<= si +max-si-ti+))
  316. (* si (/ 1d0 +max-si-ti+)))
  317. (defun cell-to-point-raw (id)
  318. (apply #'face-si-ti-to-xyz (multiple-value-list (cell-get-center-face-si-ti id))))
  319. (defun find-lsb-set (num)
  320. (loop for i upto 63 when (logbitp i num) return i))
  321. (defun find-msb-set (num)
  322. (loop for i from 63 downto 0 when (logbitp i num) return i))
  323. (defun lsb-for-level (level)
  324. (ash 1 (* 2 (- +max-level+ level))))
  325. (defun cell-face (id)
  326. (ash id (- +pos-bits+)))
  327. (defun cell-lsb (id)
  328. (logand id (1+ (lognot id))))
  329. (defun cell-level (id)
  330. (assert (not (zerop id)))
  331. (- +max-level+ (ash (find-lsb-set id) -1)))
  332. (defun cell-pos (id)
  333. (logand id (lognot 0)))
  334. (defun cell-is-valid (id)
  335. (and (< (cell-face id) +num-faces+)
  336. (not (zerop (logand (cell-lsb id) #x1555555555555555)))))
  337. (defun cell-child-position (id &optional level)
  338. (unless level (setf level (cell-level id)))
  339. (assert (cell-is-valid id))
  340. (assert (>= level 1))
  341. (assert (<= level (cell-level id)))
  342. (logand 3 (ash id (- (1+ (* 2 (- +max-level+ level)))))))
  343. (defun cell-range-min (id)
  344. (- id (1- (cell-lsb id)) ))
  345. (defun cell-range-max (id)
  346. (+ id (1- (cell-lsb id))))
  347. (defun cell-is-leaf (id)
  348. (not (zerop (logand id 1))))
  349. (defun cell-is-face (id)
  350. (zerop (logand id (1- (lsb-for-level 0)))))
  351. (defun cell-parent (id &optional level)
  352. (assert (cell-is-valid id))
  353. (if level (progn
  354. (assert (>= level 0))
  355. (assert (<= level (cell-level id))))
  356. (progn (assert (not (cell-is-face id)))))
  357. (let ((new-lsb (if level (lsb-for-level level) (ash (cell-lsb id) 2))))
  358. (logior new-lsb (logand id (1+ (lognot new-lsb))))))
  359. (defun cell-child (id pos)
  360. (assert (cell-is-valid id))
  361. (assert (not (cell-is-leaf id)))
  362. (+ id (* (+ (* 2 pos) 1 -4) (ash (cell-lsb id) -2))))
  363. (defun cell-to-string (id)
  364. (format nil "~a/~{~a~}" (cell-face id)
  365. (loop for level from 1 to (cell-level id)
  366. collect (cell-child-position id level))))
  367. ;;;;;;;;;
  368. ;;;
  369. (defun make-span (vector offset &optional size)
  370. (declare (type vector vector))
  371. (let ((length (length vector)))
  372. (assert (< offset length))
  373. (when size (assert (< (+ offset size) length)))
  374. (make-array (if size size (- length offset))
  375. :element-type (or (cadr (type-of vector)) t)
  376. :displaced-to vector
  377. :displaced-index-offset offset)))
  378. (deftype byte-vector () `(vector (unsigned-byte 8)))
  379. (defun make-encoder (size)
  380. (make-array size :element-type '(unsigned-byte 8) :fill-pointer 0 :adjustable t))
  381. (defun encoder-avail (encoder)
  382. (declare (type byte-vector encoder))
  383. (- (array-total-size encoder)
  384. (fill-pointer encoder)))
  385. (defun encoder-ensure (encoder size)
  386. (declare (type byte-vector encoder))
  387. (when (< (encoder-avail encoder) size)
  388. (adjust-array encoder (+ (fill-pointer encoder) size))))
  389. (defun encoder-put8 (encoder value)
  390. (declare (type byte-vector encoder)
  391. (type (unsigned-byte 8) value))
  392. (assert (> (encoder-avail encoder) 1))
  393. (vector-push value encoder)
  394. encoder)
  395. (defun encoder-putn (encoder vector)
  396. (declare (type byte-vector encoder)
  397. (type byte-vector vector))
  398. (loop for value across vector do (vector-push value encoder))
  399. encoder)
  400. (defun encoder-length (encoder)
  401. (declare (type byte-vector encoder))
  402. (fill-pointer encoder))
  403. (defparameter +varint-max32+ 5 "Maximum varint encoding length for uint32")
  404. (defun encoder-put-varint32 (encoder v)
  405. (declare (type byte-vector encoder)
  406. (type (unsigned-byte 32) v))
  407. (let ((b 128))
  408. (cond
  409. ((< v (ash 1 7)) (encoder-put8 encoder v))
  410. ((< v (ash 1 14))
  411. (encoder-put8 encoder (logand 255 (logior v b)))
  412. (encoder-put8 encoder (logand 255 (ash v -7))))
  413. ((< v (ash 1 21))
  414. (encoder-put8 encoder (logand 255 (logior v b)))
  415. (encoder-put8 encoder (logand 255 (logior (ash v -7) b)))
  416. (encoder-put8 encoder (logand 255 (ash v -14))))
  417. ((< v (ash 1 28))
  418. (encoder-put8 encoder (logand 255 (logior v b)))
  419. (encoder-put8 encoder (logand 255 (logior (ash v -7) b)))
  420. (encoder-put8 encoder (logand 255 (logior (ash v -14) b)))
  421. (encoder-put8 encoder (logand 255 (ash v -21))))
  422. (t
  423. (encoder-put8 encoder (logand 255 (logior v b)))
  424. (encoder-put8 encoder (logand 255 (logior (ash v -7) b)))
  425. (encoder-put8 encoder (logand 255 (logior (ash v -14) b)))
  426. (encoder-put8 encoder (logand 255 (logior (ash v -21) b)))
  427. (encoder-put8 encoder (logand 255 (ash v -28))))))
  428. encoder)
  429. (defparameter +varint-max64+ 10 "Maximum varint encoding length for uint64")
  430. (defun encoder-put-varint64 (encoder v)
  431. (declare (type byte-vector encoder)
  432. (type (unsigned-byte 64) v))
  433. (if (< v (ash 1 28))
  434. (encoder-put-varint32 encoder v)
  435. (let ((x32 (logand (1- (ash 1 32)) (logior v (ash 1 7) (ash 1 21))))
  436. (y32 (logand (1- (ash 1 32)) (logior v (ash 1 14) (ash 1 28)))))
  437. (encoder-put8 encoder (logand 255 x32))
  438. (encoder-put8 encoder (logand 255 (ash y32 -7)))
  439. (encoder-put8 encoder (logand 255 (ash x32 -14)))
  440. (encoder-put8 encoder (logand 255 (ash y32 -21)))
  441. (if (< v (ash 1 35))
  442. (encoder-put8 encoder (logand 255 (ash v -28)))
  443. (progn
  444. (encoder-put8 encoder (logand 255 (logior (ash v -28) (ash 1 7))))
  445. (encoder-put-varint32 encoder (ash v -35)))))))
  446. ;;; decoder ;;;
  447. (defun make-decoder (size &key (fill-pointer 0) initial-contents initial-element displaced-to)
  448. (apply #'make-array size
  449. :element-type '(unsigned-byte 8)
  450. :fill-pointer fill-pointer
  451. (append
  452. (when initial-contents `(:initial-contents ,initial-contents))
  453. (when initial-element `(:initial-element ,initial-element))
  454. (when displaced-to `(:displaced-to ,displaced-to :displaced-index-offset ,(fill-pointer displaced-to))))))
  455. (defun decoder-avail (decoder)
  456. (declare (type byte-vector decoder))
  457. (- (array-total-size decoder)
  458. (fill-pointer decoder)))
  459. (defun decoder-skip (decoder &optional (count 1))
  460. (declare (type byte-vector decoder)
  461. (type (unsigned-byte 32) count))
  462. (incf (fill-pointer decoder) count))
  463. (defun decoder-reset (decoder)
  464. (declare (type byte-vector decoder))
  465. (setf (fill-pointer decoder) 0)
  466. decoder)
  467. (defun make-sub-decoder (decoder size &optional will-decode)
  468. (when (>= (decoder-avail decoder) size)
  469. (prog1 (make-decoder size :fill-pointer (if will-decode 0 size) :displaced-to decoder)
  470. (decoder-skip decoder size))))
  471. (defmacro with-decoder-reset ((decoder &optional (offset 0)) &body body)
  472. (let ((fp (gensym "FP"))
  473. (dc (gensym "DC")))
  474. `(let* ((,dc ,decoder)
  475. (,fp (fill-pointer ,dc)))
  476. (unwind-protect
  477. (progn
  478. (setf (fill-pointer ,dc) ,offset)
  479. ,@body)
  480. (setf (fill-pointer ,dc) ,fp)))))
  481. (defun decoder-get8 (decoder)
  482. (declare (type byte-vector decoder))
  483. (prog1 (aref decoder (fill-pointer decoder))
  484. (decoder-skip decoder)))
  485. (defun decoder-peek (decoder &optional (offset 0))
  486. (declare (type byte-vector decoder)
  487. (type (unsigned-byte 32) offset))
  488. (aref decoder (+ (fill-pointer decoder) offset)))
  489. (defun decoder-get-varint64 (decoder)
  490. (declare (type byte-vector decoder))
  491. (loop
  492. for b = (decoder-get8 decoder) then (decoder-get8 decoder)
  493. for i from 0
  494. with result = 0
  495. do (incf result (ash (if (zerop i) b (1- b)) (* i 7)))
  496. until (< b 128)
  497. finally (return result)))
  498. ;;;
  499. ;;; encoded-uint-vector ;;;
  500. ;;;
  501. (defun encode-uint-with-length (value length encoder)
  502. (declare (type integer value)
  503. (type (integer 0 8) length)
  504. (type byte-vector encoder))
  505. (assert (>= (encoder-avail encoder) length))
  506. (loop repeat length
  507. do (encoder-put8 encoder (logand value 255))
  508. do (setf value (ash value -8)))
  509. (assert (zerop value))
  510. (values))
  511. (defun get-uint-with-length (decoder length)
  512. (declare (type byte-vector decoder)
  513. (type (integer 0 8) length))
  514. (let ((value 0))
  515. (loop for i from (1- length) downto 0
  516. do (setf value (+ (ash value 8)
  517. (decoder-peek decoder i))))
  518. (decoder-skip decoder length)
  519. value))
  520. (defun get-type-byte-size (v-type)
  521. (values
  522. (typecase v-type
  523. (list (case (car v-type)
  524. ((array simple-array vector) (get-type-byte-size (cadr v-type)))
  525. ((unsigned-byte signed-byte) (ceiling (cadr v-type) 8))
  526. (integer (when (cadr v-type) (ceiling (log (cadr v-type) 2) 8)))
  527. (t nil)))
  528. (symbol (when (eql v-type 'fixnum) 8))
  529. (t nil))))
  530. (defun encode-uint-vector (v encoder)
  531. (declare (type vector v)
  532. (type byte-vector encoder))
  533. (let ((length (1+ (ash (find-msb-set
  534. (loop
  535. for value across v
  536. with bits = 1
  537. do (setf bits (logior bits value))
  538. finally (return bits)))
  539. -3))))
  540. (assert (<= 1 length 8))
  541. (encoder-ensure encoder (+ (* (length v) length)
  542. +varint-max64+))
  543. (encoder-put-varint64 encoder
  544. (logior (* (length v) (get-type-byte-size (type-of v)))
  545. (1- length)))
  546. (loop for value across v
  547. do (encode-uint-with-length value length encoder))
  548. encoder))
  549. (defstruct encoded-uint-vector
  550. (size 0 :type (unsigned-byte 32))
  551. (length 1 :type (integer 1 8))
  552. (bytes 8 :type (member 1 2 4 8))
  553. (data nil :type byte-vector))
  554. (defun encoded-uint-vector-init (decoder &optional (bytes 8))
  555. (declare (type byte-vector decoder)
  556. (type (member 1 2 4 8) bytes))
  557. (let ((size-len (decoder-get-varint64 decoder)))
  558. (when size-len
  559. (let* ((size (floor size-len bytes))
  560. (length (1+ (logand size-len (1- bytes))))
  561. (bytes-length (* size length))
  562. (data (make-sub-decoder decoder bytes-length)))
  563. (when data
  564. (make-encoded-uint-vector :size size :length length :bytes bytes
  565. :data data))))))
  566. (defun encoded-uint-vector-get (euv pos)
  567. (declare (type encoded-uint-vector euv)
  568. (type (unsigned-byte 32) pos))
  569. (with-slots (size data length) euv
  570. (assert (< pos size))
  571. (with-decoder-reset (data (* pos length))
  572. (get-uint-with-length data length))))
  573. (defun encoded-uint-vector-decode (euv)
  574. (declare (type encoded-uint-vector euv))
  575. (let* ((size (encoded-uint-vector-size euv))
  576. (result (make-array size :element-type '(unsigned-byte 64))))
  577. (dotimes (i size result)
  578. (setf (aref result i) (encoded-uint-vector-get euv i)))))
  579. ;; encoded-string-vector
  580. (defstruct string-vector-encoder
  581. (offsets (make-array 0 :element-type '(unsigned-byte 64) :adjustable t :fill-pointer 0)
  582. :type (vector (unsigned-byte 64)))
  583. (data (make-encoder 0) :type byte-vector))
  584. (defun string-vector-encoder-add-via-encoder (sve)
  585. (declare (type string-vector-encoder sve))
  586. (with-slots (offsets data) sve
  587. (vector-push-extend (length data) offsets)
  588. data))
  589. (defun string-vector-encoder-size (sve)
  590. (declare (type string-vector-encoder sve))
  591. (length (string-vector-encoder-offsets sve)))
  592. (defun string-vector-encoder-encode (sve encoder)
  593. (declare (type string-vector-encoder sve)
  594. (type byte-vector encoder))
  595. (with-slots (offsets data) sve
  596. (vector-push-extend (length data) offsets)
  597. (encode-uint-vector (make-span offsets 1) encoder)
  598. (encoder-ensure encoder (encoder-length data))
  599. (encoder-putn encoder data)
  600. encoder))
  601. (defstruct encoded-string-vector
  602. (offsets nil :type encoded-uint-vector)
  603. (data nil :type byte-vector))
  604. (defun encoded-string-vector-init (decoder)
  605. (declare (type byte-vector decoder))
  606. (let ((offsets (encoded-uint-vector-init decoder 8)))
  607. (when offsets
  608. (let* ((size (encoded-uint-vector-size offsets))
  609. (length (if (zerop size) 0 (encoded-uint-vector-get offsets (1- size))))
  610. (data (make-sub-decoder decoder length)))
  611. (when data
  612. (make-encoded-string-vector :offsets offsets :data data))))))
  613. (defun encoded-string-vector-size (esv)
  614. (declare (type encoded-string-vector esv))
  615. (encoded-uint-vector-size (encoded-string-vector-offsets esv)))
  616. (defun encoded-string-vector-get (esv index &optional fill-pointer)
  617. (declare (type encoded-string-vector esv)
  618. (type (unsigned-byte 32) index))
  619. (with-slots (offsets data) esv
  620. (let* ((start (if (zerop index) 0 (encoded-uint-vector-get offsets (1- index))))
  621. (length (- (encoded-uint-vector-get offsets index) start)))
  622. (with-decoder-reset (data start)
  623. (make-decoder length :displaced-to data :fill-pointer fill-pointer)))))
  624. (defun encoded-string-vector-decode (esv)
  625. (declare (type encoded-string-vector esv))
  626. (let* ((size (encoded-string-vector-size esv))
  627. (result (make-array size :element-type '(vector (unsigned-byte 8)))))
  628. (dotimes (i size result)
  629. (setf (aref result i) (encoded-string-vector-get esv i)))))
  630. ;; encoded-cell-id-vector
  631. (defstruct encoded-cell-id-vector
  632. (deltas nil :type encoded-uint-vector)
  633. (base 0 :type (unsigned-byte 64))
  634. (shift 0 :type (unsigned-byte 8)))
  635. (defun encoded-cell-id-vector-size (ecv)
  636. (declare (type encoded-cell-id-vector ecv))
  637. (with-slots (deltas) ecv
  638. (when deltas
  639. (encoded-uint-vector-size deltas))))
  640. (defun encoded-cell-id-vector-get (ecv i)
  641. (declare (type encoded-cell-id-vector ecv)
  642. (type (unsigned-byte 32) i))
  643. (with-slots (deltas base shift) ecv
  644. (+ base
  645. (ash (encoded-uint-vector-get deltas i) shift))))
  646. (defun encoded-cell-id-vector-init (decoder)
  647. (declare (type byte-vector decoder))
  648. (when (>= (decoder-avail decoder) 2)
  649. (let* ((code+len (decoder-get8 decoder))
  650. (shift-code (ash code+len -3)))
  651. (when (= shift-code 31)
  652. (setf shift-code (+ 29 (decoder-get8 decoder))))
  653. (let* ((base-len (logand code+len 7))
  654. (base (get-uint-with-length decoder base-len))
  655. shift)
  656. (when base
  657. (setf base (ash base (- 64 (* 8 (max 1 base-len)))))
  658. (if (>= shift-code 29)
  659. (setf shift (1+ (* 2 (- shift-code 29)))
  660. base (logior base (ash 1 (1- shift))))
  661. (setf shift (* 2 shift-code)))
  662. (make-encoded-cell-id-vector
  663. :deltas (encoded-uint-vector-init decoder 8)
  664. :base base
  665. :shift shift))))))
  666. (defun encoded-cell-id-vector-decode (eciv)
  667. (declare (type encoded-cell-id-vector eciv))
  668. (let* ((size (encoded-cell-id-vector-size eciv))
  669. (result (make-array size :element-type '(unsigned-byte 64))))
  670. (dotimes (i size result)
  671. (setf (aref result i) (encoded-cell-id-vector-get eciv i)))))
  672. (defun encode-cell-id-vector (v encoder)
  673. (declare (type (vector (unsigned-byte 64)) v)
  674. (type byte-vector encoder))
  675. (destructuring-bind (v-or v-and v-min v-max)
  676. (loop for cell-id across v
  677. with v-or = 0
  678. with v-and = -1
  679. do (setf v-or (logior v-or cell-id)
  680. v-and (logand v-and cell-id))
  681. maximizing cell-id into v-max
  682. minimizing cell-id into v-min
  683. finally (return (list v-or v-and v-min v-max)))
  684. (let ((e-base 0)
  685. (e-base-len 0)
  686. (e-shift 0)
  687. (e-max-delta-msb 0))
  688. (when (> v-or 0)
  689. (setf e-shift (min 56 (dpb 0 (byte 1 0) (find-lsb-set v-or))))
  690. (unless (zerop (logand v-and (ash 1 e-shift)))
  691. (incf e-shift))
  692. (let ((e-bytes (1- (ash 1 65))))
  693. (dotimes (len 8)
  694. (let* ((t-base (mask-field (byte (* 8 len) (- 64 (* 8 len))) v-min))
  695. (t-max-delta-msb (max 0 (1- (integer-length (ash (- v-max t-base)
  696. (- e-shift))))))
  697. (t-bytes (+ len (* (length v) (1+ (ash t-max-delta-msb -3))))))
  698. (when (< t-bytes e-bytes)
  699. (setf e-base t-base
  700. e-base-len len
  701. e-max-delta-msb t-max-delta-msb
  702. e-bytes t-bytes)))))
  703. (when (and (logbitp 0 e-shift)
  704. (not (= (logand e-max-delta-msb 7) 7)))
  705. (decf e-shift)))
  706. (assert (<= e-base-len 7))
  707. (assert (<= e-shift 56))
  708. (encoder-ensure encoder (+ 2 e-base-len))
  709. (let ((shift-code (ash e-shift -1)))
  710. (when (logbitp 0 e-shift)
  711. (setf shift-code (min 31 (+ shift-code 29))))
  712. (encoder-put8 encoder (logior (ash shift-code 3)
  713. e-base-len))
  714. (when (= shift-code 31)
  715. (encoder-put8 encoder (ash e-shift -1))))
  716. (encode-uint-with-length
  717. (ash e-base (- (- 64 (* 8 (max 1 e-base-len))))) e-base-len encoder)
  718. (let ((deltas (make-array (length v) :element-type '(unsigned-byte 64))))
  719. (loop for cell-id across v
  720. for i from 0
  721. do (setf (aref deltas i)
  722. (ash (- cell-id e-base) (- e-shift))))
  723. (encode-uint-vector deltas encoder)))))
  724. ;; encoded-point-vector
  725. (defstruct encoded-point-vector
  726. (size 0 :type (unsigned-byte 32))
  727. (blocks nil :type encoded-string-vector)
  728. (base 0 :type (unsigned-byte 64))
  729. (level 0 :type (unsigned-byte 8))
  730. (have-exceptions nil :type boolean))
  731. (defstruct cell-point
  732. (level 0 :type (unsigned-byte 8))
  733. (face 0 :type (unsigned-byte 8))
  734. (si 0 :type (unsigned-byte 32))
  735. (ti 0 :type (unsigned-byte 32)))
  736. (defparameter +block-shift+ 4)
  737. (defparameter +block-size+ (ash 1 +block-shift+))
  738. (defparameter +exception+ -1)
  739. (defstruct block-code
  740. (delta-bits 0 :type (signed-byte 32))
  741. (offset-bits 0 :type (signed-byte 32))
  742. (overlap-bits 0 :type (signed-byte 32)))
  743. (defun bit-mask (n)
  744. (declare (type (integer 0 64) n))
  745. (if (zerop n) 0
  746. (dpb -1 (byte n 0) 0)))
  747. (defun max-bits-for-level (level)
  748. (declare (type (integer 0 30) level))
  749. (+ (* 2 level) 3))
  750. (defun base-shift (level base-bits)
  751. (declare (type (integer 0 30) level)
  752. (type (integer 0 64) base-bits))
  753. (max 0 (- (max-bits-for-level level) base-bits)))
  754. (defun encode-point-vector-compact (points encoder)
  755. (declare (type (vector point) points)
  756. (type byte-vector encoder))
  757. (multiple-value-bind (level cell-points)
  758. (choose-best-level points)
  759. (when (< level 0)
  760. (return (encode-point-vector-fast points encoder)))
  761. (multiple-value-bind (values have-exceptions) (convert-cells-to-values cell-points level)
  762. (multiple-value-bind (base base-bits) (choose-base values level have-exceptions)
  763. (let ((num-blocks (ash (+ (length values) +block-size+ -1) (- +block-shift+)))
  764. (base-bytes (ash base-bits -3)))
  765. (encoder-ensure encoder (+ 2 base-bytes))
  766. (let ((last-block-count (- (length values) (* +block-size+ (1- num-blocks))))
  767. (blocks (make-string-vector-encoder))
  768. (exceptions (make-array 0 :element-type point :adjustable t)))
  769. (assert (>= last-block-count 0))
  770. (assert (<= last-block-count +block-size+))
  771. (assert (<= base-bytes 7))
  772. (assert (<= level 30))
  773. (encoder-put8 (logior +encoded-point-vector-cell-ids+
  774. (ash have-exceptions 3)
  775. (ash (1- last-block-count) 4)))
  776. (encoder-put8 (logior base-bytes
  777. (ash level 3)))
  778. (encode-uint-with-length (ash base (- (base-shift level base-bits))) base-bytes encoder)
  779. (loop for i from 0 to (length values) step +block-size+
  780. for block-size = (min +block-size+ (- (length values) i))
  781. for code = (get-block-code values i block-size base have-exceptions)
  782. do (let ((block (string-vector-encoder-add-via-encoder blocks))
  783. (offset-bytes (ash (block-code-offset-bits code) -3))
  784. (delta-nibbles (ash (block-code-delta-bits code) -2))
  785. (overlap-nibbles (ash (block-code-overlap-bits code) -2)))
  786. (encoder-ensure block (+ 1 offset-bytes (* delta-nibbles (ash +block-size+ -1))))
  787. (assert (<= (- offset-bytes overlap-nibbles) 7))
  788. (assert (<= overlap-nibbles 1))
  789. (assert (<= delta-nibbles 16))
  790. (encoder-put8 block (logior (- offset-bytes overlap-nibbles)
  791. (ash overlap-nibbles 3)
  792. (ash (1- delta-nibbles) 4)))
  793. (let ((offset (1- (ash 1 65)))
  794. (num-exceptions 0))
  795. (dotimes (i block-size)
  796. (let ((value (aref values (+ i j))))
  797. (if (equalp value +exception+)
  798. (incf num-exceptions)
  799. (progn
  800. (assert (>= value base))
  801. (setf offset (min offset (- value base)))))))
  802. (when (= num-exceptions block-size)
  803. (setf offset))
  804. (let ((offset-shift (- (block-code-delta-bits code)
  805. (block-code-overlap-bits code))))
  806. (setf offset (logand offset (lognot (bit-mask offset-shift))))
  807. (assert (= (zerop offset) (zerop offset-bytes)))
  808. (when (> offset 0)
  809. (encode-uint-with-length (ash offset (- offset-shift)) offset-bytes block))
  810. (let ((delta-bytes (ash (1+ delta-nibbles) -1)))
  811. (setf (fill-pointer exceptions) 0)
  812. (dotimes (j block-size)
  813. (let ((delta 0)
  814. (value (arerf values (+ i j))))
  815. (if (equalp value +exception+)
  816. (progn
  817. (setf delta (length exceptions))
  818. (vector-push-extend (aref points (+ i j)) exceptions))
  819. (progn
  820. (assert (>= value (+ offset base)))
  821. (setf delta (- value (+ offset base)))
  822. (when have-exceptions
  823. (assert (<= delta (- (1- (ash 1 65)) +block-size+)))
  824. (incf delta +block-size+))))
  825. (assert (<= delta (bit-mask (block-code-delta-bits delta))))
  826. (when (and (logbitp delta-nibbles 0)
  827. (logbitp j 0))
  828. ())
  829. )))
  830. ))))
  831. ))))))