fisher-breaks.lisp 3.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. (defun make-pair (value weight) (cons value weight))
  2. (defun get-value (pair) (car pair))
  3. (defun get-weight (pair) (cdr pair))
  4. (defun get-value-count-pairs (values)
  5. (let ((sorted (sort (copy-list values) #'<)))
  6. (loop for x in (rest sorted)
  7. with val = (first sorted)
  8. with cnt = 1
  9. if (< val x) collect (make-pair val cnt) into res and do (setf cnt 1 val x)
  10. else do (incf cnt)
  11. finally (return (append res (list (make-pair val cnt)))))))
  12. (defun calc-weightened (pairs)
  13. (loop for (value . weight) in pairs
  14. summing weight into acc-weight
  15. summing (* value weight) into acc-weightened
  16. collect (make-pair acc-weightened acc-weight)))
  17. (defun jenks-fisher (k values)
  18. (declare (optimize (debug 2)))
  19. (when (> k 1)
  20. (let* ((pairs (get-value-count-pairs values))
  21. (m (length pairs))
  22. (buf-size (- m (1- k)))
  23. (weightened (calc-weightened pairs))
  24. (prev-ssm (make-array buf-size))
  25. (cur-ssm (make-array buf-size))
  26. (cb (make-array (list (1- k) buf-size)))
  27. (compl 1))
  28. (labels ((get-wv (b e)
  29. (assert (not (zerop b)))
  30. (assert (<= b e))
  31. (assert (< e m))
  32. (- (get-value (elt weightened e)) (get-value (elt weightened (1- b)))))
  33. (get-w (b e)
  34. (assert (not (zerop b)))
  35. (assert (<= b e))
  36. (assert (< e m))
  37. (- (get-weight (elt weightened e)) (get-weight (elt weightened (1- b)))))
  38. (get-ssm (b e)
  39. (/ (expt (get-wv b e) 2) (get-w b e)))
  40. (calc-ssm (p i)
  41. (+ (elt prev-ssm p) (get-ssm (+ p compl) (+ i compl))))
  42. (find-max-break-index (i bp ep)
  43. (assert (< bp ep))
  44. (assert (<= bp i))
  45. (assert (<= ep (1+ i)))
  46. (assert (< i buf-size))
  47. (assert (<= ep buf-size))
  48. (let ((found-point bp)
  49. (max-ssm (calc-ssm bp i)))
  50. (loop for point from (1+ bp) below ep
  51. for ssm = (calc-ssm point i)
  52. when (> ssm max-ssm)
  53. do (setf max-ssm ssm found-point point))
  54. (setf (elt cur-ssm i) max-ssm)
  55. found-point))
  56. (calc-range (bi ei bp ep)
  57. (assert (<= bp bi ei))
  58. (assert (<= ep ei))
  59. (unless (= bi ei)
  60. (assert (< bp ep))
  61. (let* ((mi (floor (+ bi ei) 2))
  62. (mp (find-max-break-index mi bp (min ep (1+ mi)))))
  63. (assert (<= bp mp mi))
  64. (assert (< mp ep))
  65. (calc-range bi mi bp (min mi (1+ mp)))
  66. (setf (aref cb (1- compl) mi) mp)
  67. (calc-range (1+ mi) ei mp ep)))))
  68. ;; Init first prev-ssm
  69. (loop for i below buf-size
  70. for (value . weight) in weightened
  71. do (setf (elt prev-ssm i)
  72. (/ (expt value 2) weight)))
  73. ;; Calc all
  74. (dotimes (i (- k 2))
  75. (setf compl (1+ i))
  76. (calc-range 0 buf-size 0 buf-size)
  77. (rotatef prev-ssm cur-ssm))
  78. (cons (get-value (elt pairs 0))
  79. (reverse
  80. (loop for idx from (1- k) downto 1
  81. with last-bi = (find-max-break-index (1- buf-size) 0 buf-size)
  82. collect (get-value (elt pairs (+ last-bi idx)))
  83. do (assert (< last-bi buf-size))
  84. when (> idx 1)
  85. do (setf last-bi (aref cb (1- idx) last-bi)))))))))