rss.lisp 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. (in-package #:chatikbot)
  2. (defstruct feed url title seen-guids next-fetch (period 300))
  3. (defstruct feed-item guid link title description published)
  4. (defparameter *rss-min-period* 60 "Min rss refresh period in seconds")
  5. (defparameter *rss-max-period* 1800 "Max rss refresh period in seconds")
  6. (defparameter *rss-change-rate* 0.1 "Refresh period adjustment rata")
  7. (defun http-default (url)
  8. (let ((uri (puri:uri url)))
  9. (puri:render-uri
  10. (if (null (puri:uri-scheme uri))
  11. (puri:uri (format nil "http://~A" url))
  12. uri)
  13. nil)))
  14. (defun get-by-tag (node tag)
  15. (nreverse (plump:get-elements-by-tag-name node tag)))
  16. (defun url-parse (url)
  17. (multiple-value-bind (body status headers uri stream)
  18. (drakma:http-request (http-default url) :force-binary t)
  19. (declare (ignore status headers stream))
  20. (values
  21. (plump:parse (flexi-streams:octets-to-string body :external-format :utf-8))
  22. uri)))
  23. (defun find-rss-links (url)
  24. (handler-case
  25. (multiple-value-bind (root uri) (url-parse url)
  26. (loop for link in (get-by-tag root "link")
  27. when (string= "application/rss+xml" (plump:attribute link "type"))
  28. collect (list (plump:attribute link "title")
  29. (puri:render-uri
  30. (puri:merge-uris
  31. (puri:uri (plump:attribute link "href"))
  32. uri) nil))))
  33. (error (e) (log:error e))))
  34. (defun build-feed (url)
  35. (let ((root (url-parse url)))
  36. (alexandria:when-let (rss (car (get-by-tag root "rss")))
  37. (make-feed :url url :title (child-text rss "title")))))
  38. (defun adjust-period (feed coeff)
  39. "Adjust the period of feed based on whenever there were new items. With clamping"
  40. (let* ((p (feed-period feed))
  41. (diff (round (* p *rss-change-rate*))))
  42. (setf (feed-period feed)
  43. (min *rss-max-period*
  44. (max *rss-min-period*
  45. (- p (* coeff diff)))))))
  46. (defun need-fetch-p (feed)
  47. (or (null (feed-next-fetch feed))
  48. (local-time:timestamp> (local-time:now) (feed-next-fetch feed))))
  49. (defun fetch-new-items (feed)
  50. (let ((items
  51. (loop for item in (parse-rss (feed-url feed))
  52. unless (member (feed-item-guid item) (feed-seen-guids feed) :test #'equal)
  53. do (pushnew (feed-item-guid item) (feed-seen-guids feed) :test #'equal)
  54. and collect item)))
  55. (adjust-period feed (if (consp items) (length items) -1))
  56. (setf (feed-next-fetch feed)
  57. (local-time:timestamp+ (local-time:now)
  58. (feed-period feed)
  59. :sec))
  60. items))
  61. (defun trim-nil (text)
  62. (when text
  63. (let ((text (string-trim " " text)))
  64. (unless (zerop (length text))
  65. text))))
  66. (defun text-with-cdata (node)
  67. "Compiles all text nodes within the nesting-node into one string."
  68. (with-output-to-string (stream)
  69. (labels ((r (node)
  70. (loop for child across (plump:children node)
  71. do (typecase child
  72. (plump:text-node (write-string (plump:text child) stream))
  73. (plump:cdata (write-string (plump:text child) stream))
  74. (plump:nesting-node (r child))))))
  75. (r node))))
  76. (defun child-text (node tag)
  77. (alexandria:when-let (child (car (get-by-tag node tag)))
  78. (trim-nil (text-with-cdata child))))
  79. (defun clean-text (text)
  80. (when text (trim-nil (plump:text (plump:parse text)))))
  81. (defun parse-rss (url)
  82. (let ((plump:*tag-dispatchers* plump:*xml-tags*))
  83. (loop for item in (get-by-tag (url-parse url) "item")
  84. collect (make-feed-item :guid (child-text item "guid")
  85. :link (child-text item "link")
  86. :title (clean-text (child-text item "title"))
  87. :description (clean-text (child-text item "description"))
  88. :published (local-time:universal-to-timestamp
  89. (date-time-parser:parse-date-time (child-text item "pubDate")))))))
  90. (defun format-feed-item (feed item)
  91. (format nil "~A~@[ @ ~A~]~%~A~%~A"
  92. (feed-title feed)
  93. (alexandria:when-let (ts (feed-item-published item))
  94. (local-time:format-timestring
  95. nil ts
  96. :format '((:year 2) "-" (:month 2) "-" (:day 2)
  97. " " (:hour 2) ":" (:min 2))))
  98. (feed-item-title item)
  99. (feed-item-link item)))