rss.lisp 4.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. (in-package #:chatikbot)
  2. (defstruct feed url title seen-guids next-fetch (period 300))
  3. (defstruct feed-item guid link title description published)
  4. (defparameter *rss-min-period* 60 "Min rss refresh period in seconds")
  5. (defparameter *rss-max-period* 600 "Min rss refresh period in seconds")
  6. (defparameter *rss-change-rate* 0.1 "Refresh period adjustment rata")
  7. (defun find-rss-url (url)
  8. (ignore-errors
  9. (multiple-value-bind (body status headers uri stream) (dex:get url)
  10. (declare (ignore status stream))
  11. (let ((root (plump:parse body))
  12. (content-type (gethash "content-type" headers)))
  13. (cond
  14. ((alexandria:starts-with-subseq "text/html" content-type)
  15. (loop for link in (plump:get-elements-by-tag-name root "link")
  16. when (string= "application/rss+xml" (plump:attribute link "type"))
  17. do (return (values (plump:attribute link "href")))))
  18. ((string= "rss" (plump:tag-name (plump:first-element root)))
  19. (quri:render-uri uri)))))))
  20. (defun build-feed (url)
  21. (alexandria:when-let* ((uri (find-rss-url url))
  22. (root (plump:parse (dex:get uri))))
  23. (make-feed :url uri :title (child-text root "title"))))
  24. (defun adjust-period (feed had-new?)
  25. "Adjust the period of feed based on whenever there were new items. With clamping"
  26. (let* ((p (feed-period feed))
  27. (diff (round (* p *rss-change-rate*))))
  28. (setf (feed-period feed)
  29. (min *rss-max-period*
  30. (max *rss-min-period*
  31. (if had-new? (- p diff) (+ p diff)))))))
  32. (defun need-fetch-p (feed)
  33. (or (null (feed-next-fetch feed))
  34. (local-time:timestamp> (local-time:now) (feed-next-fetch feed))))
  35. (defun fetch-new-items (feed)
  36. (let ((items
  37. (loop for item in (parse-rss (feed-url feed))
  38. unless (member (feed-item-guid item) (feed-seen-guids feed) :test #'equal)
  39. do (pushnew (feed-item-guid item) (feed-seen-guids feed) :test #'equal)
  40. and collect item)))
  41. (adjust-period feed (consp items))
  42. (setf (feed-next-fetch feed)
  43. (local-time:timestamp+ (local-time:now)
  44. (feed-period feed)
  45. :sec))
  46. items))
  47. (defun text-with-cdata (node)
  48. "Compiles all text nodes within the nesting-node into one string."
  49. (with-output-to-string (stream)
  50. (labels ((r (node)
  51. (loop for child across (plump:children node)
  52. do (typecase child
  53. (plump:text-node (write-string (plump:text child) stream))
  54. (plump:cdata (write-string (plump:text child) stream))
  55. (plump:nesting-node (r child))))))
  56. (r node))))
  57. (defun child-text (node tag)
  58. (alexandria:when-let (child (car (nreverse
  59. (plump:get-elements-by-tag-name node tag))))
  60. (text-with-cdata child)))
  61. (defun clean-text (text)
  62. (when text (plump:text (plump:parse text))))
  63. (defun parse-rss (url)
  64. (let ((plump:*tag-dispatchers* plump:*xml-tags*))
  65. (loop for item in (plump:get-elements-by-tag-name
  66. (plump:parse (dex:get url))
  67. "item")
  68. collect (make-feed-item :guid (child-text item "guid")
  69. :link (child-text item "link")
  70. :title (clean-text (child-text item "title"))
  71. :description (clean-text (child-text item "description"))
  72. :published (local-time:universal-to-timestamp
  73. (date-time-parser:parse-date-time (child-text item "pubDate")))))))
  74. (defun format-feed-item (item)
  75. (format nil "~A~@[ @ ~A~]~@[~%~%~A~]~%~%~A"
  76. (feed-item-title item)
  77. (alexandria:when-let (ts (feed-item-published item))
  78. (local-time:format-timestring
  79. nil ts
  80. :format '((:year 2) "-" (:month 2) "-" (:day 2) " " (:hour 2) ":" (:min 2))))
  81. (feed-item-description item)
  82. (feed-item-link item)))