|
|
@@ -0,0 +1,94 @@
|
|
|
+(in-package #:chatikbot)
|
|
|
+
|
|
|
+(defstruct feed url title seen-guids next-fetch (period 300))
|
|
|
+(defstruct feed-item guid link title description published)
|
|
|
+
|
|
|
+(defparameter *rss-min-period* 60 "Min rss refresh period in seconds")
|
|
|
+(defparameter *rss-max-period* 600 "Min rss refresh period in seconds")
|
|
|
+(defparameter *rss-change-rate* 0.1 "Refresh period adjustment rata")
|
|
|
+
|
|
|
+(defun find-rss-url (url)
|
|
|
+ (ignore-errors
|
|
|
+ (multiple-value-bind (body status headers uri stream) (dex:get url)
|
|
|
+ (declare (ignore status stream))
|
|
|
+ (let ((root (plump:parse body))
|
|
|
+ (content-type (gethash "content-type" headers)))
|
|
|
+ (cond
|
|
|
+ ((alexandria:starts-with-subseq "text/html" content-type)
|
|
|
+ (loop for link in (plump:get-elements-by-tag-name root "link")
|
|
|
+ when (string= "application/rss+xml" (plump:attribute link "type"))
|
|
|
+ do (return (values (plump:attribute link "href")))))
|
|
|
+ ((string= "rss" (plump:tag-name (plump:first-element root)))
|
|
|
+ (quri:render-uri uri)))))))
|
|
|
+
|
|
|
+(defun build-feed (url)
|
|
|
+ (alexandria:when-let* ((uri (find-rss-url url))
|
|
|
+ (root (plump:parse (dex:get uri))))
|
|
|
+ (make-feed :url uri :title (child-text root "title"))))
|
|
|
+
|
|
|
+(defun adjust-period (feed had-new?)
|
|
|
+ "Adjust the period of feed based on whenever there were new items. With clamping"
|
|
|
+ (let* ((p (feed-period feed))
|
|
|
+ (diff (round (* p *rss-change-rate*))))
|
|
|
+ (setf (feed-period feed)
|
|
|
+ (min *rss-max-period*
|
|
|
+ (max *rss-min-period*
|
|
|
+ (if had-new? (- p diff) (+ p diff)))))))
|
|
|
+
|
|
|
+(defun need-fetch-p (feed)
|
|
|
+ (or (null (feed-next-fetch feed))
|
|
|
+ (local-time:timestamp> (local-time:now) (feed-next-fetch feed))))
|
|
|
+
|
|
|
+(defun fetch-new-items (feed)
|
|
|
+ (let ((items
|
|
|
+ (loop for item in (parse-rss (feed-url feed))
|
|
|
+ unless (member (feed-item-guid item) (feed-seen-guids feed) :test #'equal)
|
|
|
+ do (pushnew (feed-item-guid item) (feed-seen-guids feed) :test #'equal)
|
|
|
+ and collect item)))
|
|
|
+ (adjust-period feed (consp items))
|
|
|
+ (setf (feed-next-fetch feed)
|
|
|
+ (local-time:timestamp+ (local-time:now)
|
|
|
+ (feed-period feed)
|
|
|
+ :sec))
|
|
|
+ items))
|
|
|
+
|
|
|
+(defun text-with-cdata (node)
|
|
|
+ "Compiles all text nodes within the nesting-node into one string."
|
|
|
+ (with-output-to-string (stream)
|
|
|
+ (labels ((r (node)
|
|
|
+ (loop for child across (plump:children node)
|
|
|
+ do (typecase child
|
|
|
+ (plump:text-node (write-string (plump:text child) stream))
|
|
|
+ (plump:cdata (write-string (plump:text child) stream))
|
|
|
+ (plump:nesting-node (r child))))))
|
|
|
+ (r node))))
|
|
|
+
|
|
|
+(defun child-text (node tag)
|
|
|
+ (alexandria:when-let (child (car (nreverse
|
|
|
+ (plump:get-elements-by-tag-name node tag))))
|
|
|
+ (text-with-cdata child)))
|
|
|
+
|
|
|
+(defun clean-text (text)
|
|
|
+ (when text (plump:text (plump:parse text))))
|
|
|
+
|
|
|
+(defun parse-rss (url)
|
|
|
+ (let ((plump:*tag-dispatchers* plump:*xml-tags*))
|
|
|
+ (loop for item in (plump:get-elements-by-tag-name
|
|
|
+ (plump:parse (dex:get url))
|
|
|
+ "item")
|
|
|
+ collect (make-feed-item :guid (child-text item "guid")
|
|
|
+ :link (child-text item "link")
|
|
|
+ :title (clean-text (child-text item "title"))
|
|
|
+ :description (clean-text (child-text item "description"))
|
|
|
+ :published (local-time:universal-to-timestamp
|
|
|
+ (date-time-parser:parse-date-time (child-text item "pubDate")))))))
|
|
|
+
|
|
|
+(defun format-feed-item (item)
|
|
|
+ (format nil "~A~@[ @ ~A~]~@[~%~%~A~]~%~%~A"
|
|
|
+ (feed-item-title item)
|
|
|
+ (alexandria:when-let (ts (feed-item-published item))
|
|
|
+ (local-time:format-timestring
|
|
|
+ nil ts
|
|
|
+ :format '((:year 2) "-" (:month 2) "-" (:day 2) " " (:hour 2) ":" (:min 2))))
|
|
|
+ (feed-item-description item)
|
|
|
+ (feed-item-link item)))
|