| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113 |
- (in-package #:chatikbot)
- (defstruct feed url title seen-guids next-fetch (period 300))
- (defstruct feed-item guid link title description published)
- (defparameter *rss-min-period* 60 "Min rss refresh period in seconds")
- (defparameter *rss-max-period* 1800 "Max rss refresh period in seconds")
- (defparameter *rss-change-rate* 0.1 "Refresh period adjustment rata")
- (defun http-default (url)
- (let ((uri (quri:uri url)))
- (quri:render-uri
- (if (null (quri:uri-scheme uri))
- (quri:uri (format nil "http://~A" url))
- uri))))
- (defun get-by-tag (node tag)
- (nreverse (plump:get-elements-by-tag-name node tag)))
- (defun url-parse (url)
- (plump:parse (flexi-streams:octets-to-string
- (dex:get (http-default url) :force-binary t) :external-format :utf-8)))
- (defun find-rss-url (url)
- (ignore-errors
- (multiple-value-bind (body status headers uri stream)
- (dex:get (http-default url) :force-binary t)
- (declare (ignore status stream))
- (let ((root (plump:parse (flexi-streams:octets-to-string body :external-format :utf-8)))
- (content-type (gethash "content-type" headers)))
- (cond
- ((alexandria:starts-with-subseq "text/html" content-type)
- (loop for link in (get-by-tag root "link")
- when (string= "application/rss+xml" (plump:attribute link "type"))
- do (return (quri:render-uri
- (quri:merge-uris (quri:uri (plump:attribute link "href"))
- uri)))))
- ((string= "rss" (plump:tag-name (plump:first-element root)))
- (quri:render-uri uri)))))))
- (defun build-feed (url)
- (let ((root (url-parse url)))
- (make-feed :url url :title (child-text root "title"))))
- (defun adjust-period (feed coeff)
- "Adjust the period of feed based on whenever there were new items. With clamping"
- (let* ((p (feed-period feed))
- (diff (round (* p *rss-change-rate*))))
- (setf (feed-period feed)
- (min *rss-max-period*
- (max *rss-min-period*
- (- p (* coeff diff)))))))
- (defun need-fetch-p (feed)
- (or (null (feed-next-fetch feed))
- (local-time:timestamp> (local-time:now) (feed-next-fetch feed))))
- (defun fetch-new-items (feed)
- (let ((items
- (loop for item in (parse-rss (feed-url feed))
- unless (member (feed-item-guid item) (feed-seen-guids feed) :test #'equal)
- do (pushnew (feed-item-guid item) (feed-seen-guids feed) :test #'equal)
- and collect item)))
- (adjust-period feed (if (consp items) (length items) -1))
- (setf (feed-next-fetch feed)
- (local-time:timestamp+ (local-time:now)
- (feed-period feed)
- :sec))
- items))
- (defun trim-nil (text)
- (when text
- (let ((text (string-trim " " text)))
- (unless (zerop (length text))
- text))))
- (defun text-with-cdata (node)
- "Compiles all text nodes within the nesting-node into one string."
- (with-output-to-string (stream)
- (labels ((r (node)
- (loop for child across (plump:children node)
- do (typecase child
- (plump:text-node (write-string (plump:text child) stream))
- (plump:cdata (write-string (plump:text child) stream))
- (plump:nesting-node (r child))))))
- (r node))))
- (defun child-text (node tag)
- (alexandria:when-let (child (car (get-by-tag node tag)))
- (trim-nil (text-with-cdata child))))
- (defun clean-text (text)
- (when text (trim-nil (plump:text (plump:parse text)))))
- (defun parse-rss (url)
- (let ((plump:*tag-dispatchers* plump:*xml-tags*))
- (loop for item in (get-by-tag (url-parse url) "item")
- collect (make-feed-item :guid (child-text item "guid")
- :link (child-text item "link")
- :title (clean-text (child-text item "title"))
- :description (clean-text (child-text item "description"))
- :published (local-time:universal-to-timestamp
- (date-time-parser:parse-date-time (child-text item "pubDate")))))))
- (defun format-feed-item (item)
- (format nil "~A~@[ @ ~A~]~@[~%~%~A~]~%~%~A"
- (feed-item-title item)
- (alexandria:when-let (ts (feed-item-published item))
- (local-time:format-timestring
- nil ts
- :format '((:year 2) "-" (:month 2) "-" (:day 2) " " (:hour 2) ":" (:min 2))))
- (feed-item-description item)
- (feed-item-link item)))
|