(in-package #:chatikbot) (defstruct feed url title seen-guids next-fetch (period 300)) (defstruct feed-item guid link title description published) (defparameter *rss-min-period* 60 "Min rss refresh period in seconds") (defparameter *rss-max-period* 600 "Min rss refresh period in seconds") (defparameter *rss-change-rate* 0.1 "Refresh period adjustment rata") (defun http-default (url) (let ((uri (quri:uri url))) (quri:render-uri (if (null (quri:uri-scheme uri)) (quri:uri (format nil "http://~A" url)) uri)))) (defun find-rss-url (url) (ignore-errors (multiple-value-bind (body status headers uri stream) (dex:get (http-default url) :force-binary t) (declare (ignore status stream)) (let ((root (plump:parse (flexi-streams:octets-to-string body :external-format :utf-8))) (content-type (gethash "content-type" headers))) (cond ((alexandria:starts-with-subseq "text/html" content-type) (loop for link in (plump:get-elements-by-tag-name root "link") when (string= "application/rss+xml" (plump:attribute link "type")) do (return (quri:render-uri (quri:merge-uris (quri:uri (plump:attribute link "href")) uri))))) ((string= "rss" (plump:tag-name (plump:first-element root))) (quri:render-uri uri))))))) (defun build-feed (url) (let ((root (plump:parse (flexi-streams:octets-to-string (dex:get url :force-binary t) :external-format :utf-8)))) (make-feed :url url :title (child-text root "title")))) (defun adjust-period (feed had-new?) "Adjust the period of feed based on whenever there were new items. With clamping" (let* ((p (feed-period feed)) (diff (round (* p *rss-change-rate*)))) (setf (feed-period feed) (min *rss-max-period* (max *rss-min-period* (if had-new? (- p diff) (+ p diff))))))) (defun need-fetch-p (feed) (or (null (feed-next-fetch feed)) (local-time:timestamp> (local-time:now) (feed-next-fetch feed)))) (defun fetch-new-items (feed) (let ((items (loop for item in (parse-rss (feed-url feed)) unless (member (feed-item-guid item) (feed-seen-guids feed) :test #'equal) do (pushnew (feed-item-guid item) (feed-seen-guids feed) :test #'equal) and collect item))) (adjust-period feed (consp items)) (setf (feed-next-fetch feed) (local-time:timestamp+ (local-time:now) (feed-period feed) :sec)) items)) (defun trim-nil (text) (when text (let ((text (string-trim " " text))) (unless (zerop (length text)) text)))) (defun text-with-cdata (node) "Compiles all text nodes within the nesting-node into one string." (with-output-to-string (stream) (labels ((r (node) (loop for child across (plump:children node) do (typecase child (plump:text-node (write-string (plump:text child) stream)) (plump:cdata (write-string (plump:text child) stream)) (plump:nesting-node (r child)))))) (r node)))) (defun child-text (node tag) (alexandria:when-let (child (car (nreverse (plump:get-elements-by-tag-name node tag)))) (trim-nil (text-with-cdata child)))) (defun clean-text (text) (when text (trim-nil (plump:text (plump:parse text))))) (defun parse-rss (url) (let ((plump:*tag-dispatchers* plump:*xml-tags*)) (loop for item in (plump:get-elements-by-tag-name (plump:parse (flexi-streams:octets-to-string (dex:get url :force-binary t) :external-format :utf-8)) "item") collect (make-feed-item :guid (child-text item "guid") :link (child-text item "link") :title (clean-text (child-text item "title")) :description (clean-text (child-text item "description")) :published (local-time:universal-to-timestamp (date-time-parser:parse-date-time (child-text item "pubDate"))))))) (defun format-feed-item (item) (format nil "~A~@[ @ ~A~]~@[~%~%~A~]~%~%~A" (feed-item-title item) (alexandria:when-let (ts (feed-item-published item)) (local-time:format-timestring nil ts :format '((:year 2) "-" (:month 2) "-" (:day 2) " " (:hour 2) ":" (:min 2)))) (feed-item-description item) (feed-item-link item)))