浏览代码

RSS feeds with quasi-smart feed fetching

Innocenty Enikeew 10 年之前
父节点
当前提交
05c02181cd
共有 4 个文件被更改,包括 185 次插入5 次删除
  1. 4 1
      .gitignore
  2. 4 3
      chatikbot.asd
  3. 83 1
      chatikbot.lisp
  4. 94 0
      rss.lisp

+ 4 - 1
.gitignore

@@ -1 +1,4 @@
-config.lisp
+config.lisp
+settings.lisp
+chart.png
+*.fasl

+ 4 - 3
chatikbot.asd

@@ -6,19 +6,19 @@
   :depends-on (#:alexandria
                #:adw-charting-vecto
                #:bordeaux-threads
-;;               #:cl-oauth
+               #:cl-date-time-parser
                #:clon
                #:dexador
                #:drakma
                #:flexi-streams
                #:local-time
                #:log4cl
+               #:plump
                #:trivial-utf-8
                #:yason)
   :serial t
   :components ((:file "package")
                (:file "utils")
-;;               (:file "twitter")
                (:file "telegram")
                (:file "forecast")
                (:file "vk")
@@ -26,5 +26,6 @@
                (:file "tumblr")
                (:file "patmatch")
                (:file "eliza")
-	       (:file "foursquare")
+               (:file "foursquare")
+               (:file "rss")
                (:file "chatikbot")))

+ 83 - 1
chatikbot.lisp

@@ -71,6 +71,7 @@
               (:postcheckins (handle-cmd-post-checkins chat-id id args))
               (:friends (handle-cmd-fsq-friends chat-id id args))
               (:checkins (handle-cmd-checkins chat-id id args))
+              (:rss (handle-cmd-rss chat-id id args))
               (otherwise (handle-admin-cmd chat-id text cmd args))))
           (send-dont-understand chat-id (preprocess-input text))))
     (when location
@@ -322,6 +323,80 @@
     (error (e) (log:error e))))
 
 
+;; RSS
+(defvar *rss-feeds* nil "All aggragated RSS feeds")
+(defvar *rss-chat-feeds* (make-hash-table) "Chat->Feeds mapping")
+
+(defun %send-feeds (chat-id feeds)
+  (telegram-send-message
+   chat-id
+   (if (null feeds)
+       "Пока ничего не постим"
+       (format nil "Постим:~%~{~A (~A)~^~%~}"
+               (loop for feed in feeds
+                  append (list (feed-title feed) (feed-url feed)))))))
+
+(defun %get-feed (url)
+  (when url
+    (or (find url *rss-feeds* :key #'feed-url :test #'equal)
+        (let ((feed (build-feed url)))
+          (fetch-new-items feed)
+          (push feed *rss-feeds*)
+          feed))))
+
+(defun %used-feed-p (feed)
+  (loop for feeds being the hash-values in *rss-chat-feeds*
+     when (member feed feeds)
+     do (return t)))
+
+(defun %refresh-feeds ()
+  (setf *rss-feeds*
+        (remove-if-not #'%used-feed-p *rss-feeds*)))
+
+(defun handle-cmd-rss (chat-id message-id args)
+  (log:info "handle-cmd-rss" chat-id message-id args)
+  (handler-case
+      (let ((feeds (gethash chat-id *rss-chat-feeds*)))
+        (if (null args)
+            (%send-feeds chat-id feeds)
+            (progn
+              (dolist (url args)
+                (alexandria:when-let ((feed (%get-feed (find-rss-url url))))
+                  (if (member feed feeds)
+                      (setf feeds (remove feed feeds))
+                      (push feed feeds))))
+              (setf (gethash chat-id *rss-chat-feeds*) feeds)
+              (%refresh-feeds)
+              (save-settings)
+              (%send-feeds chat-id feeds))))
+    (error (e)
+      (log:error e)
+      (telegram-send-message chat-id "Ошибочка вышла"))))
+
+(defun %feed-send-to (feed)
+  (loop for chat-id being the hash-keys in *rss-chat-feeds* using (hash-value feeds)
+     when (member feed feeds)
+     collect chat-id))
+
+(defun process-feeds ()
+  (handler-case
+      (dolist (feed (remove-if-not #'need-fetch-p *rss-feeds*))
+        (log:info "Fetching new items" (feed-url feed))
+        (dolist (item (fetch-new-items feed))
+          (dolist (chat-id (%feed-send-to feed))
+            (telegram-send-message chat-id (format-feed-item item)))))
+    (error (e) (log:error e))))
+
+(defun %load-rss-feeds (alist)
+  (alexandria:alist-hash-table
+   (loop for (chat-id . urls) in alist
+      collect (cons chat-id (mapcar #'%get-feed urls)))))
+
+(defun %save-rss-feeds ()
+  (loop for chat-id being the hash-keys in *rss-chat-feeds* using (hash-value feeds)
+     collect (cons chat-id (mapcar #'feed-url feeds))))
+
+
 (defun save-settings()
   (with-open-file (s (merge-pathnames "settings.lisp"
                                       (asdf:component-pathname
@@ -334,7 +409,8 @@
      `(setf *fsq-send-to* (alexandria:alist-hash-table ',(alexandria:hash-table-alist *fsq-send-to*))
             *chat-locations* ',*chat-locations*
             *akb-send-to* ',*akb-send-to*
-            *akb-last-id* ,*akb-last-id*)
+            *akb-last-id* ,*akb-last-id*
+            *rss-chat-feeds* (%load-rss-feeds ',(%save-rss-feeds)))
      :stream s)))
 
 (defun start ()
@@ -371,6 +447,12 @@
     (clon:make-typed-cron-schedule :minute '* :hour '*)
     :allow-now-p t)
    :thread t)
+  (clon:schedule-function
+   (lambda () (process-feeds))
+   (clon:make-scheduler
+    (clon:make-typed-cron-schedule :minute '* :hour '*)
+    :allow-now-p t)
+   :thread t)
   ;; Start getUpdates thread
   (bordeaux-threads:make-thread
    (lambda ()

+ 94 - 0
rss.lisp

@@ -0,0 +1,94 @@
+(in-package #:chatikbot)
+
+(defstruct feed url title seen-guids next-fetch (period 300))
+(defstruct feed-item guid link title description published)
+
+(defparameter *rss-min-period* 60 "Min rss refresh period in seconds")
+(defparameter *rss-max-period* 600 "Min rss refresh period in seconds")
+(defparameter *rss-change-rate* 0.1 "Refresh period adjustment rata")
+
+(defun find-rss-url (url)
+  (ignore-errors
+    (multiple-value-bind (body status headers uri stream) (dex:get url)
+      (declare (ignore status stream))
+      (let ((root (plump:parse body))
+            (content-type (gethash "content-type" headers)))
+        (cond
+          ((alexandria:starts-with-subseq "text/html" content-type)
+           (loop for link in (plump:get-elements-by-tag-name root "link")
+              when (string= "application/rss+xml" (plump:attribute link "type"))
+              do (return (values (plump:attribute link "href")))))
+          ((string= "rss" (plump:tag-name (plump:first-element root)))
+           (quri:render-uri uri)))))))
+
+(defun build-feed (url)
+  (alexandria:when-let* ((uri (find-rss-url url))
+                         (root (plump:parse (dex:get uri))))
+    (make-feed :url uri :title (child-text root "title"))))
+
+(defun adjust-period (feed had-new?)
+  "Adjust the period of feed based on whenever there were new items. With clamping"
+  (let* ((p (feed-period feed))
+         (diff (round (* p *rss-change-rate*))))
+    (setf (feed-period feed)
+          (min *rss-max-period*
+               (max *rss-min-period*
+                    (if had-new? (- p diff) (+ p diff)))))))
+
+(defun need-fetch-p (feed)
+  (or (null (feed-next-fetch feed))
+      (local-time:timestamp> (local-time:now) (feed-next-fetch feed))))
+
+(defun fetch-new-items (feed)
+  (let ((items
+         (loop for item in (parse-rss (feed-url feed))
+            unless (member (feed-item-guid item) (feed-seen-guids feed) :test #'equal)
+            do (pushnew (feed-item-guid item) (feed-seen-guids feed) :test #'equal)
+            and collect item)))
+    (adjust-period feed (consp items))
+    (setf (feed-next-fetch feed)
+          (local-time:timestamp+ (local-time:now)
+                                 (feed-period feed)
+                                 :sec))
+    items))
+
+(defun text-with-cdata (node)
+  "Compiles all text nodes within the nesting-node into one string."
+  (with-output-to-string (stream)
+    (labels ((r (node)
+               (loop for child across (plump:children node)
+                  do (typecase child
+                       (plump:text-node (write-string (plump:text child) stream))
+                       (plump:cdata (write-string (plump:text child) stream))
+                       (plump:nesting-node (r child))))))
+      (r node))))
+
+(defun child-text (node tag)
+  (alexandria:when-let (child (car (nreverse
+                                    (plump:get-elements-by-tag-name node tag))))
+    (text-with-cdata child)))
+
+(defun clean-text (text)
+  (when text (plump:text (plump:parse text))))
+
+(defun parse-rss (url)
+  (let ((plump:*tag-dispatchers* plump:*xml-tags*))
+    (loop for item in (plump:get-elements-by-tag-name
+                       (plump:parse (dex:get url))
+                       "item")
+       collect (make-feed-item :guid (child-text item "guid")
+                               :link (child-text item "link")
+                               :title (clean-text (child-text item "title"))
+                               :description (clean-text (child-text item "description"))
+                               :published (local-time:universal-to-timestamp
+                                           (date-time-parser:parse-date-time (child-text item "pubDate")))))))
+
+(defun format-feed-item (item)
+  (format nil "~A~@[ @ ~A~]~@[~%~%~A~]~%~%~A"
+          (feed-item-title item)
+          (alexandria:when-let (ts (feed-item-published item))
+            (local-time:format-timestring
+             nil ts
+             :format '((:year 2) "-" (:month 2) "-" (:day 2) " " (:hour 2) ":" (:min 2))))
+          (feed-item-description item)
+          (feed-item-link item)))