Innocenty Enikeew 10 anni fa
parent
commit
6e589ea4a9
1 ha cambiato i file con 27 aggiunte e 9 eliminazioni
  1. 27 9
      rss.lisp

+ 27 - 9
rss.lisp

@@ -7,24 +7,34 @@
 (defparameter *rss-max-period* 600 "Min rss refresh period in seconds")
 (defparameter *rss-max-period* 600 "Min rss refresh period in seconds")
 (defparameter *rss-change-rate* 0.1 "Refresh period adjustment rata")
 (defparameter *rss-change-rate* 0.1 "Refresh period adjustment rata")
 
 
+(defun http-default (url)
+  (let ((uri (quri:uri url)))
+    (quri:render-uri
+     (if (null (quri:uri-scheme uri))
+         (quri:uri (format nil "http://~A" url))
+         uri))))
+
 (defun find-rss-url (url)
 (defun find-rss-url (url)
   (ignore-errors
   (ignore-errors
-    (multiple-value-bind (body status headers uri stream) (dex:get url)
+    (multiple-value-bind (body status headers uri stream)
+        (dex:get (http-default url) :force-binary t)
       (declare (ignore status stream))
       (declare (ignore status stream))
-      (let ((root (plump:parse body))
+      (let ((root (plump:parse (flexi-streams:octets-to-string body :external-format :utf-8)))
             (content-type (gethash "content-type" headers)))
             (content-type (gethash "content-type" headers)))
         (cond
         (cond
           ((alexandria:starts-with-subseq "text/html" content-type)
           ((alexandria:starts-with-subseq "text/html" content-type)
            (loop for link in (plump:get-elements-by-tag-name root "link")
            (loop for link in (plump:get-elements-by-tag-name root "link")
               when (string= "application/rss+xml" (plump:attribute link "type"))
               when (string= "application/rss+xml" (plump:attribute link "type"))
-              do (return (values (plump:attribute link "href")))))
+              do (return (quri:render-uri
+                          (quri:merge-uris (quri:uri (plump:attribute link "href"))
+                                           uri)))))
           ((string= "rss" (plump:tag-name (plump:first-element root)))
           ((string= "rss" (plump:tag-name (plump:first-element root)))
            (quri:render-uri uri)))))))
            (quri:render-uri uri)))))))
 
 
 (defun build-feed (url)
 (defun build-feed (url)
-  (alexandria:when-let* ((uri (find-rss-url url))
-                         (root (plump:parse (dex:get uri))))
-    (make-feed :url uri :title (child-text root "title"))))
+  (let ((root (plump:parse (flexi-streams:octets-to-string
+                            (dex:get url :force-binary t) :external-format :utf-8))))
+    (make-feed :url url :title (child-text root "title"))))
 
 
 (defun adjust-period (feed had-new?)
 (defun adjust-period (feed had-new?)
   "Adjust the period of feed based on whenever there were new items. With clamping"
   "Adjust the period of feed based on whenever there were new items. With clamping"
@@ -52,6 +62,12 @@
                                  :sec))
                                  :sec))
     items))
     items))
 
 
+(defun trim-nil (text)
+  (when text
+    (let ((text (string-trim " " text)))
+      (unless (zerop (length text))
+        text))))
+
 (defun text-with-cdata (node)
 (defun text-with-cdata (node)
   "Compiles all text nodes within the nesting-node into one string."
   "Compiles all text nodes within the nesting-node into one string."
   (with-output-to-string (stream)
   (with-output-to-string (stream)
@@ -66,15 +82,17 @@
 (defun child-text (node tag)
 (defun child-text (node tag)
   (alexandria:when-let (child (car (nreverse
   (alexandria:when-let (child (car (nreverse
                                     (plump:get-elements-by-tag-name node tag))))
                                     (plump:get-elements-by-tag-name node tag))))
-    (text-with-cdata child)))
+    (trim-nil (text-with-cdata child))))
 
 
 (defun clean-text (text)
 (defun clean-text (text)
-  (when text (plump:text (plump:parse text))))
+  (when text (trim-nil (plump:text (plump:parse text)))))
 
 
 (defun parse-rss (url)
 (defun parse-rss (url)
   (let ((plump:*tag-dispatchers* plump:*xml-tags*))
   (let ((plump:*tag-dispatchers* plump:*xml-tags*))
     (loop for item in (plump:get-elements-by-tag-name
     (loop for item in (plump:get-elements-by-tag-name
-                       (plump:parse (dex:get url))
+                       (plump:parse
+                        (flexi-streams:octets-to-string
+                         (dex:get url :force-binary t) :external-format :utf-8))
                        "item")
                        "item")
        collect (make-feed-item :guid (child-text item "guid")
        collect (make-feed-item :guid (child-text item "guid")
                                :link (child-text item "link")
                                :link (child-text item "link")