Selaa lähdekoodia

[scraping] refactor

Innocenty Enikeev 7 vuotta sitten
vanhempi
commit
20c87a81e0
1 muutettua tiedostoa jossa 18 lisäystä ja 14 poistoa
  1. 18 14
      scraping.lisp

+ 18 - 14
scraping.lisp

@@ -78,24 +78,28 @@
      *cookie-jar* (quri:uri-host uri) (or (quri:uri-path uri) "/")
      :securep (string= (quri:uri-scheme uri) "https"))))
 
-(defun extract-from-node (node template &optional (uri *current-uri*))
-  (destructuring-bind (selector &optional post) template
-    (let* ((nodes (clss:select selector node))
-           (node (and (not (zerop (length nodes)))
-                      (elt nodes 0))))
-      (when node
-        (cond
-          ((eq :href post) (node-full-url node "href" uri))
-          ((stringp post) (plump:attribute node post))
-          (t (unspacify (text-with-cdata node))))))))
+(defun extract-data (node &key attr (uri *current-uri*))
+  (when node
+    (cond
+      ((eq :href attr) (node-full-url node "href" uri))
+      ((stringp attr) (plump:attribute node attr))
+      (t (unspacify (text-with-cdata node))))))
 
-(defun extract-info (items-selector info-templates &key limit (dom *current-dom*) (uri *current-uri*))
+(defun extract-info (parent template &optional (uri *current-uri*))
+  (destructuring-bind (selector &key attr multiple) template
+    (labels ((extract (n)
+               (extract-data n :attr attr :uri uri)))
+      (let* ((nodes (clss:select selector parent))
+             (data (map 'list #'extract nodes)))
+        (if multiple data (car data))))))
+
+(defun extract-items-info (items-selector info-templates &key limit (dom *current-dom*) (uri *current-uri*))
   (let ((nodes (clss:select items-selector dom)))
     (when limit
       (setf nodes (subseq nodes 0 (min limit (length nodes)))))
     (loop for node across nodes
        collect (loop for (key . template) in info-templates
-                  collect (cons key (extract-from-node node template uri))))))
+                  collect (cons key (extract-info node template uri))))))
 
 (defun scrape-list (info &optional context)
   (let* ((scrape (agets info :scrape))
@@ -103,8 +107,8 @@
          (init (agets info :init))
          (validate (agets info :validate)))
     (labels ((return-info ()
-               (extract-info (agets scrape :items)
-                             (agets scrape :info))))
+               (extract-items-info (agets scrape :items)
+                                   (agets scrape :info))))
       (unless (have-cookies request)
         (do-form init context validate))
       (with-request (request context)