|
@@ -78,24 +78,28 @@
|
|
|
*cookie-jar* (quri:uri-host uri) (or (quri:uri-path uri) "/")
|
|
*cookie-jar* (quri:uri-host uri) (or (quri:uri-path uri) "/")
|
|
|
:securep (string= (quri:uri-scheme uri) "https"))))
|
|
:securep (string= (quri:uri-scheme uri) "https"))))
|
|
|
|
|
|
|
|
-(defun extract-from-node (node template &optional (uri *current-uri*))
|
|
|
|
|
- (destructuring-bind (selector &optional post) template
|
|
|
|
|
- (let* ((nodes (clss:select selector node))
|
|
|
|
|
- (node (and (not (zerop (length nodes)))
|
|
|
|
|
- (elt nodes 0))))
|
|
|
|
|
- (when node
|
|
|
|
|
- (cond
|
|
|
|
|
- ((eq :href post) (node-full-url node "href" uri))
|
|
|
|
|
- ((stringp post) (plump:attribute node post))
|
|
|
|
|
- (t (unspacify (text-with-cdata node))))))))
|
|
|
|
|
|
|
+(defun extract-data (node &key attr (uri *current-uri*))
|
|
|
|
|
+ (when node
|
|
|
|
|
+ (cond
|
|
|
|
|
+ ((eq :href attr) (node-full-url node "href" uri))
|
|
|
|
|
+ ((stringp attr) (plump:attribute node attr))
|
|
|
|
|
+ (t (unspacify (text-with-cdata node))))))
|
|
|
|
|
|
|
|
-(defun extract-info (items-selector info-templates &key limit (dom *current-dom*) (uri *current-uri*))
|
|
|
|
|
|
|
+(defun extract-info (parent template &optional (uri *current-uri*))
|
|
|
|
|
+ (destructuring-bind (selector &key attr multiple) template
|
|
|
|
|
+ (labels ((extract (n)
|
|
|
|
|
+ (extract-data n :attr attr :uri uri)))
|
|
|
|
|
+ (let* ((nodes (clss:select selector parent))
|
|
|
|
|
+ (data (map 'list #'extract nodes)))
|
|
|
|
|
+ (if multiple data (car data))))))
|
|
|
|
|
+
|
|
|
|
|
+(defun extract-items-info (items-selector info-templates &key limit (dom *current-dom*) (uri *current-uri*))
|
|
|
(let ((nodes (clss:select items-selector dom)))
|
|
(let ((nodes (clss:select items-selector dom)))
|
|
|
(when limit
|
|
(when limit
|
|
|
(setf nodes (subseq nodes 0 (min limit (length nodes)))))
|
|
(setf nodes (subseq nodes 0 (min limit (length nodes)))))
|
|
|
(loop for node across nodes
|
|
(loop for node across nodes
|
|
|
collect (loop for (key . template) in info-templates
|
|
collect (loop for (key . template) in info-templates
|
|
|
- collect (cons key (extract-from-node node template uri))))))
|
|
|
|
|
|
|
+ collect (cons key (extract-info node template uri))))))
|
|
|
|
|
|
|
|
(defun scrape-list (info &optional context)
|
|
(defun scrape-list (info &optional context)
|
|
|
(let* ((scrape (agets info :scrape))
|
|
(let* ((scrape (agets info :scrape))
|
|
@@ -103,8 +107,8 @@
|
|
|
(init (agets info :init))
|
|
(init (agets info :init))
|
|
|
(validate (agets info :validate)))
|
|
(validate (agets info :validate)))
|
|
|
(labels ((return-info ()
|
|
(labels ((return-info ()
|
|
|
- (extract-info (agets scrape :items)
|
|
|
|
|
- (agets scrape :info))))
|
|
|
|
|
|
|
+ (extract-items-info (agets scrape :items)
|
|
|
|
|
+ (agets scrape :info))))
|
|
|
(unless (have-cookies request)
|
|
(unless (have-cookies request)
|
|
|
(do-form init context validate))
|
|
(do-form init context validate))
|
|
|
(with-request (request context)
|
|
(with-request (request context)
|