Procházet zdrojové kódy

[tescort] scraping enchancement

Innocenty Enikeew před 7 roky
rodič
revize
1156166e19
2 změnil soubory, kde provedl 113 přidání a 55 odebrání
  1. 61 36
      plugins/tescort.lisp
  2. 52 19
      scraping.lisp

+ 61 - 36
plugins/tescort.lisp

@@ -1,38 +1,62 @@
 (in-package :cl-user)
 (defpackage chatikbot.plugins.tescort
-  (:use :cl :chatikbot.common :chatikbot.scraping))
+  (:use :cl :chatikbot.common :chatikbot.scraping :eager-future2))
 (in-package :chatikbot.plugins.tescort)
 
-(defparameter +login-form+
-  '(("http://www.tescort.com/private/signin")
-    ".signin-box form"
-    (("username" . login)
-     ("password" . pass))))
-
-(defparameter +blacklist-search+
-  '(("http://www.tescort.com/panel/client-blacklist" :parameters (("client_criterias" . q)
-                                                                  ("search" . "search")))
-    ".black_list_table tbody tr"
-    ((:date . ("td:nth-of-type(1)"))
-     (:name . ("td:nth-of-type(2)"))
-     (:city . ("td:nth-of-type(3)"))
-     (:phone . ("td:nth-of-type(4)"))
-     (:comment . ("td:nth-of-type(5)")))))
-
-(defvar *login*)
-(defvar *pass*)
-(defun login ()
-  (let ((dom (do-form +login-form+ `((login . ,*login*) (pass . ,*pass*)))))
-    (when (zerop (length (clss:select "#header .sitemenu-logged" dom)))
-      (error "bad password"))
-    dom))
-
-(defvar *tescort-cookie-jar* (cl-cookie:make-cookie-jar))
-(defun search-blacklist (query)
-  (let ((*cookie-jar* *tescort-cookie-jar*))
-    (unless (have-cookies (car +blacklist-search+))
-      (login))
-    (scrape-list +blacklist-search+ `((q . ,query)))))
+(defvar *chat-cookie-jars* (make-hash-table))
+
+(defparameter +blacklists+
+  '((:tescort
+     (:scrape (:request . ("http://www.tescort.com/panel/client-blacklist"
+                           :parameters (("client_criterias" . q)
+                                        ("search" . "search"))))
+      (:items . ".black_list_table tbody tr")
+      (:info (:date . ("td:nth-of-type(1)"))
+             (:name . ("td:nth-of-type(2)"))
+             (:city . ("td:nth-of-type(3)"))
+             (:phone . ("td:nth-of-type(4)"))
+             (:comment . ("td:nth-of-type(5)"))))
+     (:init . (("http://www.tescort.com/private/signin")
+               ".signin-box form"
+               (("username" . tescort-login)
+                ("password" . tescort-pass))))
+     (:validate . "#header .sitemenu-logged"))
+    (:annonce
+     (:scrape (:request . ("https://www.6annonce.com/private-v2/client-blacklist"
+                           :parameters (("client_criterias" . q)
+                                        ("search" . "search"))))
+      (:items . ".black_list_table tbody tr")
+      (:info (:date . ("td:nth-of-type(1)"))
+             (:name . ("td:nth-of-type(2)"))
+             (:city . ("td:nth-of-type(3)"))
+             (:phone . ("td:nth-of-type(4)"))
+             (:comment . ("td:nth-of-type(5)"))))
+     (:init . (("https://www.6annonce.com/private/signin")
+               ".signin-box form"
+               (("username" . annonce-login)
+                ("password" . annonce-pass))))
+     (:validate . "#header .sitemenu-logged"))))
+
+(defvar *tescort-login*)
+(defvar *tescort-pass*)
+(defvar *annonce-login*)
+(defvar *annonce-pass*)
+(defun get-common-context ()
+  `((tescort-login . ,*tescort-login*)
+    (tescort-pass . ,*tescort-pass*)
+    (annonce-login . ,*annonce-login*)
+    (annonce-pass . ,*annonce-pass*)))
+
+(defun search-blacklists (query)
+  (let ((context (append (get-common-context)
+                         `((q . ,query)))))
+    (labels ((f (info)
+               (destructuring-bind (name . params) info
+                 (cons name (ignore-errors (scrape-list params context))))))
+      (pmapcar #'f +blacklists+))))
+
+(defun merge-results (results)
+  (loop for (name . res) in results append res))
 
 (defun format-blacklist (blacklist)
   (with-output-to-string (s)
@@ -43,12 +67,13 @@
                   (agets item :comment)))))
 
 (defun handle-blacklist (chat-id query)
-  (telegram-send-chat-action chat-id "typing")
-  (let ((results (search-blacklist query)))
-    (bot-send-message chat-id (if results (format-blacklist results) "Not found")
-                      :parse-mode "markdown" :disable-web-preview "true")))
+  (with-chat-cookies (chat-id *chat-cookie-jars*)
+    (telegram-send-chat-action chat-id "typing")
+    (let ((results (merge-results (search-blacklists query))))
+      (bot-send-message chat-id (if results (format-blacklist results) "Not found")
+                        :parse-mode "markdown" :disable-web-preview "true"))))
 
-(def-message-cmd-handler handler-cmd-blacklist (:blacklist)
+(def-message-cmd-handler handler-cmd-blacklist (:blacklist :bl)
   (cond
     ((null args) (bot-send-message chat-id "Enter query")
      (on-next-message chat-id

+ 52 - 19
scraping.lisp

@@ -10,6 +10,7 @@
            :do-form
            :have-cookies
            :with-request
+           :with-chat-cookies
            :scrape-list))
 (in-package :chatikbot.scraping)
 
@@ -32,25 +33,44 @@
      (declare (ignorable *current-dom* *current-status* *current-headers* *current-uri*))
      ,@body))
 
+(defmacro with-chat-cookies ((chat-id chat-cookies) &body body)
+  (alexandria:with-gensyms (g-chat g-cookies)
+    `(let* ((,g-chat ,chat-id)
+            (,g-cookies ,chat-cookies)
+            (*cookie-jar* (or (gethash ,g-chat ,g-cookies)
+                              (setf (gethash ,g-chat ,g-cookies)
+                                    (cl-cookie:make-cookie-jar)))))
+       ,@body)))
+
 (defun node-full-url (node attr &optional (uri *current-uri*))
   (quri:render-uri (quri:merge-uris (quri:uri (plump:attribute node attr)) uri)))
 
-(defun do-form (form-info context)
-  (destructuring-bind (request selector template) form-info
-    (with-request (request context)
-      (let ((form (clss:select selector *current-dom*)))
-        (unless (zerop (length form))
-          (let* ((form (elt form 0))
-                 (action (node-full-url form "action"))
-                 (method (or (keyify (plump:attribute form "method")) :get))
-                 (content (append (apply-context template context)
-                                  (loop for input across (clss:select "input,select" form)
-                                     for name = (plump:attribute input "name")
-                                     for value = (plump:attribute input "value")
-                                     when name
-                                     unless (agets template name)
-                                     collect (cons name value)))))
-            (do-request `(,action :method ,method :content ,content))))))))
+(defun do-validate (info &optional (dom *current-dom*))
+  (cond
+    ((null info) t)
+    (t (not (zerop (length (clss:select info dom)))))))
+
+(defun do-form (form-info &optional context validate)
+  (when form-info
+    (destructuring-bind (request selector template) form-info
+      (with-request (request context)
+        (let ((form (clss:select selector *current-dom*)))
+          (unless (zerop (length form))
+            (let* ((form (elt form 0))
+                   (action (node-full-url form "action"))
+                   (method (or (keyify (plump:attribute form "method")) :get))
+                   (content (append (apply-context template context)
+                                    (loop for input across (clss:select "input,select" form)
+                                       for name = (plump:attribute input "name")
+                                       for value = (plump:attribute input "value")
+                                       when name
+                                       unless (agets template name)
+                                       collect (cons name value))))
+                   (action-request `(,action :method ,method :content ,content)))
+              (with-request (action-request)
+                (unless (do-validate validate)
+                  (error "validate error"))
+                (values *current-dom* *current-status* *current-headers* *current-uri*)))))))))
 
 (defun have-cookies (request)
   (let ((uri (quri:uri (car request))))
@@ -78,6 +98,19 @@
                   collect (cons key (extract-from-node node template uri))))))
 
 (defun scrape-list (info &optional context)
-  (destructuring-bind (request item-selector item-info) info
-    (with-request (request context)
-      (extract-info item-selector item-info))))
+  (let* ((scrape (agets info :scrape))
+         (request (agets scrape :request))
+         (init (agets info :init))
+         (validate (agets info :validate)))
+    (labels ((return-info ()
+               (extract-info (agets scrape :items)
+                             (agets scrape :info))))
+      (unless (have-cookies request)
+        (do-form init context validate))
+      (with-request (request context)
+        (if (do-validate validate)
+            (return-info)
+            (progn
+              (do-form init context validate)
+              (with-request (request context)
+                (return-info))))))))