1
0
فهرست منبع

[shopper] initial version

Innocenty Enikeew 7 سال پیش
والد
کامیت
6345d7785a
4فایلهای تغییر یافته به همراه169 افزوده شده و 0 حذف شده
  1. 1 0
      chatikbot.asd
  2. 2 0
      common.lisp
  3. 156 0
      plugins/shopper.lisp
  4. 10 0
      utils.lisp

+ 1 - 0
chatikbot.asd

@@ -13,6 +13,7 @@
                #:clon
                #:clss
                #:dexador
+               #:eager-future2
                #:hunchentoot
                #:ironclad
                #:local-time

+ 2 - 0
common.lisp

@@ -45,6 +45,7 @@
            :select-text
            :trim-nil
            :text-with-cdata
+           :unspacify
            :child-text
            :clean-text
            :json-request
@@ -60,6 +61,7 @@
            :get-chat-timezone
            :same-time-in-chat
            :group-by
+           :pmapcar
            :telegram-get-me
            :telegram-send-message
            :telegram-forward-message

+ 156 - 0
plugins/shopper.lisp

@@ -0,0 +1,156 @@
+(in-package :cl-user)
+(defpackage chatikbot.plugins.shopper
+  (:use :cl :chatikbot.common))
+(in-package :chatikbot.plugins.shopper)
+
+(defvar *cookie-jar* (cl-cookie:make-cookie-jar) "Currently active cookies")
+
+(defparameter +shops+
+  `((:crc (:type :bike)
+          (:form . (("http://www.chainreactioncycles.com/")
+                    "#localepickerpage > form"
+                    (("/atg/userprofiling/ProfileFormHandler.value.country" . "RU")
+                     ("/atg/userprofiling/ProfileFormHandler.value.currency" . "EUR")
+                     ("/atg/userprofiling/ProfileFormHandler.value.language" . "en"))))
+          (:search (:request . ("http://www.chainreactioncycles.com/ru/en/s" :parameters (("q" . q))))
+                   (:items . "div.products_details > ul")
+                   (:info (:link . (".description > a" :href))
+                          (:title . (".description"))
+                          (:price . (".fromamt")))))
+    (:b24 (:type . :bike)
+          (:init . ("https://www.bike24.de/" :method :post
+                   :content (("country" . "23")
+                             ("lang" . "2")
+                             ("action" . "locale_select"))))
+         (:search (:request . ("https://www.bike24.de/1.php"
+                               :parameters (("content" . "13") ("search" . q))))
+                  (:items . ".box-product-list-item-default")
+                  (:info (:link . ("a" :href))
+                         (:title . (".title"))
+                         (:price . (".box-price")))))
+    (:bd (:type . :bike)
+         (:init . ("https://www.bike-discount.de/index.php"
+                   :parameters (("lang" . "en")
+                                ("delivery_country" . "144")
+                                ("currency" . "1"))))
+         (:search (:request . ("https://www.bike-discount.de/en/search"
+                               :parameters (("q" . q))))
+                  (:items . ".element_artikel_gallery")
+                  (:info (:link . ("a" :href))
+                         (:title . (".productimage img" "alt"))
+                         (:price . ("[itemprop='price']" "content")))))
+    (:bc (:type . :bike)
+         (:search (:request . ("https://www.bike-components.de/en/s/" :parameters (("keywords" . q))))
+                  (:items . ".site-product-items > .item")
+                  (:info (:link . ("a" :href))
+                         (:title . ("h3"))
+                         (:price . (".price")))))
+    (:birota (:type :bike)
+             (:search (:request . ("https://www.birota.ru/shop/search/index.php" :parameters (("q" . q))))
+                      (:items . "[itemtype='http://schema.org/Product']")
+                      (:info (:link . ("[itemprop='url']" :href))
+                             (:title . ("[itemprop='name']" "content"))
+                             (:price . (".price")))))))
+
+(defun apply-context (template context)
+  (sublis context template))
+
+(defun do-request (template &optional context)
+  (let ((args (apply-context template context)))
+    (apply 'xml-request `(,@args :cookie-jar ,*cookie-jar*))))
+
+(defun node-full-url (node attr uri)
+  (quri:render-uri (quri:merge-uris (quri:uri (plump:attribute node attr)) uri)))
+
+(defun do-form (form-info)
+  (destructuring-bind (url form-selector context) form-info
+    (multiple-value-bind (dom status headers uri)
+        (do-request url)
+      (declare (ignore status headers))
+      (let ((form (clss:select form-selector dom)))
+        (unless (zerop (length form))
+          (let* ((form (elt form 0))
+                 (action (node-full-url form "action" uri))
+                 (method (or (keyify (plump:attribute form "method")) :get))
+                 (content (append context
+                                  (loop for input across (clss:select "input,select" form)
+                                     for name = (plump:attribute input "name")
+                                     for value = (plump:attribute input "value")
+                                     when name
+                                     unless (agets context name)
+                                     collect (cons name value)))))
+            (do-request `(,action :method ,method :content ,content))))))))
+
+(defun extract-data (node template uri)
+  (destructuring-bind (selector &optional post) template
+    (let* ((nodes (clss:select selector node))
+           (node (and (not (zerop (length nodes)))
+                      (elt nodes 0))))
+      (when node
+        (cond
+          ((eq :href post) (node-full-url node "href" uri))
+          ((stringp post) (plump:attribute node post))
+          (t (unspacify (text-with-cdata node))))))))
+
+(defun parse-dom (uri dom items-selector info-templates &optional limit)
+  (let ((nodes (clss:select items-selector dom)))
+    (when limit
+      (setf nodes (subseq nodes 0 (min limit (length nodes)))))
+    (loop for node across nodes
+       collect (loop for (key . template) in info-templates
+                  collect (cons key (extract-data node template uri))))))
+
+(defun have-cookies (request)
+  (let ((uri (quri:uri (car request))))
+    (cl-cookie:cookie-jar-host-cookies
+     *cookie-jar* (quri:uri-host uri) (or (quri:uri-path uri) "/")
+     :securep (string= (quri:uri-scheme uri) "https"))))
+
+(defun search-shop (query params &optional limit)
+  (let ((init (agets params :init))
+        (form (agets params :form))
+        (search (agets params :search)))
+    (unless (have-cookies (agets search :request))
+      (when init (do-request init))
+      (when form (do-form form)))
+    (multiple-value-bind (dom status headers uri)
+        (do-request (agets search :request) `((q . ,query)))
+      (declare (ignore status headers))
+      (parse-dom uri dom (agets search :items) (agets search :info) limit))))
+
+(defun search-shops (query shops &optional limit)
+  (labels ((f (shop)
+             (destructuring-bind (name . params) shop
+               (ignore-errors
+                 (cons name (search-shop query params limit))))))
+    (pmapcar #'f shops)))
+
+(defun merge-search-results (search-results)
+  (let (merged)
+    (dolist (shop search-results (nreverse merged))
+      (dolist (res (cdr shop))
+        (let* ((title (agets res :title))
+               (pair (assoc title merged :test 'equal)))
+          (unless pair
+            (setf pair (car (push (list title) merged))))
+          (push (cons (car shop) res) (cdr pair)))))))
+
+(defun format-merged (merged)
+  (with-output-to-string (s)
+    (loop for (title . shops) in merged
+       do (format s "~A - ~{*~A*: [~A](~A)~^, ~}~%" title
+                  (loop for (name . result) in shops
+                     append (list (dekeyify name)
+                                  (agets result :price)
+                                  (agets result :link)))))))
+
+(defun handle-search (chat-id query)
+  (let ((results (merge-search-results
+                  (search-shops query +shops+ 3))))
+    (bot-send-message chat-id (if results (format-merged results) "Ничего не нашлось")
+                      :parse-mode "markdown" :disable-web-preview "true")))
+
+(def-message-cmd-handler handler-cmd-wall (:bike)
+  (cond
+    ((null args) (bot-send-message chat-id "/bike <query>"))
+    (:otherwise (handle-search chat-id (spaced args)))))

+ 10 - 0
utils.lisp

@@ -33,6 +33,7 @@
            :select-text
            :trim-nil
            :text-with-cdata
+           :unspacify
            :child-text
            :clean-text
            :json-request
@@ -49,6 +50,7 @@
            :get-chat-timezone
            :same-time-in-chat
            :group-by
+           :pmapcar
            :message-id
            :from-id
            :chat-id
@@ -301,6 +303,10 @@ is replaced with replacement."
                        (plump:nesting-node (r child))))))
       (r node))))
 
+(let ((ws-regex (cl-ppcre:create-scanner "[\\n 	]+" :multi-line-mode t)))
+  (defun unspacify (text)
+    (string-trim " " (cl-ppcre:regex-replace-all ws-regex text " "))))
+
 (defun child-text (node tag)
   (alexandria:when-let (child (car (get-by-tag node tag)))
     (trim-nil (text-with-cdata child))))
@@ -420,6 +426,10 @@ is replaced with replacement."
        do (push item (cdr group)))
     grouped))
 
+(defun pmapcar (f list)
+  (let ((result (mapcar (lambda (n) (eager-future2:pexec (funcall f n))) list)))
+    (map-into result #'eager-future2:yield result)))
+
 ;; Fix bug in local-time (following symlinks in /usr/share/zoneinfo/
 ;; leads to bad cutoff)
 (in-package #:local-time)