|
@@ -92,25 +92,41 @@ is replaced with replacement."
|
|
|
nil)))
|
|
nil)))
|
|
|
|
|
|
|
|
;; XML processing
|
|
;; XML processing
|
|
|
-(defun xml-request (url)
|
|
|
|
|
|
|
+(defun xml-request (url &optional encoding)
|
|
|
(multiple-value-bind (raw-body status headers uri http-stream)
|
|
(multiple-value-bind (raw-body status headers uri http-stream)
|
|
|
(drakma:http-request (http-default url)
|
|
(drakma:http-request (http-default url)
|
|
|
:force-binary t
|
|
:force-binary t
|
|
|
:decode-content t)
|
|
:decode-content t)
|
|
|
- (declare (ignore status headers http-stream))
|
|
|
|
|
- (let* ((dom
|
|
|
|
|
- (handler-case
|
|
|
|
|
- (plump:parse (flex:octets-to-string raw-body :external-format :utf-8))
|
|
|
|
|
- (flex:external-format-encoding-error ()
|
|
|
|
|
- (plump:parse (flex:octets-to-string raw-body)))))
|
|
|
|
|
- (encoding (ignore-errors
|
|
|
|
|
- (plump:get-attribute (plump:first-child dom) "encoding"))))
|
|
|
|
|
|
|
+ (declare (ignore status http-stream))
|
|
|
|
|
+ (let ((encoding
|
|
|
|
|
+ (or
|
|
|
|
|
+ ;; 1. Provided encoding
|
|
|
|
|
+ encoding
|
|
|
|
|
+ ;; 2. Content-type header
|
|
|
|
|
+ (ignore-errors
|
|
|
|
|
+ (let ((ct (aget :content-type headers)))
|
|
|
|
|
+ (subseq ct (1+ (position #\= ct)))))
|
|
|
|
|
+ ;; 3. Parse first 1000 bytes
|
|
|
|
|
+ (ignore-errors
|
|
|
|
|
+ (let ((dom (plump:parse (flex:octets-to-string (subseq raw-body 0 1000)))))
|
|
|
|
|
+ (or
|
|
|
|
|
+ ;; 3.1 Content-type from http-equiv
|
|
|
|
|
+ (ignore-errors
|
|
|
|
|
+ (let ((ct (loop for meta in (get-by-tag dom "meta")
|
|
|
|
|
+ for http-equiv = (plump:get-attribute meta "http-equiv")
|
|
|
|
|
+ for content = (plump:get-attribute meta "content")
|
|
|
|
|
+ when (equal http-equiv "Content-Type")
|
|
|
|
|
+ return content)))
|
|
|
|
|
+ (subseq ct (1+ (position #\= ct)))))
|
|
|
|
|
+ ;; 3.2 'content' xml node attribute
|
|
|
|
|
+ (ignore-errors (plump:get-attribute (plump:first-child dom) "encoding")))))
|
|
|
|
|
+ ;; 4. Default 'utf-8'
|
|
|
|
|
+ "utf-8")))
|
|
|
(values
|
|
(values
|
|
|
- (if (and encoding (not (equal encoding "utf-8")))
|
|
|
|
|
- (plump:parse (flex:octets-to-string
|
|
|
|
|
- raw-body
|
|
|
|
|
- :external-format (intern encoding 'keyword)))
|
|
|
|
|
- dom)
|
|
|
|
|
|
|
+ (handler-bind ((flex:external-format-encoding-error
|
|
|
|
|
+ (lambda (c) (use-value #\? c))))
|
|
|
|
|
+ (plump:parse
|
|
|
|
|
+ (flex:octets-to-string raw-body :external-format (intern encoding 'keyword))))
|
|
|
uri))))
|
|
uri))))
|
|
|
|
|
|
|
|
(defun get-by-tag (node tag)
|
|
(defun get-by-tag (node tag)
|