Sfoglia il codice sorgente

🚧 build(docker): add cron and backup logic of db inside docker

metya 2 anni fa
parent
commit
1603a690bb
10 file modificati con 78 aggiunte e 22 eliminazioni
  1. 8 1
      .dockerignore
  2. 10 4
      Dockerfile
  3. 5 0
      backup_db.sh
  4. 1 3
      config.py
  5. 0 10
      db.py
  6. 1 0
      docker-compose.yaml
  7. 1 1
      pyproject.toml
  8. 50 0
      script.exp
  9. 1 2
      summarize.py
  10. 1 1
      vanitybot.py

+ 8 - 1
.dockerignore

@@ -1,4 +1,11 @@
 geckodriver
 *.ipynb
 *.log
-token
+token
+.DS_Store
+.ipynb_checkpoints
+.vscode
+__pycache__
+.mypy_cache
+js.js
+paper.json

+ 10 - 4
Dockerfile

@@ -3,18 +3,24 @@ FROM python:alpine
 ENV PIP_NO_CACHE_DIR=off \
     PIP_DISABLE_PIP_VERSION_CHECK=on
 ARG API_TOKEN
+ARG GITHUB_TOKEN
+ENV GITHUB_TOKEN=$GITHUB_TOKEN
 ENV API_TOKEN=$API_TOKEN
 
 RUN apk add --no-cache --virtual .build-deps gcc musl-dev libffi-dev curl
-RUN apk add --no-cache git git-lfs
+RUN apk add --no-cache git git-lfs expect
 
 WORKDIR /app
 ADD requirements.txt /app
 RUN pip install --no-cache-dir -r requirements.txt
 RUN apk del .build-deps
 
-ADD . /app
+# RUN echo "0 0 * * 1 /bin/sh /app/backup_db.sh" >> /var/spool/cron/crontabs/root
+RUN echo "* * * * * /bin/sh /app/backup_db.sh" > /var/spool/cron/crontabs/root
+
+RUN git config --global user.name "metya"
+RUN git config --global user.email "metya.tm@gmail.com"
 
-# EXPOSE 8081
+ADD . /app
 
-ENTRYPOINT [ "python", "vanitybot.py" ]
+ENTRYPOINT crond -f -l 0 && python vanitybot.py

+ 5 - 0
backup_db.sh

@@ -0,0 +1,5 @@
+#!/usr/bin/expect
+cd /app
+git add papers.db
+git commit -m "backup papers.db" --no-verify
+./script.exp ${GITHUB_TOKEN}

+ 1 - 3
config.py

@@ -6,7 +6,5 @@ if env_var := dotenv_values('token'):
 else:
     API_TOKEN = getenv("API_TOKEN")
 
-print(API_TOKEN)
-
 if __name__ == "__main__":
-    print(API_TOKEN)
+    print(API_TOKEN)

+ 0 - 10
db.py

@@ -149,7 +149,6 @@ if __name__ == "__main__":
     import json
     with open("paper.json", 'r') as file:
         paper = json.load(file)
-    # print(paper)
     paper["title"] = paper["metadata"]["title"]
     paper["authors"] = paper["metadata"]["author"].split(",").strip()
     paper["abstract"] = paper["metadata"]["abstract"]
@@ -157,13 +156,4 @@ if __name__ == "__main__":
 
     async def main():
         await add_authors_and_paper("2203.02155v1", paper)
-
-        # paper = await check_paper("1234.1212v1")
-        # if paper:
-        #     print("\n\n\n\n")
-        #     print(paper.summary)
-        #     print("\n\n")
-        #     print(paper.summary[0])
-
     asyncio.run(main())
-    

+ 1 - 0
docker-compose.yaml

@@ -5,4 +5,5 @@ services:
     # image: vanity
     environment:
       - API_TOKEN
+      - GITHUB_TOKEN
     restart: always

+ 1 - 1
pyproject.toml

@@ -1,4 +1,4 @@
 [tool.commitizen]
-name = "cz_commitizen_emoji"
+# name = "cz_commitizen_emoji"
 version = "1.0.0"
 tag_format = "$version"

+ 50 - 0
script.exp

@@ -0,0 +1,50 @@
+#!/usr/bin/expect -f
+#
+# This Expect script was generated by autoexpect on Mon Feb 27 18:59:37 2023
+# Expect and autoexpect were both written by Don Libes, NIST.
+#
+# Note that autoexpect does not guarantee a working script.  It
+# necessarily has to guess about certain things.  Two reasons a script
+# might fail are:
+#
+# 1) timing - A surprising number of programs (rn, ksh, zsh, telnet,
+# etc.) and devices discard or ignore keystrokes that arrive "too
+# quickly" after prompts.  If you find your new script hanging up at
+# one spot, try adding a short sleep just before the previous send.
+# Setting "force_conservative" to 1 (see below) makes Expect do this
+# automatically - pausing briefly before sending each character.  This
+# pacifies every program I know of.  The -c flag makes the script do
+# this in the first place.  The -C flag allows you to define a
+# character to toggle this mode off and on.
+
+set force_conservative 0  ;# set to 1 to force conservative mode even if
+			  ;# script wasn't run conservatively originally
+if {$force_conservative} {
+	set send_slow {1 .1}
+	proc send {ignore arg} {
+		sleep .1
+		exp_send -s -- $arg
+	}
+}
+
+#
+# 2) differing output - Some programs produce different output each time
+# they run.  The "date" command is an obvious example.  Another is
+# ftp, if it produces throughput statistics at the end of a file
+# transfer.  If this causes a problem, delete these patterns or replace
+# them with wildcards.  An alternative is to use the -p flag (for
+# "prompt") which makes Expect only look for the last line of output
+# (i.e., the prompt).  The -P flag allows you to define a character to
+# toggle this mode off and on.
+#
+# Read the man page for more info.
+#
+# -Don
+
+set GITHUB_TOKEN [lindex $argv 0]
+set timeout -1
+spawn git push https://$GITHUB_TOKEN@github.com/metya/vanity-bot
+match_max 100000
+expect -exact "Password for 'https://$GITHUB_TOKEN@github.com': "
+send -- "\r"
+expect eof

+ 1 - 2
summarize.py

@@ -130,7 +130,7 @@ async def get_paper_desc(id_paper: str) -> dict | None:
         async with ClientSession() as session:
             async with await session.get(f'https://arxiv.org/abs/{id_paper}') as request:
                 if request.ok:
-                    soup = BeautifulSoup(await request.text(), features="xml")
+                    soup = BeautifulSoup(await request.text(), features="html.parser")
                     try:
                         url = soup.find('meta', property='og:url').get('content') # type: ignore
                         paper = {
@@ -145,4 +145,3 @@ async def get_paper_desc(id_paper: str) -> dict | None:
                     except TypeError:
                         pass
     return None
-

+ 1 - 1
vanitybot.py

@@ -83,4 +83,4 @@ if __name__ == "__main__":
     # logging.getLogger("aiogram_dialog").setLevel(logging.DEBUG)
 
     asyncio.get_event_loop().run_until_complete(deploy_message())
-    executor.start_polling(dp, skip_updates=True)
+    executor.start_polling(dp, skip_updates=True)