| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849 |
- from requests_html import AsyncHTMLSession
- from bs4 import BeautifulSoup
- from contextlib import suppress
- from requests import get
def get_paper_desc(id_paper: str) -> tuple | None:
    """Fetch the Open Graph metadata of an arXiv abstract page.

    Requests ``https://arxiv.org/abs/<id_paper>`` and reads the ``og:url``,
    ``og:title`` and ``og:description`` ``<meta>`` tags from the response.

    Args:
        id_paper: Identifier appended to ``https://arxiv.org/abs/``.

    Returns:
        A ``(url, title, description)`` tuple holding the ``content``
        attribute of each tag, with newlines in the description replaced by
        spaces. ``None`` when the response status is not OK or when the
        metadata cannot be read from the page.
    """
    request = get(f'https://arxiv.org/abs/{id_paper}')
    if not request.ok:
        return None

    soup = BeautifulSoup(request.content, features="lxml")
    # soup.find() returns None when a tag is missing, and .get() returns None
    # when the attribute is missing; chaining a call onto either raises
    # AttributeError, not TypeError. Suppress both so an incomplete page
    # yields None instead of crashing the caller.
    with suppress(TypeError, AttributeError):
        url = soup.find('meta', property='og:url').get('content')
        title = soup.find('meta', property='og:title').get('content')
        description = (
            soup.find('meta', property='og:description')
            .get('content')
            .replace('\n', ' ')
        )
        return url, title, description
    return None
async def get_summary(url: str = "https://arxiv.org/abs/2102.12092v2") -> str:
    """Return the text of the summary element on the Kagi summarizer page.

    The ``/abs/`` path segment of ``url`` is rewritten to ``/pdf/``, and the
    result (with ``.pdf`` appended) is passed to
    ``https://labs.kagi.com/ai/sum``. The page is rendered and the text of
    the first ``p.description`` element is returned.

    Args:
        url: URL of the paper's abstract page.

    Returns:
        The non-empty text of ``p.description``, or the string
        ``"Nothing to retrieve :("`` when no such text is found after both
        render attempts.
    """
    # Rewrite only the path segment: a bare replace("abs", "pdf") would also
    # mangle any other "abs" substring in the URL (e.g. in a host name).
    pdf_url = url.replace("/abs/", "/pdf/", 1)
    async_session = AsyncHTMLSession()
    try:
        async_response = await async_session.get(
            f"https://labs.kagi.com/ai/sum?url={pdf_url}.pdf"
        )
        # Render with a short wait first, then retry once with a longer wait
        # in case the description was not yet available.
        for wait_seconds in (5, 10):
            await async_response.html.arender(sleep=wait_seconds)
            # find(..., first=True) returns None when nothing matches, so
            # check the element before reading .text.
            element = async_response.html.find("p.description", first=True)
            if element is not None and element.text:
                return element.text
        return "Nothing to retrieve :("
    finally:
        # Close the session on every path, including when a request or
        # render raises.
        await async_session.close()
async def get_key_moments(url: str = "https://arxiv.org/abs/2102.12092v2") -> str:
    """Return the text of the expanded summary on the Kagi summarizer page.

    The ``/abs/`` path segment of ``url`` is rewritten to ``/pdf/``, and the
    result (with ``.pdf`` appended) is passed to
    ``https://labs.kagi.com/ai/sum`` together with ``expand=1``. The page is
    rendered and the text of the first ``p.description`` element is returned.

    Args:
        url: URL of the paper's abstract page.

    Returns:
        The non-empty text of ``p.description``, or the string
        ``"Nothing to retrieve :("`` when no such text is found after both
        render attempts.
    """
    # Rewrite only the path segment: a bare replace("abs", "pdf") would also
    # mangle any other "abs" substring in the URL (e.g. in a host name).
    pdf_url = url.replace("/abs/", "/pdf/", 1)
    async_session = AsyncHTMLSession()
    try:
        async_response = await async_session.get(
            f"https://labs.kagi.com/ai/sum?url={pdf_url}.pdf&expand=1"
        )
        # Render with a short wait first, then retry once with a longer wait
        # in case the description was not yet available.
        for wait_seconds in (5, 10):
            await async_response.html.arender(sleep=wait_seconds)
            # find(..., first=True) returns None when nothing matches, so
            # check the element before reading .text.
            element = async_response.html.find("p.description", first=True)
            if element is not None and element.text:
                return element.text
        return "Nothing to retrieve :("
    finally:
        # Close the session on every path, including when a request or
        # render raises.
        await async_session.close()
|