from requests_html import AsyncHTMLSession from bs4 import BeautifulSoup from contextlib import suppress from requests import get def get_paper_desc(id_paper: str) -> tuple | None: request = get(f'https://arxiv.org/abs/{id_paper}') if request.ok: soup = BeautifulSoup(request.content, features="lxml") with suppress(TypeError): url = soup.find('meta', property='og:url').get('content') title = soup.find('meta', property='og:title').get('content') description = soup.find('meta', property='og:description').get('content').replace('\n', ' ') return url, title, description return None async def get_summary(url: str = "https://arxiv.org/abs/2102.12092v2") -> str: url = url.replace("abs", "pdf") async_session = AsyncHTMLSession() async_response = await async_session.get(f"https://labs.kagi.com/ai/sum?url={url}.pdf") await async_response.html.arender(sleep=5) if res := async_response.html.find("p.description", first = True).text: await async_session.close() return res else: await async_response.html.arender(sleep=10) if res := async_response.html.find("p.description", first = True).text: await async_session.close() return res else: await async_session.close() return "Nothing to retrieve :(" async def get_key_moments(url: str = "https://arxiv.org/abs/2102.12092v2") -> str: url = url.replace("abs", "pdf") async_session = AsyncHTMLSession() async_response = await async_session.get(f"https://labs.kagi.com/ai/sum?url={url}.pdf&expand=1") await async_response.html.arender(sleep=5) if res := async_response.html.find("p.description", first = True).text: await async_session.close() return res else: await async_response.html.arender(sleep=10) if res := async_response.html.find("p.description", first = True).text: await async_session.close() return res else: await async_session.close() return "Nothing to retrieve :("