import asyncio
from datetime import datetime
from xml.etree import ElementTree

import requests

RSS_URL_ARTICLES = "https://habr.com/ru/rss/hubs/artificial_intelligence/articles/top/daily/?fl=ru"
RSS_URL_POSTS = "https://habr.com/ru/rss/hubs/artificial_intelligence/news/top/daily/?fl=ru"

# XML namespace prefixes used when querying feed elements.
_ATOM_NS = {"atom": "http://www.w3.org/2005/Atom"}
_DC_NS = {"dc": "http://purl.org/dc/elements/1.1/"}

# Title substituted for entries that have no usable <title> text.
_DEFAULT_TITLE = "Без названия"

# Maximum number of entries returned by fetch_rss().
_MAX_FEED_ITEMS = 10

# Single module-level HTTP session shared by every fetch_rss() call.
_session = requests.Session()


def _text(element, default=""):
    """Return the stripped text of *element*, or *default* if absent/empty."""
    if element is None or element.text is None:
        return default
    return element.text.strip() or default


def _parse_rss_item(item):
    """Convert one RSS 2.0 <item> element into an article dict."""
    # Prefer a guid explicitly marked as a permalink (a clean URL); fall back
    # to the standard <link> element instead of leaving the link empty.
    link = _text(item.find("guid[@isPermaLink='true']")) or _text(item.find("link"))
    categories = (_text(cat) for cat in item.findall("category"))
    return {
        "title": _text(item.find("title"), _DEFAULT_TITLE),
        "link": link,
        "pub_date": _text(item.find("pubDate")),
        "creator": _text(item.find("dc:creator", _DC_NS)),
        "tags": [tag for tag in categories if tag],
    }


def _parse_atom_entry(entry):
    """Convert one Atom <entry> element into an article dict."""
    # An Atom <link> without "rel" means "alternate" (the article page).
    # Prefer that over rel="self"/"enclosure"; otherwise take the first link.
    links = entry.findall("atom:link", _ATOM_NS)
    link_el = next(
        (el for el in links if el.get("rel", "alternate") == "alternate"),
        links[0] if links else None,
    )
    link = (link_el.get("href") or "").strip() if link_el is not None else ""

    # <published> is optional in Atom, <updated> is mandatory.
    # Compare with None explicitly: Element truthiness depends on child count.
    date_el = entry.find("atom:published", _ATOM_NS)
    if date_el is None:
        date_el = entry.find("atom:updated", _ATOM_NS)

    # Atom categories carry their value in the "term" attribute, not in text.
    tags = []
    for category in entry.findall("atom:category", _ATOM_NS):
        tag = (category.get("term") or "").strip() or _text(category)
        if tag:
            tags.append(tag)

    return {
        "title": _text(entry.find("atom:title", _ATOM_NS), _DEFAULT_TITLE),
        "link": link,
        "pub_date": _text(date_el),
        "creator": _text(entry.find("atom:author/atom:name", _ATOM_NS)),
        "tags": tags,
    }


async def fetch_rss(url):
    """Download and parse a feed (RSS 2.0 or Atom).

    The blocking HTTP request runs in a worker thread via
    ``asyncio.to_thread`` so the event loop is not blocked.

    Args:
        url: Address of the feed to download.

    Returns:
        A list of at most 10 dicts with the keys ``title``, ``link``,
        ``pub_date``, ``creator`` (all ``str``) and ``tags`` (list of
        ``str``); an empty list when the feed has no entries; ``None`` when
        the request fails or the response is not well-formed XML.
    """
    try:
        response = await asyncio.to_thread(_session.get, url, timeout=10)
        response.raise_for_status()
        # NOTE: ElementTree is not hardened against maliciously crafted XML.
        root = ElementTree.fromstring(response.content)
    except (requests.exceptions.RequestException, ElementTree.ParseError):
        # A malformed document is reported like a network failure.
        return None

    # Decide the feed flavour once for the whole document, so an RSS item
    # that merely lacks a <title> is not mistaken for an Atom entry.
    entries = root.findall(".//item")
    if entries:
        parse_entry = _parse_rss_item
    else:
        entries = root.findall("atom:entry", _ATOM_NS)
        parse_entry = _parse_atom_entry

    return [parse_entry(entry) for entry in entries[:_MAX_FEED_ITEMS]]


def _parse_date(pub_date):
    """Convert a feed date string to ``'dd.mm.yyyy'``.

    RFC 822 dates (RSS ``pubDate``) and ISO 8601 dates (Atom) are both
    recognised. If neither format matches, the first ten characters of the
    input are returned with ``-`` replaced by ``.``. Empty input yields
    ``""``.
    """
    if not pub_date:
        return ""

    value = pub_date.strip()

    # RFC 822, e.g. "Mon, 15 Jan 2024 10:30:00 GMT"; %z needs a numeric offset.
    try:
        parsed = datetime.strptime(
            value.replace(" GMT", " +0000"), "%a, %d %b %Y %H:%M:%S %z"
        )
    except ValueError:
        pass
    else:
        return parsed.strftime("%d.%m.%Y")

    # ISO 8601, e.g. "2024-01-15T10:30:00Z". The "Z" suffix is rewritten
    # because fromisoformat() accepts it only on newer Python versions.
    try:
        iso_value = value[:-1] + "+00:00" if value.endswith("Z") else value
        return datetime.fromisoformat(iso_value).strftime("%d.%m.%Y")
    except ValueError:
        # Unknown format: best effort, keep the leading date-like part.
        return value[:10].replace("-", ".")


def truncate_title(title, max_len=60):
    """Shorten *title* to *max_len* characters plus ``"..."`` if it is longer.

    Titles of at most *max_len* characters are returned unchanged.
    """
    if len(title) <= max_len:
        return title
    return title[:max_len] + "..."


def format_articles(articles, title, link, max_items=5):
    """Build the list of output strings for a group of articles/posts.

    Args:
        articles: Article dicts as returned by ``fetch_rss``; ``None`` or an
            empty list produces only the header.
        title: Heading text, rendered in bold.
        link: URL shown under the heading.
        max_items: Maximum number of articles to include (default 5).

    Returns:
        A list of strings: the header followed by one string per article
        holding the truncated title, the formatted date and the link.
    """
    lines = [f"**{title}**\n<{link}>"]
    for article in (articles or [])[:max_items]:
        date_str = _parse_date(article["pub_date"])
        short_title = truncate_title(article["title"])
        lines.append(f"{short_title}\n{date_str} <{article['link']}>")
    return lines