deadzilla 6fe8334311 feat: добавить команду !news для AI-новостей с Habr
- Парсинг RSS через ElementTree (RSS 2.0 / Atom)
- Данные: title, dc:creator, guid isPermaLink, pubDate, category
- Формат: заголовок до 60 символов, дата дд.мм.гггг, теги
- Ссылки без https://, кликабельные
- Консольная команда: !news
2026-05-25 12:04:44 +05:00

95 lines
3.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import requests
from xml.etree import ElementTree
# Habr RSS feed for the "artificial_intelligence" hub. The "rated10" path
# segment and the "fl=ru" query presumably select a rating threshold and the
# Russian-language feed — TODO confirm against Habr's RSS options.
RSS_URL = "https://habr.com/ru/rss/hubs/artificial_intelligence/articles/rated10/?fl=ru"
def news(stop_event, bot):
    """Print up to five of the latest AI articles from Habr to the console.

    Articles are obtained from ``_fetch_rss()``. For each article the
    function prints a numbered title (cut to 60 characters plus "..."), a
    metadata line made of the author, the publication date as dd.mm.yyyy and
    up to three tags, and the link without its leading ``https://``.
    Metadata fields and links that are empty are omitted rather than printed
    as blanks.

    Args:
        stop_event: Not used by this command. Presumably part of the shared
            console-command signature — confirm against the dispatcher.
        bot: Not used by this command (same note as ``stop_event``).

    Returns:
        None. All output goes to stdout.
    """
    articles = _fetch_rss()
    if articles is None:
        print("Не удалось получить новости.")
        return
    if not articles:
        print("Новостей пока нет.")
        return

    scheme = "https://"
    print("**AI-новости с Habr**")
    for i, article in enumerate(articles[:5], 1):
        # A feed element can be present but empty, in which case its text is
        # None; guard every field before treating it as a string.
        title = article["title"] or "Без названия"
        if len(title) > 60:
            title = title[:60] + "..."

        link = article["link"] or ""
        # Drop only a leading scheme: str.replace() would also strip any
        # "https://" embedded later in the URL (e.g. inside a query string).
        if link.startswith(scheme):
            link = link[len(scheme):]

        date_str = _format_pub_date(article["pub_date"])
        tags_str = ", ".join(article["tags"][:3]) if article["tags"] else ""
        # Join only the fields that are present so a missing author or date
        # does not leave dangling " | " separators.
        meta = " | ".join(
            part for part in (article["creator"], date_str, tags_str) if part
        )

        print(f"{i}. {title}")
        if meta:
            print(f" {meta}")
        if link:
            print(f" {link}")
        print()


def _format_pub_date(raw):
    """Return the feed date *raw* formatted as dd.mm.yyyy.

    Handles RFC 822 dates used by RSS 2.0 ("Mon, 25 May 2026 07:04:44 GMT")
    and dates starting with an ISO 8601 calendar date as used by Atom
    ("2026-05-25T07:04:44Z"). Returns "" for empty input; for any other
    unrecognized input falls back to the first ten characters with "-"
    replaced by ".".
    """
    from datetime import datetime
    from email.utils import parsedate_to_datetime

    raw = (raw or "").strip()
    if not raw:
        return ""

    # parsedate_to_datetime is locale-independent, unlike strptime with
    # %a/%b, which fails on English day/month names under a non-English
    # locale. It raises TypeError on older Pythons and ValueError on newer
    # ones when the string is not an RFC 822 date.
    try:
        return parsedate_to_datetime(raw).strftime("%d.%m.%Y")
    except (TypeError, ValueError):
        pass

    # ISO 8601 timestamps always start with YYYY-MM-DD; parsing just that
    # prefix avoids depending on which time/offset variants the running
    # Python's fromisoformat() accepts.
    try:
        return datetime.strptime(raw[:10], "%Y-%m-%d").strftime("%d.%m.%Y")
    except ValueError:
        # Last resort: show whatever the feed sent.
        return raw[:10].replace("-", ".")
def _fetch_rss():
    """Download the feed at ``RSS_URL`` and parse it as RSS 2.0 or Atom.

    The feed is treated as RSS 2.0 when it contains ``<item>`` elements and
    as Atom otherwise (``<entry>`` children of the root in the Atom
    namespace).

    Returns:
        A list of at most 10 dicts, in feed order, with the keys ``title``,
        ``link``, ``pub_date``, ``creator`` (strings, possibly empty; a
        missing title becomes "Без названия") and ``tags`` (list of
        non-empty strings). An empty list when the feed has no items.
        ``None`` when the HTTP request fails or the body is not well-formed
        XML.
    """
    ns = {
        "dc": "http://purl.org/dc/elements/1.1/",
        "atom": "http://www.w3.org/2005/Atom",
    }

    def text_of(element, default=""):
        """Return the stripped text of *element*, or *default* if absent/empty."""
        if element is None or not element.text:
            return default
        return element.text.strip() or default

    try:
        response = requests.get(RSS_URL, timeout=10)
        response.raise_for_status()
        # NOTE(review): the stdlib XML parser is not hardened against
        # maliciously constructed documents (see the Python "xml" package
        # docs); this is acceptable only while RSS_URL is a trusted endpoint.
        root = ElementTree.fromstring(response.content)
    except (requests.RequestException, ElementTree.ParseError):
        # A malformed body is reported the same way as a failed request so
        # the caller prints its error message instead of crashing.
        return None

    items = root.findall(".//item")
    # Decide the format once per feed. Deciding per item from a missing
    # <title> would misread a title-less RSS item as Atom and discard its
    # link and date.
    is_atom = not items
    if is_atom:
        items = root.findall("atom:entry", ns)

    articles = []
    for entry in items[:10]:
        if is_atom:
            title_el = entry.find("atom:title", ns)
            date_el = entry.find("atom:published", ns)
            if date_el is None:
                # <published> is optional in Atom; <updated> is the
                # mandatory timestamp.
                date_el = entry.find("atom:updated", ns)
            creator_el = entry.find("atom:author/atom:name", ns)
            # Atom categories carry their value in the "term" attribute;
            # the element text is normally empty.
            tags = [
                cat.get("term") or text_of(cat)
                for cat in entry.findall("atom:category", ns)
            ]
            # Prefer the article page: rel="alternate", which is also the
            # meaning of a link with no rel attribute. Fall back to the
            # first link of any kind.
            links = entry.findall("atom:link", ns)
            link_el = next(
                (
                    candidate
                    for candidate in links
                    if candidate.get("rel", "alternate") == "alternate"
                ),
                links[0] if links else None,
            )
            link = link_el.get("href", "") if link_el is not None else ""
        else:
            title_el = entry.find("title")
            date_el = entry.find("pubDate")
            creator_el = entry.find("dc:creator", ns)
            tags = [text_of(cat) for cat in entry.findall("category")]
            # A guid marked isPermaLink="true" is the clean article URL;
            # fall back to <link> when the feed does not provide one.
            link = text_of(entry.find("guid[@isPermaLink='true']")) or text_of(
                entry.find("link")
            )

        articles.append({
            "title": text_of(title_el, "Без названия"),
            "link": link,
            "pub_date": text_of(date_el),
            "creator": text_of(creator_el),
            "tags": [tag for tag in tags if tag],
        })
    return articles