- Парсинг RSS через ElementTree (RSS 2.0 / Atom)
- Данные: title, dc:creator, guid isPermaLink, pubDate, category
- Формат: заголовок до 60 символов, дата дд.мм.гггг, теги
- Ссылки без https://, кликабельные
- Консольная команда: !news
95 lines
3.4 KiB
Python
95 lines
3.4 KiB
Python
import requests
|
||
from xml.etree import ElementTree
|
||
|
||
|
||
# Feed URL downloaded by _fetch_rss(): the Habr "artificial_intelligence" hub.
# NOTE(review): `rated10` and `fl=ru` presumably filter by rating and by
# language -- confirm against the Habr RSS documentation.
RSS_URL = "https://habr.com/ru/rss/hubs/artificial_intelligence/articles/rated10/?fl=ru"
|
||
|
||
|
||
def news(stop_event, bot):
    """Print the five most recent AI articles from the Habr feed.

    Each article is printed as three lines -- the numbered title (cut to 60
    characters plus "..."), then ``author | date | up to three tags``, then
    the link without its leading ``https://`` -- followed by an empty line.

    Args:
        stop_event: Not used by this function.
        bot: Not used by this function.
            NOTE(review): both parameters are presumably part of a shared
            command-handler signature -- confirm against the caller.

    Returns:
        None. All output goes to stdout via ``print``.
    """
    articles = _fetch_rss()
    if articles is None:
        # _fetch_rss() signals a failed download with None.
        print("Не удалось получить новости.")
        return
    if not articles:
        print("Новостей пока нет.")
        return

    print("**AI-новости с Habr**")
    for i, article in enumerate(articles[:5], 1):
        # A feed element that is present but empty yields None as its text,
        # so every field is normalised before string operations touch it.
        title = article["title"] or "Без названия"
        if len(title) > 60:
            title = title[:60] + "..."

        creator = article["creator"] or ""
        date_str = _format_pub_date(article["pub_date"])
        tags_str = ", ".join(article["tags"][:3]) if article["tags"] else ""

        # Strip the scheme only at the start; a blanket replace() would also
        # mangle URLs embedded in the query string.
        link = article["link"] or ""
        if link.startswith("https://"):
            link = link[len("https://"):]

        print(f"{i}. {title}")
        print(f" {creator} | {date_str} | {tags_str}")
        print(f" {link}")
        print()


def _format_pub_date(raw):
    """Format a feed date string as ``dd.mm.yyyy``.

    Args:
        raw: Date text from the feed; may be None or empty.

    Returns:
        The formatted date, "" for missing input, or the first ten characters
        of *raw* (with "-" replaced by ".") when no known format matches.
    """
    from datetime import datetime
    from email.utils import parsedate_to_datetime

    raw = (raw or "").strip()
    if not raw:
        return ""

    # RFC 822 style dates (RSS 2.0 pubDate). Unlike strptime("%a, %d %b ..."),
    # this parser does not depend on the process locale and accepts both
    # numeric offsets and zone names such as "GMT".
    try:
        return parsedate_to_datetime(raw).strftime("%d.%m.%Y")
    except (TypeError, ValueError):
        # Python < 3.10 raises TypeError for unparseable input, newer
        # versions raise ValueError.
        pass

    # ISO 8601 timestamps (Atom): the calendar date is the first ten chars.
    try:
        return datetime.strptime(raw[:10], "%Y-%m-%d").strftime("%d.%m.%Y")
    except ValueError:
        # Unknown format: keep the original best-effort output.
        return raw[:10].replace("-", ".")
|
||
|
||
|
||
_DC_NS = {"dc": "http://purl.org/dc/elements/1.1/"}
_ATOM_NS = {"atom": "http://www.w3.org/2005/Atom"}


def _fetch_rss():
    """Download RSS_URL and parse it as an RSS 2.0 or Atom feed.

    Returns:
        A list of at most 10 dicts with the keys "title", "link", "pub_date",
        "creator" (all str) and "tags" (list of str), in feed order;
        an empty list when the feed contains no items/entries;
        None when the download fails or the body is not well-formed XML.
    """
    try:
        response = requests.get(RSS_URL, timeout=10)
        response.raise_for_status()
        root = ElementTree.fromstring(response.content)
    except (requests.RequestException, ElementTree.ParseError):
        # A broken XML body is treated like a failed download so the caller
        # reports an error instead of crashing.
        return None

    # The format is decided once per feed. Deciding per item (on a missing
    # <title>) would make an untitled RSS item lose its link, date and tags.
    items = root.findall(".//item")
    if items:
        parse = _parse_rss_item
    else:
        items = root.findall("atom:entry", _ATOM_NS)
        parse = _parse_atom_entry

    return [parse(element) for element in items[:10]]


def _element_text(element, default=""):
    """Return the stripped text of *element*, or *default* if missing/empty."""
    if element is None or element.text is None:
        return default
    return element.text.strip() or default


def _parse_rss_item(item):
    """Convert one RSS 2.0 <item> element into an article dict."""
    # Prefer a guid explicitly marked as a permalink, then fall back to the
    # standard <link> element.
    link = _element_text(item.find("guid[@isPermaLink='true']"))
    if not link:
        link = _element_text(item.find("link"))

    tags = [_element_text(cat) for cat in item.findall("category")]
    return {
        "title": _element_text(item.find("title"), "Без названия"),
        "link": link,
        "pub_date": _element_text(item.find("pubDate")),
        "creator": _element_text(item.find("dc:creator", _DC_NS)),
        "tags": [tag for tag in tags if tag],
    }


def _parse_atom_entry(entry):
    """Convert one Atom <entry> element into an article dict."""
    # An entry may carry several <link> elements (self, enclosure, ...);
    # a missing rel attribute means "alternate" per RFC 4287.
    links = entry.findall("atom:link", _ATOM_NS)
    link_el = next(
        (el for el in links if el.get("rel", "alternate") == "alternate"),
        links[0] if links else None,
    )
    link = link_el.get("href", "") if link_el is not None else ""

    # <published> is optional in Atom; <updated> is the fallback.
    # Explicit None checks: a childless Element is falsy, so `or` is unsafe.
    date_el = entry.find("atom:published", _ATOM_NS)
    if date_el is None:
        date_el = entry.find("atom:updated", _ATOM_NS)

    # Atom categories carry their value in the "term" attribute, not as text.
    tags = [
        (cat.get("term") or cat.text or "").strip()
        for cat in entry.findall("atom:category", _ATOM_NS)
    ]
    return {
        "title": _element_text(entry.find("atom:title", _ATOM_NS), "Без названия"),
        "link": link,
        "pub_date": _element_text(date_el),
        "creator": _element_text(entry.find("atom:author/atom:name", _ATOM_NS)),
        "tags": [tag for tag in tags if tag],
    }
|