# 117 lines
# 4.7 KiB
# Python
# Raw Blame History
#
# This file contains ambiguous Unicode characters
#
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import requests
from xml.etree import ElementTree
# RSS feeds of the Habr "artificial_intelligence" hub, daily top selection.
# NOTE(review): the `fl=ru` query parameter presumably limits the feed to
# Russian-language content — confirm against Habr's feed documentation.
# Feed of the hub's top articles for the day.
RSS_URL_ARTICLES = "https://habr.com/ru/rss/hubs/artificial_intelligence/articles/top/daily/?fl=ru"
# Feed of the hub's top news items for the day (the URL path is `/news/`).
RSS_URL_POSTS = "https://habr.com/ru/rss/hubs/artificial_intelligence/news/top/daily/?fl=ru"
def news(stop_event, bot):
    """Print the day's top AI articles and news items from Habr.

    The articles feed is fetched first. When it cannot be fetched, or it
    contains no entries, a short message is printed and the function returns
    without requesting the news feed. Otherwise up to five articles are
    printed, followed by up to five news items if that feed yields any.

    Args:
        stop_event: Accepted but not used by this function.
        bot: Accepted but not used by this function.
    """
    articles = _fetch_rss(RSS_URL_ARTICLES)
    if articles is None:
        print("Не удалось получить новости.")
        return
    if not articles:
        print("Новостей пока нет.")
        return

    _print_feed(
        "**Лучшие статьи за сутки / Искусственный интеллект / Хабr**",
        "<https://habr.com/ru/hubs/artificial_intelligence/articles/top/daily/>",
        articles,
    )

    # Second block: news items. A failed or empty fetch is skipped silently
    # because the articles above have already been shown.
    posts = _fetch_rss(RSS_URL_POSTS)
    if posts:
        _print_feed(
            "**Лучшие новости за сутки / Искусственный интеллект / Хабr**",
            "<https://habr.com/ru/hubs/artificial_intelligence/news/top/daily/>",
            posts,
        )


def _format_pub_date(raw):
    """Return a feed date string as ``DD.MM.YYYY``, or ``""`` when it is empty."""
    from datetime import datetime

    if not raw:
        return ""
    try:
        # "%z" does not accept the literal "GMT", so rewrite it as an offset.
        parsed = datetime.strptime(
            raw.replace(" GMT", " +0000"), "%a, %d %b %Y %H:%M:%S %z"
        )
    except ValueError:
        # Not in the "Mon, 01 Jan 2024 10:00:00 +0000" layout: keep the first
        # ten characters, which turns an ISO "YYYY-MM-DD..." into "YYYY.MM.DD".
        return raw[:10].replace("-", ".")
    return parsed.strftime("%d.%m.%Y")


def _print_feed(heading, page_link, entries, limit=5, max_title=60):
    """Print a heading, the page link, and up to ``limit`` entries."""
    print(heading)
    print(page_link)
    print()
    for entry in entries[:limit]:
        date_str = _format_pub_date(entry["pub_date"])
        # The title is None when the feed carries an empty <title/>; without
        # this guard len() below would raise TypeError.
        title = entry["title"] or "Без названия"
        if len(title) > max_title:
            title = title[:max_title] + "..."
        print(f"{title}\n {date_str} {entry['link']}")
        print("────────────────────────────────────────")
        print()
def _fetch_rss(url):
    """Download a feed and parse it into a list of entry dicts (RSS 2.0 / Atom).

    Args:
        url: Address of the feed to download.

    Returns:
        A list of at most 10 dicts with the keys ``"title"``, ``"link"``,
        ``"pub_date"``, ``"creator"`` (all ``str``) and ``"tags"`` (list of
        ``str``). An empty list when the feed has no entries. ``None`` when
        the HTTP request fails or the response body is not well-formed XML.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException:
        return None

    try:
        root = ElementTree.fromstring(response.content)
    except ElementTree.ParseError:
        # A malformed payload is reported the same way as a failed download,
        # so callers that check for None handle both cases.
        return None

    return _parse_feed(root)


_ATOM_NS = {"atom": "http://www.w3.org/2005/Atom"}
_DC_NS = {"dc": "http://purl.org/dc/elements/1.1/"}
_MAX_ENTRIES = 10


def _element_text(element, default=""):
    """Return the element's text, or ``default`` if it is missing or empty."""
    text = element.text if element is not None else None
    return text if text else default


def _parse_rss_item(item):
    """Convert one RSS 2.0 ``<item>`` element into an entry dict."""
    # A guid marked isPermaLink="true" gives the clean URL; fall back to the
    # standard <link> element when no such guid is present.
    link = _element_text(item.find("guid[@isPermaLink='true']")) or _element_text(
        item.find("link")
    )
    return {
        "title": _element_text(item.find("title"), "Без названия"),
        "link": link,
        "pub_date": _element_text(item.find("pubDate")),
        "creator": _element_text(item.find("dc:creator", _DC_NS)),
        "tags": [cat.text for cat in item.findall("category") if cat.text],
    }


def _parse_atom_entry(entry):
    """Convert one Atom ``<entry>`` element into an entry dict."""
    link_el = entry.find("atom:link", _ATOM_NS)
    return {
        "title": _element_text(entry.find("atom:title", _ATOM_NS), "Без названия"),
        "link": link_el.get("href", "") if link_el is not None else "",
        "pub_date": _element_text(entry.find("atom:published", _ATOM_NS)),
        "creator": _element_text(entry.find("atom:author/atom:name", _ATOM_NS)),
        "tags": [
            cat.text for cat in entry.findall("atom:category", _ATOM_NS) if cat.text
        ],
    }


def _parse_feed(root):
    """Extract up to ``_MAX_ENTRIES`` entry dicts from a parsed feed root."""
    # The format is decided once per feed: RSS 2.0 <item> elements take
    # precedence, Atom <entry> elements are used only when there are none.
    rss_items = root.findall(".//item")
    if rss_items:
        return [_parse_rss_item(item) for item in rss_items[:_MAX_ENTRIES]]
    atom_entries = root.findall("atom:entry", _ATOM_NS)
    return [_parse_atom_entry(entry) for entry in atom_entries[:_MAX_ENTRIES]]