# discordBot/console_commands/news.py

import requests
from xml.etree import ElementTree


# Habr RSS feed requested by _fetch_rss(): the "artificial_intelligence" hub,
# "rated10" article listing, with the query parameter fl=ru.
# NOTE(review): "rated10" presumably means a rating threshold of 10 and fl=ru
# a Russian-language filter — confirm against Habr's feed documentation.
RSS_URL = "https://habr.com/ru/rss/hubs/artificial_intelligence/articles/rated10/?fl=ru"


def news(stop_event, bot):
    """Print the top five fresh AI articles from Habr to the console.

    For every article three lines are printed: the numbered title (cut to 60
    characters), a "creator | date | tags" line with at most three tags, and
    the link without its ``https://`` scheme.

    Args:
        stop_event: Not used by this command.
        bot: Not used by this command.
            NOTE(review): both parameters are presumably part of a shared
            console-command signature — confirm against the dispatcher.

    Returns:
        None. All output goes to stdout.
    """
    articles = _fetch_rss()
    if articles is None:
        print("Не удалось получить новости.")
        return

    if not articles:
        print("Новостей пока нет.")
        return

    print("**AI-новости с Habr**")
    for i, article in enumerate(articles[:5], 1):
        # A feed element can be present but empty, in which case its parsed
        # text is None; normalise before calling str methods on it.
        title = article["title"] or "Без названия"
        if len(title) > 60:
            title = title[:60] + "..."
        creator = article["creator"] or ""
        date_str = _format_pub_date(article["pub_date"])
        tags_str = ", ".join(article["tags"][:3]) if article["tags"] else ""
        link = (article["link"] or "").replace("https://", "")

        print(f"{i}. {title}")
        print(f"   {creator} | {date_str} | {tags_str}")
        print(f"   {link}")
        print()


def _format_pub_date(raw):
    """Format a feed timestamp as ``DD.MM.YYYY``.

    Args:
        raw: Timestamp text taken from the feed, or an empty value.

    Returns:
        The formatted date; "" when *raw* is empty. If the text is neither an
        RFC 822 date nor an ISO 8601 timestamp, the first ten characters are
        returned with "-" replaced by "." as a best-effort fallback.
    """
    from datetime import datetime
    from email.utils import parsedate_to_datetime

    if not raw:
        return ""
    raw = raw.strip()

    # RSS 2.0 dates ("Mon, 15 Jan 2024 10:00:00 GMT"). Unlike strptime with
    # %a/%b, this parser does not depend on the process locale.
    try:
        return parsedate_to_datetime(raw).strftime("%d.%m.%Y")
    except (TypeError, ValueError):
        # Older Python versions raise TypeError for unparseable input,
        # newer ones raise ValueError.
        pass

    # Atom dates are ISO 8601. fromisoformat() only accepts a trailing "Z"
    # on Python 3.11+, so spell the UTC offset out explicitly.
    iso = raw[:-1] + "+00:00" if raw.endswith(("Z", "z")) else raw
    try:
        return datetime.fromisoformat(iso).strftime("%d.%m.%Y")
    except ValueError:
        return raw[:10].replace("-", ".")


def _fetch_rss():
    """Download the feed at ``RSS_URL`` and parse it (RSS 2.0 or Atom).

    Returns:
        A list of at most 10 article dicts with the keys "title", "link",
        "pub_date", "creator" and "tags"; an empty list when the feed has no
        entries; None when the HTTP request fails or the response body is
        not well-formed XML.
    """
    try:
        response = requests.get(RSS_URL, timeout=10)
        response.raise_for_status()
    except requests.RequestException:
        return None

    try:
        articles = _parse_feed(response.content)
    except ElementTree.ParseError:
        # A malformed body is reported the same way as a failed download
        # instead of crashing the caller.
        return None
    return articles[:10]


def _parse_feed(xml_data):
    """Convert an RSS 2.0 or Atom document into a list of article dicts.

    Args:
        xml_data: The feed document as bytes or str.

    Returns:
        A list of dicts with the keys "title" (str), "link" (str),
        "pub_date" (str), "creator" (str) and "tags" (list of str). Empty
        when the document contains neither RSS items nor Atom entries.

    Raises:
        xml.etree.ElementTree.ParseError: If *xml_data* is not well-formed.
    """
    namespaces = {
        "dc": "http://purl.org/dc/elements/1.1/",
        "atom": "http://www.w3.org/2005/Atom",
    }

    def text_of(element):
        """Return the element's text, or "" if it is missing or empty."""
        if element is None:
            return ""
        return element.text or ""

    # NOTE: xml.etree.ElementTree is documented as not secure against
    # maliciously constructed data; the input here comes from a fixed URL.
    root = ElementTree.fromstring(xml_data)

    # The format is decided once per feed: RSS 2.0 when <item> elements
    # exist, otherwise Atom entries directly below the root.
    entries = root.findall(".//item")
    is_atom = not entries
    if is_atom:
        entries = root.findall("atom:entry", namespaces)

    articles = []
    for entry in entries:
        if is_atom:
            title_el = entry.find("atom:title", namespaces)
            # Compare with None explicitly: an Element without children is
            # falsy, so "a or b" would be wrong here.
            date_el = entry.find("atom:published", namespaces)
            if date_el is None:
                date_el = entry.find("atom:updated", namespaces)
            creator_el = entry.find("atom:author/atom:name", namespaces)
            categories = entry.findall("atom:category", namespaces)
            link_el = entry.find("atom:link", namespaces)
            link = link_el.get("href", "") if link_el is not None else ""
        else:
            title_el = entry.find("title")
            date_el = entry.find("pubDate")
            creator_el = entry.find("dc:creator", namespaces)
            categories = entry.findall("category")
            # Prefer a guid marked as permalink for a clean URL, then fall
            # back to the item's <link> element.
            link = text_of(entry.find("guid[@isPermaLink='true']"))
            if not link:
                link = text_of(entry.find("link"))

        # RSS categories keep the label as element text; Atom categories
        # keep it in the "term" attribute.
        labels = (cat.text or cat.get("term") for cat in categories)

        articles.append({
            "title": text_of(title_el) or "Без названия",
            "link": link,
            "pub_date": text_of(date_el),
            "creator": text_of(creator_el),
            "tags": [label for label in labels if label],
        })
    return articles