# discordBot/console_commands/news.py

import requests
from xml.etree import ElementTree


# Habr RSS feed: daily top articles of the "artificial_intelligence" hub.
# NOTE(review): "fl=ru" presumably restricts the feed to Russian-language content — confirm.
RSS_URL_ARTICLES = "https://habr.com/ru/rss/hubs/artificial_intelligence/articles/top/daily/?fl=ru"
# Habr RSS feed: daily top news posts of the same hub.
RSS_URL_POSTS = "https://habr.com/ru/rss/hubs/artificial_intelligence/news/top/daily/?fl=ru"


# How many entries of each feed are printed.
_NEWS_ITEMS_PER_FEED = 5
# Titles longer than this are cut and suffixed with "...".
_NEWS_TITLE_MAX_LENGTH = 60
# Line printed after every entry.
_NEWS_ITEM_SEPARATOR = "────────────────────────────────────────"


def _format_pub_date(pub_date):
    """Return *pub_date* rendered as ``DD.MM.YYYY``.

    RFC 2822 dates (the RSS ``pubDate`` format) are parsed with the standard
    library parser, which does not depend on the process locale and accepts
    named time zones. Anything it cannot parse falls back to the first ten
    characters with ``-`` replaced by ``.``. An empty or missing date yields
    an empty string.
    """
    if not pub_date:
        return ""

    from email.utils import parsedate_to_datetime

    try:
        return parsedate_to_datetime(pub_date).strftime("%d.%m.%Y")
    except (TypeError, ValueError):
        # Unparseable input raises ValueError on Python 3.10+ and TypeError
        # on older versions; both mean "not an RFC 2822 date".
        return pub_date[:10].replace("-", ".")


def _print_feed(heading, page_link, entries):
    """Print a feed heading followed by the first few *entries*."""
    print(heading)
    print(page_link)
    print()
    for entry in entries[:_NEWS_ITEMS_PER_FEED]:
        # An empty <title/> element may arrive as None; never call len() on it.
        title = entry["title"] or "Без названия"
        if len(title) > _NEWS_TITLE_MAX_LENGTH:
            title = title[:_NEWS_TITLE_MAX_LENGTH] + "..."
        date_str = _format_pub_date(entry["pub_date"])
        link = entry["link"] or ""
        print(f"{title}\n   {date_str}   {link}")
        print(_NEWS_ITEM_SEPARATOR)
        print()


def news(stop_event, bot):
    """Print the day's top AI articles and news posts from Habr.

    The articles feed is fetched first. If it cannot be fetched, or it has no
    entries, a short notice is printed and the function returns without
    requesting the news feed. Otherwise up to five articles are printed,
    followed by up to five news posts when the second feed yields any; a
    failed or empty news feed is skipped silently.

    Args:
        stop_event: Unused here (presumably part of the shared
            console-command signature — verify against the caller).
        bot: Unused here.
    """
    articles = _fetch_rss(RSS_URL_ARTICLES)
    if articles is None:
        print("Не удалось получить новости.")
        return
    if not articles:
        print("Новостей пока нет.")
        return

    _print_feed(
        "**Лучшие статьи за сутки / Искусственный интеллект / Хабr**",
        "<https://habr.com/ru/hubs/artificial_intelligence/articles/top/daily/>",
        articles,
    )

    # Second section: news posts.
    posts = _fetch_rss(RSS_URL_POSTS)
    if posts:
        _print_feed(
            "**Лучшие новости за сутки / Искусственный интеллект / Хабr**",
            "<https://habr.com/ru/hubs/artificial_intelligence/news/top/daily/>",
            posts,
        )


_ATOM_NS = {"atom": "http://www.w3.org/2005/Atom"}
_DC_NS = {"dc": "http://purl.org/dc/elements/1.1/"}
# Upper bound on the number of entries returned by _fetch_rss.
_MAX_FEED_ENTRIES = 10


def _element_text(element, default=""):
    """Return the stripped text of *element*, or *default* if missing/empty."""
    # Compare with None explicitly: an Element without children is falsy.
    if element is None or element.text is None:
        return default
    return element.text.strip() or default


def _parse_rss_item(item):
    """Convert an RSS 2.0 ``<item>`` element into an article dict."""
    # A permalink guid is preferred because it is a clean URL; fall back to
    # <link> when the item carries no such guid.
    link = _element_text(item.find("guid[@isPermaLink='true']"))
    if not link:
        link = _element_text(item.find("link"))
    return {
        "title": _element_text(item.find("title"), "Без названия"),
        "link": link,
        "pub_date": _element_text(item.find("pubDate")),
        "creator": _element_text(item.find("dc:creator", _DC_NS)),
        "tags": [cat.text for cat in item.findall("category") if cat.text],
    }


def _parse_atom_entry(entry):
    """Convert an Atom ``<entry>`` element into an article dict."""
    link_el = entry.find("atom:link", _ATOM_NS)
    link = link_el.get("href", "") if link_el is not None else ""

    # <published> is optional in Atom, <updated> is mandatory.
    date_el = entry.find("atom:published", _ATOM_NS)
    if date_el is None:
        date_el = entry.find("atom:updated", _ATOM_NS)

    # Atom categories carry their label in the "term" attribute; element
    # text is still accepted for feeds that put it there.
    tags = []
    for cat in entry.findall("atom:category", _ATOM_NS):
        label = cat.get("term") or cat.text
        if label:
            tags.append(label)

    return {
        "title": _element_text(entry.find("atom:title", _ATOM_NS), "Без названия"),
        "link": link,
        "pub_date": _element_text(date_el),
        "creator": _element_text(entry.find("atom:author/atom:name", _ATOM_NS)),
        "tags": tags,
    }


def _fetch_rss(url):
    """Download and parse a feed (RSS 2.0 or Atom).

    Args:
        url: Address of the feed to download.

    Returns:
        A list of at most ten dicts with the string keys ``title``, ``link``,
        ``pub_date`` and ``creator`` plus a ``tags`` list; an empty list when
        the feed contains no entries; ``None`` when the HTTP request fails or
        the response body is not well-formed XML.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        root = ElementTree.fromstring(response.content)
    except (requests.RequestException, ElementTree.ParseError):
        # A broken document is treated like a failed download, so callers
        # only have to handle None.
        return None

    # RSS 2.0 items are un-namespaced; Atom entries are direct children of
    # the root in the Atom namespace.
    rss_items = root.findall(".//item")
    if rss_items:
        return [_parse_rss_item(item) for item in rss_items[:_MAX_FEED_ENTRIES]]

    atom_entries = root.findall("atom:entry", _ATOM_NS)
    return [_parse_atom_entry(entry) for entry in atom_entries[:_MAX_FEED_ENTRIES]]