# Viewer metadata: 65 lines, 2.2 KiB, Python
import logging
import time
from typing import Any
from urllib.parse import urlparse

import feedparser
import httpx

from app.config import get_settings
|
|
|
|
# Process-wide headline cache, shared by every RssClient instance.
#   "items"      -> merged headline dicts from the most recent fetch
#   "expires_at" -> time.time() value after which "items" is treated as stale
_cache: dict[str, Any] = {
    "items": [],
    "expires_at": 0.0,
}
|
|
|
|
|
|
class RssClient:
|
|
def __init__(self) -> None:
|
|
settings = get_settings()
|
|
self.urls = settings.news_rss_urls_list
|
|
self.cache_ttl = settings.news_cache_sec
|
|
self.max_items = settings.news_max_items
|
|
|
|
def _fetch_feed(self, url: str) -> list[dict[str, str]]:
|
|
headers = {"User-Agent": "HomeAIAssistant/1.0"}
|
|
with httpx.Client(timeout=20.0, headers=headers, follow_redirects=True) as client:
|
|
response = client.get(url)
|
|
response.raise_for_status()
|
|
parsed = feedparser.parse(response.content)
|
|
|
|
source = urlparse(url).netloc or url
|
|
items: list[dict[str, str]] = []
|
|
for entry in parsed.entries[: self.max_items]:
|
|
link = (entry.get("link") or "").strip()
|
|
title = (entry.get("title") or "").strip()
|
|
if not title:
|
|
continue
|
|
items.append({
|
|
"title": title,
|
|
"link": link,
|
|
"source": source,
|
|
"published": (entry.get("published") or entry.get("updated") or "").strip(),
|
|
})
|
|
return items
|
|
|
|
def fetch_headlines(self, limit: int | None = None) -> list[dict[str, str]]:
|
|
now = time.time()
|
|
if _cache["items"] and now < _cache["expires_at"]:
|
|
items = _cache["items"]
|
|
else:
|
|
merged: list[dict[str, str]] = []
|
|
seen_links: set[str] = set()
|
|
for url in self.urls:
|
|
try:
|
|
for item in self._fetch_feed(url):
|
|
link = item.get("link") or item["title"]
|
|
if link in seen_links:
|
|
continue
|
|
seen_links.add(link)
|
|
merged.append(item)
|
|
except Exception:
|
|
continue
|
|
_cache["items"] = merged
|
|
_cache["expires_at"] = now + self.cache_ttl
|
|
items = merged
|
|
|
|
cap = limit or self.max_items
|
|
return items[:cap]
|