1use crate::sources::news::cache::{ARTICLE_CACHE, ARTICLE_CACHE_TTL_SECONDS, ArticleCacheEntry};
4use crate::sources::news::parse::parse_arch_news_html;
5use crate::sources::news::utils::is_archlinux_url;
6use crate::sources::news::{
7 aur::extract_aur_pkg_from_url,
8 cache::{load_article_entry_from_disk_cache, save_article_to_disk_cache},
9 utils::is_arch_package_url,
10};
11use crate::state::NewsItem;
12use reqwest;
13use std::sync::LazyLock;
14use std::time::{Duration, Instant};
15use tracing::{info, warn};
16
/// Module-local shorthand for the parent module's `Result` type, so signatures
/// in this file can simply write `Result<T>`.
type Result<T> = super::Result<T>;
19
20fn extract_official_package_cache_path(url: &str) -> Option<std::path::PathBuf> {
32 let lower = url.to_ascii_lowercase();
33 let pos = lower.find("archlinux.org/packages/")?;
34 let after = &url[pos + "archlinux.org/packages/".len()..];
35 let parts: Vec<&str> = after.split('/').filter(|s| !s.is_empty()).collect();
36 if parts.len() >= 3 {
37 let repo = parts[0];
38 let arch = parts[1];
39 let name = parts[2]
40 .split('?')
41 .next()
42 .unwrap_or(parts[2])
43 .split('#')
44 .next()
45 .unwrap_or(parts[2]);
46 Some(crate::sources::official_json_cache_path(repo, arch, name))
47 } else {
48 None
49 }
50}
51
52fn prepend_official_package_changes(url: &str, content: &str) -> String {
64 let Some(cache_path) = extract_official_package_cache_path(url) else {
65 return content.to_string();
66 };
67
68 let Some(cached_json) = crate::sources::load_official_json_cache(&cache_path) else {
69 return content.to_string();
70 };
71
72 let pkg_obj = cached_json.get("pkg").unwrap_or(&cached_json);
73
74 let Some(pkg_name) = pkg_obj.get("pkgname").and_then(serde_json::Value::as_str) else {
75 return content.to_string();
76 };
77
78 let Some(changes) = crate::sources::get_official_json_changes(pkg_name) else {
79 return content.to_string();
80 };
81
82 if content.starts_with("Changes detected") {
83 content.to_string()
84 } else {
85 format!("{changes}\n\n─── Package Info ───\n\n{content}")
86 }
87}
88
89static HTTP_CLIENT: LazyLock<reqwest::Client> = LazyLock::new(|| {
93 use reqwest::header::{ACCEPT, ACCEPT_LANGUAGE, HeaderMap, HeaderValue};
94 let mut headers = HeaderMap::new();
95 headers.insert(
97 ACCEPT,
98 HeaderValue::from_static("text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
99 );
100 headers.insert(ACCEPT_LANGUAGE, HeaderValue::from_static("en-US,en;q=0.5"));
102 reqwest::Client::builder()
103 .connect_timeout(Duration::from_secs(15))
104 .timeout(Duration::from_secs(30))
105 .user_agent(format!(
107 "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0 Pacsea/{}",
108 env!("CARGO_PKG_VERSION")
109 ))
110 .default_headers(headers)
111 .build()
112 .expect("Failed to create HTTP client")
113});
114
115pub async fn fetch_arch_news(limit: usize, cutoff_date: Option<&str>) -> Result<Vec<NewsItem>> {
133 use crate::sources::news::utils::{extract_between, strip_time_and_tz};
134
135 let url = "https://archlinux.org/feeds/news/";
136 let body = tokio::task::spawn_blocking(move || {
138 crate::util::curl::curl_text_with_args(
139 url,
140 &["--connect-timeout", "10", "--max-time", "15"],
141 )
142 })
143 .await?
144 .map_err(|e| {
145 warn!(error = %e, "failed to fetch arch news feed");
146 e
147 })?;
148 info!(bytes = body.len(), "fetched arch news feed");
149 let mut items: Vec<NewsItem> = Vec::new();
150 let mut pos = 0;
151 while items.len() < limit {
152 if let Some(start) = body[pos..].find("<item>") {
153 let s = pos + start;
154 let end = body[s..].find("</item>").map_or(body.len(), |e| s + e + 7);
155 let chunk = &body[s..end];
156 let title = extract_between(chunk, "<title>", "</title>").unwrap_or_default();
157 let link = extract_between(chunk, "<link>", "</link>").unwrap_or_default();
158 let raw_date = extract_between(chunk, "<pubDate>", "</pubDate>")
159 .map(|d| d.trim().to_string())
160 .unwrap_or_default();
161 let date = strip_time_and_tz(&raw_date);
162 if let Some(cutoff) = cutoff_date
164 && date.as_str() < cutoff
165 {
166 break;
167 }
168 items.push(NewsItem {
169 date,
170 title,
171 url: link,
172 });
173 pos = end;
174 } else {
175 break;
176 }
177 }
178 info!(count = items.len(), "parsed arch news feed");
179 Ok(items)
180}
181
182pub async fn fetch_news_content(url: &str) -> Result<String> {
202 use crate::sources::news::aur::render_aur_comments;
203
204 if let Some(pkg) = extract_aur_pkg_from_url(url) {
205 let changes = crate::sources::get_aur_json_changes(&pkg);
207 let comments = crate::sources::fetch_aur_comments(pkg.clone()).await?;
208 let mut rendered = render_aur_comments(&pkg, &comments);
209
210 if let Some(changes_text) = changes {
212 rendered = format!("{changes_text}\n\n─── AUR Comments ───\n\n{rendered}");
213 }
214
215 return Ok(rendered);
216 }
217
218 if is_arch_package_url(url)
220 && let Ok(cache) = ARTICLE_CACHE.lock()
221 && let Some(entry) = cache.get(url)
222 && entry.timestamp.elapsed().as_secs() < ARTICLE_CACHE_TTL_SECONDS
223 {
224 let content = prepend_official_package_changes(url, &entry.content);
225 return Ok(content);
226 }
227
228 let cached_entry: Option<ArticleCacheEntry> = if let Ok(cache) = ARTICLE_CACHE.lock()
230 && let Some(entry) = cache.get(url)
231 && entry.timestamp.elapsed().as_secs() < ARTICLE_CACHE_TTL_SECONDS
232 {
233 info!(url, "using in-memory cached article content");
234 return Ok(entry.content.clone());
235 } else {
236 None
237 };
238
239 let disk_entry = load_article_entry_from_disk_cache(url);
241 if let Some(ref entry) = disk_entry {
242 if let Ok(mut cache) = ARTICLE_CACHE.lock() {
244 cache.insert(
245 url.to_string(),
246 ArticleCacheEntry {
247 content: entry.content.clone(),
248 timestamp: Instant::now(),
249 etag: entry.etag.clone(),
250 last_modified: entry.last_modified.clone(),
251 },
252 );
253 }
254 if is_arch_package_url(url) {
256 let content = prepend_official_package_changes(url, &entry.content);
257 return Ok(content);
258 }
259 return Ok(entry.content.clone());
260 }
261
262 let endpoint_pattern = crate::sources::feeds::extract_endpoint_pattern(url);
264 if let Err(e) = crate::sources::feeds::check_circuit_breaker(&endpoint_pattern) {
265 warn!(url, endpoint_pattern, error = %e, "circuit breaker blocking request");
266 if let Some(cached) = cached_entry {
268 return Ok(cached.content);
269 }
270 if let Some(disk) = disk_entry {
271 return Ok(disk.content);
272 }
273 return Err(e);
274 }
275
276 let cached_etag = cached_entry
279 .as_ref()
280 .and_then(|e: &ArticleCacheEntry| e.etag.as_ref())
281 .or_else(|| disk_entry.as_ref().and_then(|e| e.etag.as_ref()))
282 .cloned();
283 let cached_last_modified = cached_entry
284 .as_ref()
285 .and_then(|e: &ArticleCacheEntry| e.last_modified.as_ref())
286 .or_else(|| disk_entry.as_ref().and_then(|e| e.last_modified.as_ref()))
287 .cloned();
288
289 let (body, etag, last_modified) =
291 match fetch_from_network(url, cached_etag, cached_last_modified, &endpoint_pattern).await {
292 Ok(result) => result,
293 Err(e) if e.to_string() == "304 Not Modified" => {
294 if let Some(cached) = cached_entry {
296 return Ok(cached.content);
297 }
298 if let Some(disk) = disk_entry {
299 return Ok(disk.content);
300 }
301 warn!(url, "304 response but no cached content available");
302 return Err("304 Not Modified but no cache available".into());
303 }
304 Err(e) => return Err(e),
305 };
306
307 let content = parse_arch_news_html(&body, Some(url));
309
310 let content = if is_arch_package_url(url) {
312 prepend_official_package_changes(url, &content)
313 } else {
314 content
315 };
316
317 let parsed_len = content.len();
318 if parsed_len == 0 {
319 warn!(url, "parsed news content is empty");
320 } else {
321 info!(url, parsed_len, "parsed news content");
322 }
323
324 if let Ok(mut cache) = ARTICLE_CACHE.lock() {
327 cache.insert(
328 url.to_string(),
329 ArticleCacheEntry {
330 content: content.clone(),
331 timestamp: Instant::now(),
332 etag: etag.clone(),
333 last_modified: last_modified.clone(),
334 },
335 );
336 }
337 save_article_to_disk_cache(url, &content, etag, last_modified);
339
340 Ok(content)
341}
342
343async fn fetch_from_network(
360 url: &str,
361 cached_etag: Option<String>,
362 cached_last_modified: Option<String>,
363 endpoint_pattern: &str,
364) -> Result<(String, Option<String>, Option<String>)> {
365 let _permit = if is_archlinux_url(url) {
367 Some(crate::sources::feeds::rate_limit_archlinux().await)
368 } else {
369 None
370 };
371
372 let client = HTTP_CLIENT.clone();
374 let mut request = client.get(url);
375
376 if let Some(ref etag) = cached_etag {
378 request = request.header("If-None-Match", etag);
379 }
380 if let Some(ref last_mod) = cached_last_modified {
381 request = request.header("If-Modified-Since", last_mod);
382 }
383
384 let http_response = request.send().await.map_err(|e| {
385 warn!(error = %e, url, "failed to fetch news content");
386 crate::sources::feeds::record_circuit_breaker_outcome(endpoint_pattern, false);
387 Box::<dyn std::error::Error + Send + Sync>::from(format!("Network error: {e}"))
388 })?;
389
390 let status = http_response.status();
391 let status_code = status.as_u16();
392
393 if status_code == 304 {
395 info!(
396 url,
397 "server returned 304 Not Modified, using cached content"
398 );
399 return Err("304 Not Modified".into());
400 }
401
402 let etag = http_response
404 .headers()
405 .get("etag")
406 .and_then(|h| h.to_str().ok())
407 .map(ToString::to_string);
408 let last_modified = http_response
409 .headers()
410 .get("last-modified")
411 .and_then(|h| h.to_str().ok())
412 .map(ToString::to_string);
413
414 if status.is_client_error() || status.is_server_error() {
416 crate::sources::feeds::record_circuit_breaker_outcome(endpoint_pattern, false);
417 return Err(handle_http_error(status, status_code, &http_response).into());
418 }
419
420 let body = http_response.text().await.map_err(|e| {
421 warn!(error = %e, url, "failed to read response body");
422 Box::<dyn std::error::Error + Send + Sync>::from(format!("Failed to read response: {e}"))
423 })?;
424
425 info!(url, bytes = body.len(), "fetched news page");
426 crate::sources::feeds::record_circuit_breaker_outcome(endpoint_pattern, true);
427
428 Ok((body, etag, last_modified))
429}
430
431fn handle_http_error(
445 status: reqwest::StatusCode,
446 status_code: u16,
447 http_response: &reqwest::Response,
448) -> String {
449 if status_code == 429 {
450 let mut msg = "HTTP 429 Too Many Requests - rate limited by server".to_string();
451 if let Some(retry_after) = http_response.headers().get("retry-after")
452 && let Ok(retry_str) = retry_after.to_str()
453 {
454 msg.push_str(" (Retry-After: ");
455 msg.push_str(retry_str);
456 msg.push(')');
457 }
458 msg
459 } else if status_code == 503 {
460 let mut msg = "HTTP 503 Service Unavailable".to_string();
461 if let Some(retry_after) = http_response.headers().get("retry-after")
462 && let Ok(retry_str) = retry_after.to_str()
463 {
464 msg.push_str(" (Retry-After: ");
465 msg.push_str(retry_str);
466 msg.push(')');
467 }
468 msg
469 } else {
470 format!("HTTP error: {status}")
471 }
472}