pacsea/sources/feeds/
mod.rs

1//! Aggregated news feed fetcher (Arch news + security advisories).
2mod cache;
3mod helpers;
4mod news_fetch;
5mod rate_limit;
6mod updates;
7
8use std::collections::{HashMap, HashSet};
9use std::hash::BuildHasher;
10use std::path::PathBuf;
11
12use crate::state::types::{NewsFeedItem, NewsSortMode, severity_rank};
13use tracing::{info, warn};
14
15use helpers::load_update_versions;
16use news_fetch::fetch_slow_sources;
17use updates::{fetch_installed_aur_comments, fetch_installed_updates};
18
/// Result type alias for news feed fetching operations.
///
/// Forwards to the parent module's `Result` alias. Because this one-parameter alias
/// shadows the prelude `Result` inside this module, code below spells out
/// `std::result::Result` whenever it needs to name an explicit error type.
type Result<T> = super::Result<T>;
21
22/// What: Calculate optimal `max_age_days` based on last startup timestamp.
23///
24/// Inputs:
25/// - `last_startup`: Optional timestamp in `YYYYMMDD:HHMMSS` format.
26/// - `default_max_age`: Default max age in days if no optimization applies.
27///
28/// Output:
29/// - Optimized `max_age_days` value, or `None` to fetch all.
30///
31/// Details:
32/// - If last startup was within 1 hour: use 1 day (recent data likely cached)
33/// - If last startup was within 24 hours: use 2 days
34/// - If last startup was within 7 days: use configured `max_age` or 7 days
35/// - Otherwise: use configured `max_age`
36/// - This reduces unnecessary fetching when the app was recently used.
37/// - NOTE: This only affects Arch news and advisories date filtering.
38///   Package updates are ALWAYS fetched fresh to detect new packages and version changes.
39#[must_use]
40pub fn optimize_max_age_for_startup(
41    last_startup: Option<&str>,
42    default_max_age: Option<u32>,
43) -> Option<u32> {
44    let Some(ts) = last_startup else {
45        // No previous startup recorded, use default
46        return default_max_age;
47    };
48
49    // Parse timestamp: YYYYMMDD:HHMMSS
50    let parsed = chrono::NaiveDateTime::parse_from_str(ts, "%Y%m%d:%H%M%S").ok();
51    let Some(last_dt) = parsed else {
52        tracing::debug!(timestamp = %ts, "failed to parse last startup timestamp");
53        return default_max_age;
54    };
55
56    let now = chrono::Local::now().naive_local();
57    let elapsed = now.signed_duration_since(last_dt);
58
59    if elapsed.num_hours() < 1 {
60        // Very recent startup (< 1 hour): minimal fresh fetch needed
61        info!(
62            hours_since_last = elapsed.num_hours(),
63            "recent startup detected, using minimal fetch window"
64        );
65        Some(1)
66    } else if elapsed.num_hours() < 24 {
67        // Within last day: use 2 days to be safe
68        info!(
69            hours_since_last = elapsed.num_hours(),
70            "startup within 24h, using 2-day fetch window"
71        );
72        Some(2)
73    } else if elapsed.num_days() < 7 {
74        // Within last week: use configured or 7 days
75        let optimized = default_max_age.map_or(7, |d| d.min(7));
76        info!(
77            days_since_last = elapsed.num_days(),
78            optimized_max_age = optimized,
79            "startup within 7 days, using optimized fetch window"
80        );
81        Some(optimized)
82    } else {
83        // More than a week: use configured max_age
84        default_max_age
85    }
86}
87
/// What: Input context for fetching a combined news feed.
///
/// Inputs:
/// - `force_emit_all`: Bypass baseline gating and emit everything.
/// - `updates_list_path`: Optional path to `available_updates.txt`.
/// - `limit`: Maximum number of items per source.
/// - `include_*`: Source toggles.
/// - `installed_filter`: Optional installed-package set for scoping.
/// - `installed_only`: Whether to restrict advisories to installed packages.
/// - `sort_mode`: Sort order.
/// - `seen_pkg_versions`: Last-seen map for package updates.
/// - `seen_aur_comments`: Last-seen map for AUR comments.
/// - `max_age_days`: Optional maximum age in days for filtering items (enables early filtering).
///
/// Output:
/// - Mutable references updated in place alongside returned feed items.
///
/// Details:
/// - Hashers are generic to remain compatible with caller-supplied maps.
/// - `max_age_days` enables early date filtering during fetch to improve performance.
/// - The struct is consumed by value by `fetch_news_feed`, which destructures it in
///   `prepare_fetch_context`.
// Each bool below is an independent user-facing toggle, so the lint is silenced
// rather than folding them into an enum.
#[allow(clippy::struct_excessive_bools)]
pub struct NewsFeedContext<'a, HS, HV, HC>
where
    HS: BuildHasher + Send + Sync + 'static,
    HV: BuildHasher + Send + Sync + 'static,
    HC: BuildHasher + Send + Sync + 'static,
{
    /// Emit all sources even on first run (bypasses baseline gating).
    ///
    /// In this module it also gates whether `updates_list_path` is read at all:
    /// update versions are only loaded when this flag is `true`.
    pub force_emit_all: bool,
    /// Optional path to `available_updates.txt` for filtering noisy first-run emissions.
    pub updates_list_path: Option<PathBuf>,
    /// Maximum number of items per source.
    ///
    /// Passed unchanged to every source fetcher.
    pub limit: usize,
    /// Whether to include Arch news RSS posts.
    pub include_arch_news: bool,
    /// Whether to include security advisories.
    pub include_advisories: bool,
    /// Whether to include installed package updates.
    ///
    /// Has no effect unless `installed_filter` is `Some` and non-empty; otherwise the
    /// source is skipped with a warning.
    pub include_pkg_updates: bool,
    /// Whether to include installed AUR comments.
    ///
    /// Has no effect unless `installed_filter` is `Some` and non-empty; otherwise the
    /// source is skipped with a warning.
    pub include_aur_comments: bool,
    /// Optional installed-package filter set.
    pub installed_filter: Option<&'a HashSet<String, HS>>,
    /// Whether to restrict advisories to installed packages.
    pub installed_only: bool,
    /// Sort mode for the resulting feed.
    ///
    /// `NewsSortMode::UnreadThenDate` is sorted newest-first here because the fetch
    /// pipeline has no read-state information.
    pub sort_mode: NewsSortMode,
    /// Last-seen versions map (updated in place).
    pub seen_pkg_versions: &'a mut HashMap<String, String, HV>,
    /// Last-seen AUR comments map (updated in place).
    pub seen_aur_comments: &'a mut HashMap<String, String, HC>,
    /// Optional maximum age in days for early date filtering during fetch.
    ///
    /// Converted into a UTC `YYYY-MM-DD` cutoff string that is handed to the slow
    /// sources (Arch news and advisories) only.
    pub max_age_days: Option<u32>,
}
140
/// Configuration for fetching fast sources.
///
/// Bundles everything `fetch_fast_sources` needs for the two "fast" sources
/// (installed package updates and installed AUR comments). The struct is private
/// and built once per call in `fetch_news_feed`.
struct FastSourcesConfig<'a, HS, HV, HC> {
    /// Whether to fetch package updates.
    include_pkg_updates: bool,
    /// Whether to fetch AUR comments.
    include_aur_comments: bool,
    /// Optional set of installed package names.
    ///
    /// When `None` or empty, both sources are skipped with a warning.
    installed_filter: Option<&'a HashSet<String, HS>>,
    /// Maximum items per source.
    limit: usize,
    /// Last-seen versions map (updated in place).
    seen_pkg_versions: &'a mut HashMap<String, String, HV>,
    /// Last-seen AUR comments map (updated in place).
    seen_aur_comments: &'a mut HashMap<String, String, HC>,
    /// Whether to emit all items regardless of last-seen.
    force_emit_all: bool,
    /// Optional pre-loaded update versions.
    ///
    /// Only forwarded to the package-updates fetch.
    /// NOTE(review): the `(String, String)` value is presumably an (old, new) version
    /// pair keyed by package name - confirm against `load_update_versions`.
    updates_versions: Option<&'a HashMap<String, (String, String)>>,
}
160
161/// What: Fetch fast sources (package updates and AUR comments) in parallel.
162///
163/// Inputs:
164/// - `config`: Configuration struct containing all fetch parameters.
165///
166/// Output:
167/// - Tuple of (`updates_result`, `comments_result`).
168///
169/// Details:
170/// - Fetches both sources in parallel for better performance.
171/// - Returns empty vectors on errors (graceful degradation).
172async fn fetch_fast_sources<HS, HV, HC>(
173    config: FastSourcesConfig<'_, HS, HV, HC>,
174) -> (
175    std::result::Result<Vec<NewsFeedItem>, Box<dyn std::error::Error + Send + Sync>>,
176    std::result::Result<Vec<NewsFeedItem>, Box<dyn std::error::Error + Send + Sync>>,
177)
178where
179    HS: BuildHasher + Send + Sync + 'static,
180    HV: BuildHasher + Send + Sync + 'static,
181    HC: BuildHasher + Send + Sync + 'static,
182{
183    tokio::join!(
184        async {
185            if config.include_pkg_updates {
186                if let Some(installed) = config.installed_filter {
187                    if installed.is_empty() {
188                        warn!(
189                            "include_pkg_updates set but installed set is empty; skipping updates"
190                        );
191                        Ok::<Vec<NewsFeedItem>, Box<dyn std::error::Error + Send + Sync>>(Vec::new())
192                    } else {
193                        info!(
194                            "fetching package updates: installed_count={}, limit={}",
195                            installed.len(),
196                            config.limit
197                        );
198                        let result = fetch_installed_updates(
199                            installed,
200                            config.limit,
201                            config.seen_pkg_versions,
202                            config.force_emit_all,
203                            config.updates_versions,
204                        )
205                        .await;
206                        match &result {
207                            Ok(updates) => {
208                                info!("package updates fetch completed: items={}", updates.len());
209                            }
210                            Err(e) => {
211                                warn!(error = %e, "installed package updates fetch failed");
212                            }
213                        }
214                        match result {
215                            Ok(updates) => Ok(updates),
216                            Err(_e) => Ok::<
217                                Vec<NewsFeedItem>,
218                                Box<dyn std::error::Error + Send + Sync>,
219                            >(Vec::new()),
220                        }
221                    }
222                } else {
223                    warn!("include_pkg_updates set but installed_filter missing; skipping updates");
224                    Ok::<Vec<NewsFeedItem>, Box<dyn std::error::Error + Send + Sync>>(Vec::new())
225                }
226            } else {
227                Ok::<Vec<NewsFeedItem>, Box<dyn std::error::Error + Send + Sync>>(Vec::new())
228            }
229        },
230        async {
231            if config.include_aur_comments {
232                if let Some(installed) = config.installed_filter {
233                    if installed.is_empty() {
234                        warn!(
235                            "include_aur_comments set but installed set is empty; skipping comments"
236                        );
237                        Ok::<Vec<NewsFeedItem>, Box<dyn std::error::Error + Send + Sync>>(Vec::new())
238                    } else {
239                        info!(
240                            "fetching AUR comments: installed_count={}, limit={}",
241                            installed.len(),
242                            config.limit
243                        );
244                        let result = fetch_installed_aur_comments(
245                            installed,
246                            config.limit,
247                            config.seen_aur_comments,
248                            config.force_emit_all,
249                        )
250                        .await;
251                        match &result {
252                            Ok(comments) => {
253                                info!("AUR comments fetch completed: items={}", comments.len());
254                            }
255                            Err(e) => {
256                                warn!(error = %e, "installed AUR comments fetch failed");
257                            }
258                        }
259                        match result {
260                            Ok(comments) => Ok(comments),
261                            Err(_e) => Ok::<
262                                Vec<NewsFeedItem>,
263                                Box<dyn std::error::Error + Send + Sync>,
264                            >(Vec::new()),
265                        }
266                    }
267                } else {
268                    warn!(
269                        "include_aur_comments set but installed_filter missing; skipping comments"
270                    );
271                    Ok::<Vec<NewsFeedItem>, Box<dyn std::error::Error + Send + Sync>>(Vec::new())
272                }
273            } else {
274                Ok::<Vec<NewsFeedItem>, Box<dyn std::error::Error + Send + Sync>>(Vec::new())
275            }
276        }
277    )
278}
279
280/// What: Combine feed results from all sources into a single sorted vector.
281///
282/// Inputs:
283/// - `arch_result`: Arch news fetch result.
284/// - `advisories_result`: Advisories fetch result.
285/// - `updates_result`: Package updates fetch result.
286/// - `comments_result`: AUR comments fetch result.
287/// - `sort_mode`: Sort mode for the final result.
288///
289/// Output:
290/// - Combined and sorted vector of news feed items.
291///
292/// Details:
293/// - Gracefully handles errors by logging warnings and continuing.
294/// - Sorts items according to the specified sort mode.
295fn combine_feed_results(
296    arch_result: std::result::Result<Vec<NewsFeedItem>, Box<dyn std::error::Error + Send + Sync>>,
297    advisories_result: std::result::Result<
298        Vec<NewsFeedItem>,
299        Box<dyn std::error::Error + Send + Sync>,
300    >,
301    updates_result: std::result::Result<
302        Vec<NewsFeedItem>,
303        Box<dyn std::error::Error + Send + Sync>,
304    >,
305    comments_result: std::result::Result<
306        Vec<NewsFeedItem>,
307        Box<dyn std::error::Error + Send + Sync>,
308    >,
309    sort_mode: NewsSortMode,
310) -> Vec<NewsFeedItem> {
311    let mut items: Vec<NewsFeedItem> = Vec::new();
312    match arch_result {
313        Ok(mut arch_items) => items.append(&mut arch_items),
314        Err(e) => warn!(error = %e, "arch news fetch failed; continuing without Arch news"),
315    }
316    match advisories_result {
317        Ok(mut adv_items) => items.append(&mut adv_items),
318        Err(e) => warn!(error = %e, "advisories fetch failed; continuing without advisories"),
319    }
320    match updates_result {
321        Ok(mut upd_items) => items.append(&mut upd_items),
322        Err(e) => warn!(error = %e, "updates fetch failed; continuing without updates"),
323    }
324    match comments_result {
325        Ok(mut cmt_items) => items.append(&mut cmt_items),
326        Err(e) => warn!(error = %e, "comments fetch failed; continuing without comments"),
327    }
328    sort_news_items(&mut items, sort_mode);
329    items
330}
331
/// Return type for `prepare_fetch_context` function.
///
/// Positional tuple: the element order below mirrors the tuple built at the end of
/// `prepare_fetch_context` and the destructuring at the top of `fetch_news_feed`.
/// Several adjacent elements share the type `bool`, so any reordering must be applied
/// in both places.
type PrepareFetchContextReturn<'a, HS, HV, HC> = (
    // cutoff_date: `YYYY-MM-DD` string derived from `max_age_days`, if any.
    Option<String>,
    // updates_versions: loaded only when `force_emit_all` is set.
    Option<HashMap<String, (String, String)>>,
    // limit
    usize,
    // include_arch_news
    bool,
    // include_advisories
    bool,
    // include_pkg_updates
    bool,
    // include_aur_comments
    bool,
    // installed_filter
    Option<&'a HashSet<String, HS>>,
    // installed_only
    bool,
    // sort_mode
    NewsSortMode,
    // seen_pkg_versions
    &'a mut HashMap<String, String, HV>,
    // seen_aur_comments
    &'a mut HashMap<String, String, HC>,
    // force_emit_all
    bool,
);
348
349/// What: Prepare fetch context and calculate derived values.
350///
351/// Inputs:
352/// - `ctx`: News feed context.
353///
354/// Output:
355/// - Tuple of (`cutoff_date`, `updates_versions`, and extracted context fields).
356///
357/// Details:
358/// - Extracts context fields and calculates cutoff date and update versions.
359fn prepare_fetch_context<HS, HV, HC>(
360    ctx: NewsFeedContext<'_, HS, HV, HC>,
361) -> PrepareFetchContextReturn<'_, HS, HV, HC>
362where
363    HS: BuildHasher + Send + Sync + 'static,
364    HV: BuildHasher + Send + Sync + 'static,
365    HC: BuildHasher + Send + Sync + 'static,
366{
367    let NewsFeedContext {
368        limit,
369        include_arch_news,
370        include_advisories,
371        include_pkg_updates,
372        include_aur_comments,
373        installed_filter,
374        installed_only,
375        sort_mode,
376        seen_pkg_versions,
377        seen_aur_comments,
378        force_emit_all,
379        updates_list_path,
380        max_age_days,
381    } = ctx;
382
383    info!(
384        limit,
385        include_arch_news,
386        include_advisories,
387        include_pkg_updates,
388        include_aur_comments,
389        installed_only,
390        installed_filter = installed_filter.is_some(),
391        sort_mode = ?sort_mode,
392        max_age_days,
393        "fetch_news_feed start"
394    );
395
396    let cutoff_date = max_age_days.and_then(|days| {
397        chrono::Utc::now()
398            .checked_sub_signed(chrono::Duration::days(i64::from(days)))
399            .map(|dt| dt.format("%Y-%m-%d").to_string())
400    });
401    let updates_versions = if force_emit_all {
402        load_update_versions(updates_list_path.as_ref())
403    } else {
404        None
405    };
406
407    (
408        cutoff_date,
409        updates_versions,
410        limit,
411        include_arch_news,
412        include_advisories,
413        include_pkg_updates,
414        include_aur_comments,
415        installed_filter,
416        installed_only,
417        sort_mode,
418        seen_pkg_versions,
419        seen_aur_comments,
420        force_emit_all,
421    )
422}
423
424/// What: Sort news feed items by the specified mode.
425///
426/// Inputs:
427/// - `items`: Mutable slice of news feed items to sort.
428/// - `mode`: Sort mode (date descending, etc.).
429///
430/// Output: Items are sorted in place.
431///
432/// Details: Sorts news items according to the specified sort mode.
433fn sort_news_items(items: &mut [NewsFeedItem], mode: NewsSortMode) {
434    match mode {
435        NewsSortMode::DateDesc => items.sort_by(|a, b| b.date.cmp(&a.date)),
436        NewsSortMode::DateAsc => items.sort_by(|a, b| a.date.cmp(&b.date)),
437        NewsSortMode::Title => {
438            items.sort_by(|a, b| {
439                a.title
440                    .to_lowercase()
441                    .cmp(&b.title.to_lowercase())
442                    .then(b.date.cmp(&a.date))
443            });
444        }
445        NewsSortMode::SourceThenTitle => items.sort_by(|a, b| {
446            a.source
447                .cmp(&b.source)
448                .then(b.date.cmp(&a.date))
449                .then(a.title.to_lowercase().cmp(&b.title.to_lowercase()))
450        }),
451        NewsSortMode::SeverityThenDate => items.sort_by(|a, b| {
452            let sa = severity_rank(a.severity);
453            let sb = severity_rank(b.severity);
454            sb.cmp(&sa)
455                .then(b.date.cmp(&a.date))
456                .then(a.title.to_lowercase().cmp(&b.title.to_lowercase()))
457        }),
458        NewsSortMode::UnreadThenDate => {
459            // Fetch pipeline lacks read-state context; fall back to newest-first.
460            items.sort_by(|a, b| b.date.cmp(&a.date));
461        }
462    }
463}
464
465/// # Errors
466/// - Network failures fetching sources
467/// - JSON parse errors from upstream feeds
468pub async fn fetch_news_feed<HS, HV, HC>(
469    ctx: NewsFeedContext<'_, HS, HV, HC>,
470) -> Result<Vec<NewsFeedItem>>
471where
472    HS: BuildHasher + Send + Sync + 'static,
473    HV: BuildHasher + Send + Sync + 'static,
474    HC: BuildHasher + Send + Sync + 'static,
475{
476    let (
477        cutoff_date,
478        updates_versions,
479        limit,
480        include_arch_news,
481        include_advisories,
482        include_pkg_updates,
483        include_aur_comments,
484        installed_filter,
485        installed_only,
486        sort_mode,
487        seen_pkg_versions,
488        seen_aur_comments,
489        force_emit_all,
490    ) = prepare_fetch_context(ctx);
491
492    info!(
493        "starting fetch: arch_news={include_arch_news}, advisories={include_advisories}, pkg_updates={include_pkg_updates}, aur_comments={include_aur_comments}"
494    );
495    rate_limit::reset_archlinux_backoff();
496
497    // Fetch ALL sources in parallel for best responsiveness:
498    // - Fast sources (AUR comments, package updates) run in parallel and complete quickly
499    // - Slow sources (arch news, advisories from archlinux.org) run sequentially with each other
500    //   but IN PARALLEL with the fast sources, so they don't block everything
501    let ((updates_result, comments_result), (arch_result, advisories_result)) = tokio::join!(
502        fetch_fast_sources(FastSourcesConfig {
503            include_pkg_updates,
504            include_aur_comments,
505            installed_filter,
506            limit,
507            seen_pkg_versions,
508            seen_aur_comments,
509            force_emit_all,
510            updates_versions: updates_versions.as_ref(),
511        }),
512        fetch_slow_sources(
513            include_arch_news,
514            include_advisories,
515            limit,
516            installed_filter,
517            installed_only,
518            cutoff_date.as_deref(),
519        )
520    );
521    info!("fetch completed, combining results...");
522
523    let items = combine_feed_results(
524        arch_result,
525        advisories_result,
526        updates_result,
527        comments_result,
528        sort_mode,
529    );
530    info!(
531        total = items.len(),
532        arch = items
533            .iter()
534            .filter(|i| matches!(i.source, crate::state::types::NewsFeedSource::ArchNews))
535            .count(),
536        advisories = items
537            .iter()
538            .filter(|i| matches!(
539                i.source,
540                crate::state::types::NewsFeedSource::SecurityAdvisory
541            ))
542            .count(),
543        updates = items
544            .iter()
545            .filter(|i| {
546                matches!(
547                    i.source,
548                    crate::state::types::NewsFeedSource::InstalledPackageUpdate
549                        | crate::state::types::NewsFeedSource::AurPackageUpdate
550                )
551            })
552            .count(),
553        aur_comments = items
554            .iter()
555            .filter(|i| matches!(i.source, crate::state::types::NewsFeedSource::AurComment))
556            .count(),
557        "fetch_news_feed success"
558    );
559    Ok(items)
560}
561
/// Limit for continuation fetching (effectively unlimited).
///
/// Passed as the per-source item limit to every fetcher used by
/// `fetch_continuation_items`; it is a large cap rather than a true "no limit".
const CONTINUATION_LIMIT: usize = 1000;
564
565/// What: Fetch continuation items for background loading after initial batch.
566///
567/// Inputs:
568/// - `installed`: Set of installed package names.
569/// - `initial_ids`: IDs of items already fetched in initial batch.
570///
571/// Output:
572/// - `Ok(Vec<NewsFeedItem>)`: Additional items not in initial batch.
573///
574/// # Errors
575/// - Network errors when fetching from any source.
576/// - Parsing errors from upstream feeds.
577///
578/// Details:
579/// - Fetches items from all sources with a high limit (1000).
580/// - Filters out items already in `initial_ids`.
581/// - Used by background continuation worker to stream additional items to UI.
582pub async fn fetch_continuation_items<HS, HI>(
583    installed: &HashSet<String, HS>,
584    initial_ids: &HashSet<String, HI>,
585) -> Result<Vec<NewsFeedItem>>
586where
587    HS: std::hash::BuildHasher + Send + Sync + 'static,
588    HI: std::hash::BuildHasher + Send + Sync,
589{
590    use crate::state::types::NewsFeedSource;
591
592    info!(
593        installed_count = installed.len(),
594        initial_count = initial_ids.len(),
595        "starting continuation fetch"
596    );
597
598    // Fetch from all sources in parallel
599    let ((updates_result, comments_result), (arch_result, advisories_result)) = tokio::join!(
600        async {
601            // Package updates - use fresh seen maps (continuation doesn't track seen state)
602            let mut seen_versions: HashMap<String, String> = HashMap::new();
603            let mut seen_aur_comments: HashMap<String, String> = HashMap::new();
604            let updates = fetch_installed_updates(
605                installed,
606                CONTINUATION_LIMIT,
607                &mut seen_versions,
608                true, // force_emit_all
609                None,
610            )
611            .await;
612            let comments = fetch_installed_aur_comments(
613                installed,
614                CONTINUATION_LIMIT,
615                &mut seen_aur_comments,
616                true, // force_emit_all
617            )
618            .await;
619            (updates, comments)
620        },
621        fetch_slow_sources(
622            true, // include_arch_news
623            true, // include_advisories
624            CONTINUATION_LIMIT,
625            Some(installed),
626            false, // installed_only
627            None,  // cutoff_date
628        )
629    );
630
631    let mut items = Vec::new();
632
633    // Add Arch news (filter out already-sent items)
634    if let Ok(arch_items) = arch_result {
635        for item in arch_items {
636            if !initial_ids.contains(&item.id) {
637                items.push(item);
638            }
639        }
640    }
641
642    // Add advisories (filter out already-sent items)
643    if let Ok(adv_items) = advisories_result {
644        for item in adv_items {
645            if !initial_ids.contains(&item.id) {
646                items.push(item);
647            }
648        }
649    }
650
651    // Add package updates (filter out already-sent items)
652    if let Ok(upd_items) = updates_result {
653        for item in upd_items {
654            if !initial_ids.contains(&item.id) {
655                items.push(item);
656            }
657        }
658    }
659
660    // Add AUR comments (filter out already-sent items)
661    if let Ok(comment_items) = comments_result {
662        for item in comment_items {
663            if !initial_ids.contains(&item.id) {
664                items.push(item);
665            }
666        }
667    }
668
669    // Sort by date descending
670    sort_news_items(&mut items, NewsSortMode::DateDesc);
671
672    info!(
673        total = items.len(),
674        arch = items
675            .iter()
676            .filter(|i| matches!(i.source, NewsFeedSource::ArchNews))
677            .count(),
678        advisories = items
679            .iter()
680            .filter(|i| matches!(i.source, NewsFeedSource::SecurityAdvisory))
681            .count(),
682        updates = items
683            .iter()
684            .filter(|i| matches!(
685                i.source,
686                NewsFeedSource::InstalledPackageUpdate | NewsFeedSource::AurPackageUpdate
687            ))
688            .count(),
689        "continuation fetch complete"
690    );
691
692    Ok(items)
693}
694
695// Re-export public functions from submodules
696pub use rate_limit::{
697    check_circuit_breaker, extract_endpoint_pattern, extract_retry_after_from_error,
698    increase_archlinux_backoff, rate_limit_archlinux, record_circuit_breaker_outcome,
699    reset_archlinux_backoff, take_network_error,
700};
701pub use updates::{
702    get_aur_json_changes, get_official_json_changes, load_official_json_cache,
703    official_json_cache_path,
704};
705
706#[cfg(test)]
707mod tests;