pacsea/sources/
advisories.rs

1//! security.archlinux.org advisory fetcher.
2use crate::state::types::{AdvisorySeverity, NewsFeedItem, NewsFeedSource};
3use tracing::{info, warn};
4
5/// Result type alias for advisory fetching operations.
6type Result<T> = super::Result<T>;
7
8/// What: Fetch security advisories from security.archlinux.org and convert to feed items.
9///
10/// Inputs:
11/// - `limit`: Maximum number of advisories to return (best-effort).
12/// - `cutoff_date`: Optional date string (YYYY-MM-DD) for early filtering.
13///
14/// Output:
15/// - `Ok(Vec<NewsFeedItem>)` on success; `Err` on network/parse failure.
16///
17/// Details:
18/// - Uses the public JSON advisory feed.
19/// - Normalizes severity strings and packages; skips entries without an ID.
20/// - If `cutoff_date` is provided, stops fetching when items exceed the date limit.
21///
22/// # Errors
23/// - Network fetch failures
24/// - JSON parsing failures
25pub async fn fetch_security_advisories(
26    limit: usize,
27    cutoff_date: Option<&str>,
28) -> Result<Vec<NewsFeedItem>> {
29    // Official advisory Atom feed
30    let url = "https://security.archlinux.org/advisory/feed.atom";
31    let resp = reqwest::get(url).await?;
32    let status = resp.status();
33    let body = resp.text().await?;
34    info!(
35        status = status.as_u16(),
36        bytes = body.len(),
37        "fetched advisories feed"
38    );
39    if !status.is_success() {
40        let preview: String = body.chars().take(300).collect();
41        warn!(
42            status = status.as_u16(),
43            preview = preview,
44            "advisory feed returned non-success status"
45        );
46        return Err(format!("advisory feed status {status}").into());
47    }
48
49    let mut items = Vec::new();
50    let mut pos = 0;
51    while items.len() < limit {
52        let Some(start) = body[pos..].find("<entry>") else {
53            break;
54        };
55        let s = pos + start;
56        let end = body[s..].find("</entry>").map_or(body.len(), |e| s + e + 8);
57        let chunk = &body[s..end];
58
59        let title = extract_between(chunk, "<title>", "</title>").unwrap_or_default();
60        let link = extract_link_href(chunk).unwrap_or_default();
61        let raw_date = extract_between(chunk, "<updated>", "</updated>")
62            .or_else(|| extract_between(chunk, "<published>", "</published>"))
63            .unwrap_or_default();
64        let date = strip_time(&raw_date);
65        // Early date filtering: stop if item is older than cutoff_date
66        if let Some(cutoff) = cutoff_date
67            && date.as_str() < cutoff
68        {
69            break;
70        }
71        let summary = extract_between(chunk, "<summary>", "</summary>");
72        let id = if !link.is_empty() {
73            link.clone()
74        } else if !title.is_empty() {
75            title.clone()
76        } else {
77            raw_date.clone()
78        };
79
80        items.push(NewsFeedItem {
81            id,
82            date,
83            title: if title.is_empty() {
84                "Advisory".to_string()
85            } else {
86                title
87            },
88            summary,
89            url: if link.is_empty() { None } else { Some(link) },
90            source: NewsFeedSource::SecurityAdvisory,
91            severity: Some(AdvisorySeverity::Unknown),
92            packages: Vec::new(),
93        });
94        pos = end;
95    }
96    info!(count = items.len(), "parsed advisories feed");
97    Ok(items)
98}
99
/// What: Extract the text located between two delimiter strings.
///
/// Inputs:
/// - `s`: Text to search.
/// - `start`: Opening delimiter; its first occurrence in `s` is used.
/// - `end`: Closing delimiter; only occurrences after the opening delimiter count.
///
/// Output:
/// - `Some(String)` holding the text between the delimiters (delimiters excluded, possibly
///   empty); `None` when either delimiter is missing.
///
/// Details:
/// - The text is returned verbatim: no trimming and no entity decoding is performed.
fn extract_between(s: &str, start: &str, end: &str) -> Option<String> {
    let (_, after_start) = s.split_once(start)?;
    let (inner, _) = after_start.split_once(end)?;
    Some(inner.to_string())
}
112
/// What: Extract the `href` attribute value of a `<link …>` tag from a markup fragment.
///
/// Inputs:
/// - `s`: Markup fragment (here: one Atom `<entry>`) that may contain `<link>` tags.
///
/// Output: `Some(href)` for the first `<link` tag that carries a double-quoted `href`
/// attribute; `None` if no such tag exists.
///
/// Details:
/// - Plain string scanning, not an XML parser.
/// - The attribute is only looked for inside the link tag itself (up to its closing `>`),
///   so an `href` belonging to a later, unrelated tag is never returned. A link tag
///   without `href` is skipped and the next `<link` tag is tried.
/// - The value is returned verbatim (no entity decoding).
fn extract_link_href(s: &str) -> Option<String> {
    const TAG_OPEN: &str = "<link";
    const ATTR: &str = "href=\"";

    let mut rest = s;
    while let Some(tag_at) = rest.find(TAG_OPEN) {
        let after_name = &rest[tag_at + TAG_OPEN.len()..];
        // Restrict the attribute search to this tag; an unterminated tag runs to the end.
        let tag = after_name
            .find('>')
            .map_or(after_name, |close| &after_name[..close]);
        if let Some(attr_at) = tag.find(ATTR) {
            // Read the value from the unbounded remainder so a '>' inside the quoted
            // value does not truncate it.
            let value = &after_name[attr_at + ATTR.len()..];
            if let Some(end) = value.find('"') {
                return Some(value[..end].to_string());
            }
        }
        rest = after_name;
    }
    None
}
130
/// What: Reduce an ISO 8601 datetime string to its date portion.
///
/// Inputs:
/// - `s`: Datetime string such as "2024-01-01T12:00:00"; a bare date is accepted too.
///
/// Output: Everything before the first 'T', with surrounding whitespace trimmed.
///
/// Details: When `s` contains no 'T', the whole (trimmed) input is returned.
fn strip_time(s: &str) -> String {
    let date_part = match s.split_once('T') {
        Some((before, _time)) => before,
        None => s,
    };
    date_part.trim().to_owned()
}
142
#[cfg(test)]
mod tests {
    use super::{extract_between, extract_link_href, strip_time};

    /// Happy-path checks for the delimiter and date helpers.
    #[test]
    fn extract_and_strip_helpers() {
        assert_eq!(
            extract_between("<a>hi</a>", "<a>", "</a>").as_deref(),
            Some("hi")
        );
        assert_eq!(extract_between("nope", "<a>", "</a>"), None);
        assert_eq!(strip_time("2025-12-07T14:00:00Z"), "2025-12-07");
        assert_eq!(strip_time("2025-12-07"), "2025-12-07");
    }

    /// Both delimiters must be present, and the closing one must follow the opening one.
    #[test]
    fn extract_between_requires_both_delimiters() {
        assert_eq!(extract_between("<a>open only", "<a>", "</a>"), None);
        assert_eq!(
            extract_between("</a><a>x</a>", "<a>", "</a>").as_deref(),
            Some("x")
        );
        assert_eq!(
            extract_between("<a></a>", "<a>", "</a>").as_deref(),
            Some("")
        );
    }

    /// The href of a link tag is returned verbatim; input without a link yields `None`.
    #[test]
    fn extract_link_href_reads_link_tag() {
        assert_eq!(
            extract_link_href("<link href=\"https://security.archlinux.org/ASA-1\"/>").as_deref(),
            Some("https://security.archlinux.org/ASA-1")
        );
        assert_eq!(
            extract_link_href("<link rel=\"alternate\" href=\"u\" />").as_deref(),
            Some("u")
        );
        assert_eq!(extract_link_href("<title>no link</title>"), None);
    }

    /// Whitespace around the date is trimmed and an empty input stays empty.
    #[test]
    fn strip_time_trims_and_handles_empty() {
        assert_eq!(strip_time("  2025-12-07  "), "2025-12-07");
        assert_eq!(strip_time(""), "");
    }
}