pacsea/logic/files/
pkgbuild_cache.rs

1//! Disk-persisted LRU cache for parsed PKGBUILD data.
2
3use crate::logic::files::pkgbuild_parse::{
4    parse_backup_from_pkgbuild, parse_install_paths_from_pkgbuild,
5};
6use crate::state::Source;
7use lru::LruCache;
8use serde::{Deserialize, Serialize};
9use std::collections::hash_map::DefaultHasher;
10use std::fs;
11use std::hash::{Hash, Hasher};
12use std::num::NonZeroUsize;
13use std::path::PathBuf;
14#[cfg(test)]
15use std::sync::Arc;
16use std::sync::{Mutex, OnceLock};
17#[cfg(test)]
18use std::thread::ThreadId;
19
/// Maximum number of PKGBUILD entries kept in the in-memory LRU cache.
///
/// The on-disk file is written from the LRU contents, so it is bounded by the same limit.
const CACHE_CAPACITY: usize = 200;
/// Name of the environment variable that overrides the default cache file path.
const CACHE_PATH_ENV: &str = "PACSEA_PKGBUILD_CACHE_PATH";
24
/// What: Source kind for PKGBUILD files.
///
/// Inputs: Determined from package source (see the `From<&Source>` conversion) or chosen by the caller.
///
/// Output: Enum indicating where the PKGBUILD came from.
///
/// Details:
/// - Used to categorize PKGBUILD files by their origin (AUR, Official, or Unknown).
/// - The variant's `Debug` name is embedded in cache keys and the variant is serialized into the
///   cache file, so renaming a variant changes keys and the persisted format.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum PkgbuildSourceKind {
    /// PKGBUILD came from AUR.
    Aur,
    /// PKGBUILD came from official repositories.
    Official,
    /// Source could not be determined; never produced by the `From<&Source>` conversion.
    Unknown,
}
41
42impl From<&Source> for PkgbuildSourceKind {
43    fn from(src: &Source) -> Self {
44        match src {
45            Source::Aur => Self::Aur,
46            Source::Official { .. } => Self::Official,
47        }
48    }
49}
50
/// What: Cached PKGBUILD parse entry.
///
/// Inputs: Built by `parse_pkgbuild_cached` from raw PKGBUILD text, or deserialized from the cache file.
///
/// Output: Structured PKGBUILD metadata.
///
/// Details:
/// - Stores parsed PKGBUILD information for caching purposes.
/// - `name`, `version` and `source` together form the cache key; `pkgbuild_signature` is what
///   detects a changed PKGBUILD under an unchanged key.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PkgbuildParseEntry {
    /// Package name.
    pub name: String,
    /// Package version; `parse_pkgbuild_cached` stores `"unknown"` when none was supplied.
    pub version: String,
    /// Source kind (AUR, Official, or Unknown).
    pub source: PkgbuildSourceKind,
    /// Hash of the PKGBUILD text this entry was parsed from.
    pub pkgbuild_signature: u64,
    /// List of backup files specified in the PKGBUILD.
    pub backup_files: Vec<String>,
    /// List of install paths specified in the PKGBUILD.
    pub install_paths: Vec<String>,
}
73
/// What: On-disk cache structure for PKGBUILD entries.
///
/// Inputs: Loaded from disk cache file.
///
/// Output: Serialized cache data.
///
/// Details:
/// - Used for persisting PKGBUILD cache to disk as JSON.
/// - Entry order is meaningful: the loader replays the list in reverse so that the first
///   element ends up as the most recently used entry.
#[derive(Debug, Serialize, Deserialize)]
struct PkgbuildCacheDisk {
    /// Cached PKGBUILD parse entries, in the order the LRU cache iterated them when flushed.
    entries: Vec<PkgbuildParseEntry>,
}
86
/// What: In-memory cache state for PKGBUILD entries.
///
/// Inputs: Initialized with cache path.
///
/// Output: Manages LRU cache and dirty flag.
///
/// Details:
/// - Tracks cache state including LRU cache, file path, and whether changes need to be persisted.
/// - A single instance lives behind the global mutex returned by `cache_state()`.
#[derive(Debug)]
struct PkgbuildCacheState {
    /// LRU cache of PKGBUILD entries, keyed by the string produced by `cache_key`.
    lru: LruCache<String, PkgbuildParseEntry>,
    /// Path to the cache file on disk.
    path: PathBuf,
    /// Whether the cache has been modified since the last successful flush and needs to be saved.
    dirty: bool,
}
103
104impl PkgbuildCacheState {
105    /// What: Create a new PKGBUILD cache state.
106    ///
107    /// Inputs:
108    /// - `path`: Path to the cache file on disk.
109    ///
110    /// Output: New cache state with empty LRU cache.
111    ///
112    /// Details: Initializes a new cache state with the specified path and an empty LRU cache.
113    fn new(path: PathBuf) -> Self {
114        Self {
115            lru: LruCache::new(
116                NonZeroUsize::new(CACHE_CAPACITY)
117                    .unwrap_or_else(|| NonZeroUsize::new(1).expect("non-zero capacity")),
118            ),
119            path,
120            dirty: false,
121        }
122    }
123
124    /// What: Load cache entries from disk.
125    ///
126    /// Inputs: None (uses self.path).
127    ///
128    /// Output: Populates the LRU cache with entries from disk.
129    ///
130    /// Details: Reads the cache file from disk and populates the in-memory cache. Silently handles missing files.
131    fn load_from_disk(&mut self) {
132        let raw = match fs::read_to_string(&self.path) {
133            Ok(raw) => raw,
134            Err(e) => {
135                if e.kind() != std::io::ErrorKind::NotFound {
136                    tracing::warn!(
137                        path = %self.path.display(),
138                        error = %e,
139                        "[PKGBUILD cache] Failed to read cache file"
140                    );
141                }
142                return;
143            }
144        };
145
146        let parsed: PkgbuildCacheDisk = match serde_json::from_str(&raw) {
147            Ok(cache) => cache,
148            Err(e) => {
149                tracing::warn!(
150                    path = %self.path.display(),
151                    error = %e,
152                    "[PKGBUILD cache] Failed to parse cache file"
153                );
154                return;
155            }
156        };
157
158        // Insert from least-recent to most-recent to preserve order when iterating.
159        for entry in parsed.entries.into_iter().rev() {
160            let key = cache_key(&entry.name, &entry.version, entry.source);
161            let _ = self.lru.put(key, entry);
162        }
163        tracing::info!(
164            path = %self.path.display(),
165            count = self.lru.len(),
166            "[PKGBUILD cache] Loaded cache entries"
167        );
168    }
169
170    /// What: Write cache to disk if it has been modified.
171    ///
172    /// Inputs: None (uses self state).
173    ///
174    /// Output: Writes cache to disk if dirty flag is set.
175    ///
176    /// Details: Serializes the cache entries and writes them to disk, then clears the dirty flag.
177    fn flush_if_dirty(&mut self) {
178        if !self.dirty {
179            return;
180        }
181
182        let payload = PkgbuildCacheDisk {
183            entries: self.lru.iter().map(|(_, v)| v.clone()).collect(),
184        };
185
186        let Ok(serialized) = serde_json::to_string(&payload) else {
187            tracing::warn!("[PKGBUILD cache] Failed to serialize cache payload");
188            return;
189        };
190
191        if let Some(parent) = self.path.parent()
192            && let Err(e) = fs::create_dir_all(parent)
193        {
194            tracing::warn!(
195                path = %self.path.display(),
196                error = %e,
197                "[PKGBUILD cache] Failed to create parent directory"
198            );
199            return;
200        }
201
202        match fs::write(&self.path, serialized) {
203            Ok(()) => {
204                tracing::debug!(
205                    path = %self.path.display(),
206                    entries = self.lru.len(),
207                    "[PKGBUILD cache] Persisted cache to disk"
208                );
209                self.dirty = false;
210            }
211            Err(e) => {
212                tracing::warn!(
213                    path = %self.path.display(),
214                    error = %e,
215                    "[PKGBUILD cache] Failed to write cache to disk"
216                );
217            }
218        }
219    }
220}
221
222/// What: Get the path to the PKGBUILD cache file.
223///
224/// Inputs: None.
225///
226/// Output: Path to the cache file.
227///
228/// Details: Checks environment variable first, otherwise uses default path in lists directory.
229fn cache_path() -> PathBuf {
230    if let Ok(path) = std::env::var(CACHE_PATH_ENV) {
231        return PathBuf::from(path);
232    }
233    crate::theme::lists_dir().join("pkgbuild_parse_cache.json")
234}
235
236/// What: Get the global cache state singleton.
237///
238/// Inputs: None.
239///
240/// Output: Reference to the global cache state mutex.
241///
242/// Details: Initializes the cache state on first access, loading from disk if available.
243fn cache_state() -> &'static Mutex<PkgbuildCacheState> {
244    static STATE: OnceLock<Mutex<PkgbuildCacheState>> = OnceLock::new();
245    STATE.get_or_init(|| {
246        let path = cache_path();
247        let mut state = PkgbuildCacheState::new(path);
248        state.load_from_disk();
249        Mutex::new(state)
250    })
251}
252
/// What: Compute a hash signature for PKGBUILD contents.
///
/// Inputs:
/// - `contents`: PKGBUILD file contents.
///
/// Output: 64-bit hash signature; equal text always yields an equal signature within one build.
///
/// Details:
/// - Feeds the text through a freshly constructed `DefaultHasher`; used for cache invalidation.
/// - NOTE(review): the signature is persisted to disk, while std leaves `DefaultHasher`'s
///   algorithm unspecified across releases. A toolchain change would presumably just cause
///   one re-parse per entry; confirm that is acceptable.
fn compute_signature(contents: &str) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(contents, &mut state);
    state.finish()
}
266
267/// What: Generate a cache key for a PKGBUILD entry.
268///
269/// Inputs:
270/// - `name`: Package name.
271/// - `version`: Package version.
272/// - `source`: Source kind.
273///
274/// Output: Cache key string.
275///
276/// Details: Creates a unique cache key by combining package name, version, and source kind.
277fn cache_key(name: &str, version: &str, source: PkgbuildSourceKind) -> String {
278    format!("{name}::{version}::{source:?}")
279}
280
#[cfg(test)]
/// What: Identifies where in the cache code a test hook is being invoked.
///
/// Details: Only compiled in tests; passed to the registered hook so it can react to a
/// specific point.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(in crate::logic::files) enum CacheTestHookPoint {
    /// Fired by `parse_pkgbuild_cached` after its first cache lookup missed or found a
    /// different signature, before the PKGBUILD is parsed.
    AfterLookup,
}
286
#[cfg(test)]
/// Callback type for cache test hooks; receives the hook point that was reached.
pub(in crate::logic::files) type CacheTestHook = dyn Fn(CacheTestHookPoint) + Send + Sync + 'static;
289
#[cfg(test)]
/// What: A registered test hook together with the thread it applies to.
///
/// Details: Cloned out of the global slot before invocation so the slot lock is not held
/// while the callback runs.
#[derive(Clone)]
struct CacheTestHookEntry {
    /// Callback to run when a hook point is reached.
    hook: Arc<CacheTestHook>,
    /// Only this thread triggers the callback; other threads skip it.
    thread_id: ThreadId,
}
296
#[cfg(test)]
/// What: Access the global slot holding the currently registered test hook.
///
/// Output: Mutex around an optional hook entry; starts out empty (`None`).
///
/// Details: Only compiled in tests; initialized lazily on first access.
fn cache_test_hook_slot() -> &'static Mutex<Option<CacheTestHookEntry>> {
    static HOOK: OnceLock<Mutex<Option<CacheTestHookEntry>>> = OnceLock::new();
    HOOK.get_or_init(Mutex::default)
}
302
#[cfg(test)]
/// What: Install a cache test hook used to synchronize threads in tests.
///
/// Inputs:
/// - `hook`: Callback run when a cache hook point is reached.
/// - `thread_id`: Only the thread with this id triggers the callback.
///
/// Output:
/// - Guard that empties the hook slot when dropped.
///
/// Details:
/// - Only compiled in tests; there is a single global slot, so a new hook replaces any
///   previously registered one.
/// - If the slot mutex is poisoned the hook is not installed, but a guard is still returned.
pub fn set_cache_test_hook(hook: Arc<CacheTestHook>, thread_id: ThreadId) -> CacheTestHookGuard {
    let entry = CacheTestHookEntry { hook, thread_id };
    if let Ok(mut slot) = cache_test_hook_slot().lock() {
        *slot = Some(entry);
    }
    CacheTestHookGuard
}
321
#[cfg(test)]
/// What: Guard that removes the active cache test hook on drop.
///
/// Inputs: None.
///
/// Output:
/// - Clears any registered test hook when dropped.
///
/// Details:
/// - Scope the guard to the duration the hook should stay active.
/// - The guard carries no data; dropping it empties the global slot regardless of which
///   hook is currently stored there.
pub struct CacheTestHookGuard;
333
#[cfg(test)]
impl Drop for CacheTestHookGuard {
    /// Empties the global hook slot; does nothing if the slot mutex is poisoned.
    fn drop(&mut self) {
        if let Ok(mut slot) = cache_test_hook_slot().lock() {
            *slot = None;
        }
    }
}
342
#[cfg(test)]
/// What: Run the registered test hook, if any, for the given hook point.
///
/// Inputs:
/// - `point`: Hook point that was just reached.
///
/// Details:
/// - Does nothing when no hook is registered, when the slot mutex is poisoned, or when the
///   calling thread is not the one the hook was registered for.
fn invoke_cache_test_hook(point: CacheTestHookPoint) {
    // Copy the entry out inside the closure so the slot lock is already released when the
    // callback runs; other threads can then still inspect the slot while this one blocks.
    let Some(registered) = cache_test_hook_slot()
        .lock()
        .ok()
        .and_then(|slot| slot.clone())
    else {
        return;
    };

    if std::thread::current().id() == registered.thread_id {
        (registered.hook)(point);
    }
}
357
358/// What: Parse PKGBUILD data while leveraging a disk-backed LRU cache.
359///
360/// Inputs:
361/// - `name`: Package name used for keying and install path inference.
362/// - `version`: Package version (fall back to `"unknown"` if empty).
363/// - `source`: Source kind for keying (Aur/Official/Unknown).
364/// - `pkgbuild`: Raw PKGBUILD text to parse.
365///
366/// Output:
367/// - Parsed entry containing backup files and install paths. On cache hit with matching
368///   signature, returns the cached entry. On cache miss or signature mismatch, parses
369///   fresh data, updates the cache, and returns the new entry.
370///
371/// Details:
372/// - Uses a signature of the PKGBUILD text to detect staleness even when version is unchanged.
373/// - Cache is bounded to 200 entries and persists to disk via `flush_pkgbuild_cache()`.
374pub fn parse_pkgbuild_cached(
375    name: &str,
376    version: Option<&str>,
377    source: PkgbuildSourceKind,
378    pkgbuild: &str,
379) -> PkgbuildParseEntry {
380    let normalized_version = version
381        .filter(|v| !v.is_empty())
382        .map_or_else(|| "unknown".to_string(), ToString::to_string);
383    let signature = compute_signature(pkgbuild);
384    let key = cache_key(name, &normalized_version, source);
385    let prior_signature = if let Ok(mut guard) = cache_state().lock()
386        && let Some(entry) = guard.lru.get(&key)
387    {
388        if entry.pkgbuild_signature == signature {
389            return entry.clone();
390        }
391        Some(entry.pkgbuild_signature)
392    } else {
393        None
394    };
395
396    #[cfg(test)]
397    invoke_cache_test_hook(CacheTestHookPoint::AfterLookup);
398
399    let parsed = PkgbuildParseEntry {
400        name: name.to_string(),
401        version: normalized_version,
402        source,
403        pkgbuild_signature: signature,
404        backup_files: parse_backup_from_pkgbuild(pkgbuild),
405        install_paths: parse_install_paths_from_pkgbuild(pkgbuild, name),
406    };
407
408    let mut guard = match cache_state().lock() {
409        Ok(guard) => guard,
410        Err(poisoned) => {
411            tracing::warn!(
412                "[PKGBUILD cache] Cache mutex poisoned; continuing with recovered state"
413            );
414            poisoned.into_inner()
415        }
416    };
417
418    if let Some(entry) = guard.lru.get(&key) {
419        if entry.pkgbuild_signature == signature {
420            return entry.clone();
421        }
422
423        if prior_signature.is_some() && prior_signature == Some(entry.pkgbuild_signature) {
424            let _ = guard.lru.put(key, parsed.clone());
425            guard.dirty = true;
426            return parsed;
427        }
428
429        return entry.clone();
430    }
431
432    let _ = guard.lru.put(key, parsed.clone());
433    guard.dirty = true;
434
435    parsed
436}
437
438/// What: Persist the PKGBUILD parse cache to disk when dirty.
439///
440/// Inputs: None.
441///
442/// Output:
443/// - Best-effort disk write of the cache file; clears the dirty flag on success.
444///
445/// Details:
446/// - Safe to call frequently; returns immediately when nothing has changed.
447pub fn flush_pkgbuild_cache() {
448    if let Ok(mut guard) = cache_state().lock() {
449        guard.flush_if_dirty();
450    }
451}
452
#[cfg(test)]
/// What: Replace the global cache state with a fresh one bound to `path`.
///
/// Inputs:
/// - `path`: Cache file the new state should load from and later flush to.
///
/// Output: Global state is swapped for a state loaded from `path` (empty if the file is missing).
///
/// Details: Only compiled in tests; does nothing if the cache mutex is poisoned.
pub fn reset_cache_for_tests(path: PathBuf) {
    let Ok(mut guard) = cache_state().lock() else {
        return;
    };
    let mut fresh = PkgbuildCacheState::new(path);
    fresh.load_from_disk();
    *guard = fresh;
}
461
#[cfg(test)]
/// What: Inspect a cache entry without going through the parse path.
///
/// Inputs:
/// - `name`: Package name.
/// - `version`: Package version exactly as stored (no `"unknown"` normalization is applied).
/// - `source`: Source kind.
///
/// Output: Clone of the cached entry, or `None` when absent or when the cache mutex is poisoned.
///
/// Details: Only compiled in tests. Uses `LruCache::peek` so that inspecting the cache does
/// not promote the entry and change the eviction order being tested.
pub fn peek_cache_entry_for_tests(
    name: &str,
    version: &str,
    source: PkgbuildSourceKind,
) -> Option<PkgbuildParseEntry> {
    let key = cache_key(name, version, source);
    cache_state()
        .lock()
        .ok()
        .and_then(|guard| guard.lru.peek(&key).cloned())
}
474
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::{Arc, Barrier, mpsc};
    use std::time::Duration;

    /// Serializes the tests in this module.
    ///
    /// Every test swaps the process-global cache state (and one installs the global test
    /// hook), so running them in parallel lets one test wipe another's state mid-run.
    static TEST_SERIAL: Mutex<()> = Mutex::new(());

    /// Take the module-wide test lock, ignoring poisoning left behind by a failed test.
    fn serial_guard() -> std::sync::MutexGuard<'static, ()> {
        TEST_SERIAL
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
    }

    /// Minimal PKGBUILD with two backup entries and two `install` lines.
    fn sample_pkgbuild() -> String {
        r#"
pkgname=sample
pkgver=1.2.3
pkgrel=1
backup=('etc/sample.conf' '/etc/sample.d/more.conf')
package() {
  install -Dm755 "$srcdir/sample" "$pkgdir/usr/bin/sample"
  install -Dm644 "$srcdir/sample.conf" "$pkgdir/etc/sample.conf"
}
"#
        .to_string()
    }

    /// Build a cache file path in the temp directory that is unique per label, process and time.
    fn temp_cache_path(label: &str) -> PathBuf {
        let mut path = std::env::temp_dir();
        path.push(format!(
            "pacsea_pkgb_cache_{label}_{}_{}.json",
            std::process::id(),
            std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .expect("system time ok")
                .as_nanos()
        ));
        path
    }

    /// Parsing the same text twice yields the same signature and install paths.
    #[test]
    fn cache_hit_returns_same_signature_entry() {
        let _serial = serial_guard();
        let path = temp_cache_path("hit");
        reset_cache_for_tests(path);
        let text = sample_pkgbuild();
        let entry = parse_pkgbuild_cached("sample", Some("1.2.3"), PkgbuildSourceKind::Aur, &text);
        assert!(entry.backup_files.contains(&"etc/sample.conf".to_string()));
        assert!(entry.install_paths.contains(&"/usr/bin/sample".to_string()));
        let hit = parse_pkgbuild_cached("sample", Some("1.2.3"), PkgbuildSourceKind::Aur, &text);
        assert_eq!(hit.pkgbuild_signature, entry.pkgbuild_signature);
        assert_eq!(hit.install_paths, entry.install_paths);
    }

    /// Changed PKGBUILD text under the same key produces an entry with a new signature.
    #[test]
    fn cache_miss_on_signature_change_reparses() {
        let _serial = serial_guard();
        let path = temp_cache_path("miss");
        reset_cache_for_tests(path);
        let text = sample_pkgbuild();
        let _ = parse_pkgbuild_cached("sample", Some("1.2.3"), PkgbuildSourceKind::Official, &text);
        let modified = format!("{text}\n# change");
        let updated = parse_pkgbuild_cached(
            "sample",
            Some("1.2.3"),
            PkgbuildSourceKind::Official,
            &modified,
        );
        assert!(updated.pkgbuild_signature != compute_signature(&text));
    }

    /// An entry flushed to disk is available again after the state is rebuilt from the file.
    #[test]
    fn flush_and_reload_persists_entries() {
        let _serial = serial_guard();
        let path = temp_cache_path("persist");
        reset_cache_for_tests(path.clone());
        let text = sample_pkgbuild();
        let entry = parse_pkgbuild_cached("sample", Some("1.2.3"), PkgbuildSourceKind::Aur, &text);
        flush_pkgbuild_cache();
        reset_cache_for_tests(path.clone());
        let cached = peek_cache_entry_for_tests("sample", "1.2.3", PkgbuildSourceKind::Aur)
            .expect("entry should reload");
        assert_eq!(cached.pkgbuild_signature, entry.pkgbuild_signature);
        assert_eq!(cached.backup_files, entry.backup_files);
        // Best-effort cleanup of the only file this module writes.
        let _ = std::fs::remove_file(&path);
    }

    /// Inserting more than `CACHE_CAPACITY` distinct keys evicts the first one inserted.
    #[test]
    fn cache_evicts_oldest_when_capacity_exceeded() {
        let _serial = serial_guard();
        let path = temp_cache_path("evict");
        reset_cache_for_tests(path);
        let text = sample_pkgbuild();
        for i in 0..(CACHE_CAPACITY + 5) {
            let name = format!("pkg{i}");
            parse_pkgbuild_cached(&name, Some("1"), PkgbuildSourceKind::Unknown, &text);
        }
        assert!(
            peek_cache_entry_for_tests("pkg0", "1", PkgbuildSourceKind::Unknown).is_none(),
            "oldest entry should be evicted past capacity"
        );
    }

    /// A parse that started before a newer one finished must not overwrite the newer entry.
    ///
    /// The "stale" thread is paused at the `AfterLookup` hook (after its first lookup missed),
    /// the "new" thread then parses and stores different text under the same key, and only
    /// afterwards is the stale thread released.
    #[test]
    fn concurrent_parse_does_not_overwrite_newer_entry() {
        let _serial = serial_guard();
        let path = temp_cache_path("concurrent");
        reset_cache_for_tests(path);
        let name = "racepkg";
        let stale_pkgbuild = sample_pkgbuild();
        let newer_pkgbuild = r#"
pkgname=sample
pkgver=9.9.9
pkgrel=1
backup=('etc/sample.conf')
package() {
  install -Dm755 "$srcdir/sample" "$pkgdir/usr/bin/sample"
  install -Dm644 "$srcdir/sample.conf" "$pkgdir/etc/sample.conf"
}
"#
        .to_string();

        let (reached_tx, reached_rx) = mpsc::channel();
        let (resume_tx, resume_rx) = mpsc::channel();
        let resume_rx = Arc::new(Mutex::new(resume_rx));
        let hook_consumed = Arc::new(AtomicBool::new(false));
        let hook_flag = Arc::clone(&hook_consumed);
        let hook_resume = Arc::clone(&resume_rx);
        // The hook blocks only the first time it fires; the flag makes later invocations no-ops.
        let hook = Arc::new(move |point: CacheTestHookPoint| {
            if point == CacheTestHookPoint::AfterLookup
                && hook_flag
                    .compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst)
                    .is_ok()
            {
                let _ = reached_tx.send(());
                hook_resume
                    .lock()
                    .expect("resume_rx lock poisoned")
                    .recv()
                    .expect("resume signal should arrive");
            }
        });
        let start_barrier = Arc::new(Barrier::new(2));

        let stale_pkgbuild_for_thread = stale_pkgbuild.clone();
        let stale_start = Arc::clone(&start_barrier);
        let stale_handle = std::thread::spawn(move || {
            // Wait until the hook has been registered for this thread's id.
            stale_start.wait();
            parse_pkgbuild_cached(
                name,
                Some("1.2.3"),
                PkgbuildSourceKind::Aur,
                &stale_pkgbuild_for_thread,
            )
        });

        let stale_thread_id = stale_handle.thread().id();
        let _guard = set_cache_test_hook(hook, stale_thread_id);
        start_barrier.wait();

        reached_rx
            .recv_timeout(Duration::from_secs(2))
            .expect("stale thread should reach hook before proceeding");

        let newer_pkgbuild_for_thread = newer_pkgbuild.clone();
        let new_handle = std::thread::spawn(move || {
            parse_pkgbuild_cached(
                name,
                Some("1.2.3"),
                PkgbuildSourceKind::Aur,
                &newer_pkgbuild_for_thread,
            )
        });

        let new_entry = new_handle
            .join()
            .expect("new parsing thread should finish without panic");
        resume_tx
            .send(())
            .expect("should release stale thread after new parse completes");
        let stale_entry = stale_handle
            .join()
            .expect("stale parsing thread should finish without panic");

        let cached = peek_cache_entry_for_tests(name, "1.2.3", PkgbuildSourceKind::Aur)
            .expect("cache entry should exist after concurrent parses");
        let stale_signature = compute_signature(&stale_pkgbuild);
        let new_signature = compute_signature(&newer_pkgbuild);

        assert_eq!(
            cached.pkgbuild_signature, new_signature,
            "newer entry must remain in cache"
        );
        assert_eq!(
            cached.pkgbuild_signature, new_entry.pkgbuild_signature,
            "cache entry should match result of newer parse"
        );
        assert_ne!(
            cached.pkgbuild_signature, stale_signature,
            "stale parse must not overwrite newer cache entry"
        );
        // When the stale thread loses the race, it should return the cached (newer)
        // entry rather than its own stale parse result.
        assert_eq!(
            stale_entry.pkgbuild_signature, new_entry.pkgbuild_signature,
            "stale thread should return cached newer entry after losing race"
        );
        assert_ne!(
            stale_signature, new_signature,
            "test setup should use distinct PKGBUILD contents"
        );
    }
}
675}