pacsea/logic/sandbox/
parse.rs

1//! Parsing functions for .SRCINFO and PKGBUILD dependency extraction.
2
3/// What: Parse dependencies from .SRCINFO content.
4///
5/// Inputs:
6/// - `srcinfo`: Raw .SRCINFO file content.
7///
8/// Output:
9/// - Returns a tuple of (depends, makedepends, checkdepends, optdepends) vectors.
10#[allow(clippy::case_sensitive_file_extension_comparisons)]
11pub(super) fn parse_srcinfo_deps(
12    srcinfo: &str,
13) -> (Vec<String>, Vec<String>, Vec<String>, Vec<String>) {
14    let mut depends = Vec::new();
15    let mut makedepends = Vec::new();
16    let mut checkdepends = Vec::new();
17    let mut optdepends = Vec::new();
18
19    for line in srcinfo.lines() {
20        let line = line.trim();
21        if line.is_empty() || line.starts_with('#') {
22            continue;
23        }
24
25        // .SRCINFO format: key = value
26        if let Some((key, value)) = line.split_once('=') {
27            let key = key.trim();
28            let value = value.trim();
29
30            // Filter out virtual packages (.so files)
31            let value_lower = value.to_lowercase();
32            if value_lower.ends_with(".so")
33                || value_lower.contains(".so.")
34                || value_lower.contains(".so=")
35            {
36                continue;
37            }
38
39            match key {
40                "depends" => depends.push(value.to_string()),
41                "makedepends" => makedepends.push(value.to_string()),
42                "checkdepends" => checkdepends.push(value.to_string()),
43                "optdepends" => optdepends.push(value.to_string()),
44                _ => {}
45            }
46        }
47    }
48
49    (depends, makedepends, checkdepends, optdepends)
50}
51
52/// What: Parse dependencies from PKGBUILD content.
53///
54/// Inputs:
55/// - `pkgbuild`: Raw PKGBUILD file content.
56///
57/// Output:
58/// - Returns a tuple of (depends, makedepends, checkdepends, optdepends) vectors.
59///
60/// Details:
61/// - Parses bash array syntax: `depends=('foo' 'bar>=1.2')` (single-line)
62/// - Also handles `depends+=` patterns used in functions like `package()`
63/// - Handles both quoted and unquoted dependencies
64/// - Also handles multi-line arrays:
65///   ```text
66///   depends=(
67///       'foo'
68///       'bar>=1.2'
69///   )
70///   ```
71/// - Filters out .so files (virtual packages) and invalid package names
72/// - Only parses specific dependency fields (depends, makedepends, checkdepends, optdepends)
73pub fn parse_pkgbuild_deps(pkgbuild: &str) -> (Vec<String>, Vec<String>, Vec<String>, Vec<String>) {
74    tracing::debug!(
75        "parse_pkgbuild_deps: Starting parse, PKGBUILD length={}, first 500 chars: {:?}",
76        pkgbuild.len(),
77        pkgbuild.chars().take(500).collect::<String>()
78    );
79    let mut depends = Vec::new();
80    let mut makedepends = Vec::new();
81    let mut checkdepends = Vec::new();
82    let mut optdepends = Vec::new();
83
84    let lines: Vec<&str> = pkgbuild.lines().collect();
85    tracing::debug!(
86        "parse_pkgbuild_deps: Total lines in PKGBUILD: {}",
87        lines.len()
88    );
89    let mut i = 0;
90
91    while i < lines.len() {
92        let line = lines[i].trim();
93        i += 1;
94
95        if line.is_empty() || line.starts_with('#') {
96            continue;
97        }
98
99        // Parse array declarations: depends=('foo' 'bar') or depends=( or depends+=('foo' 'bar')
100        if let Some((key, value)) = line.split_once('=') {
101            let key = key.trim();
102            let value = value.trim();
103
104            // Handle both depends= and depends+= patterns
105            let base_key = key.strip_suffix('+').map_or(key, |stripped| stripped);
106
107            // Only parse specific dependency fields, ignore other PKGBUILD fields
108            if !matches!(
109                base_key,
110                "depends" | "makedepends" | "checkdepends" | "optdepends"
111            ) {
112                continue;
113            }
114
115            tracing::debug!(
116                "parse_pkgbuild_deps: Found key-value pair: key='{}', base_key='{}', value='{}'",
117                key,
118                base_key,
119                value.chars().take(100).collect::<String>()
120            );
121
122            // Check if this is an array declaration
123            if value.starts_with('(') {
124                tracing::debug!(
125                    "parse_pkgbuild_deps: Detected array declaration for key='{}'",
126                    key
127                );
128                let deps = find_matching_closing_paren(value).map_or_else(
129                    || {
130                        // Multi-line array: depends=(
131                        //     'foo'
132                        //     'bar'
133                        // )
134                        tracing::debug!("Parsing multi-line {} array", key);
135                        let mut array_lines = Vec::new();
136                        // Collect lines until we find the closing parenthesis
137                        while i < lines.len() {
138                            let next_line = lines[i].trim();
139                            i += 1;
140
141                            // Skip empty lines and comments
142                            if next_line.is_empty() || next_line.starts_with('#') {
143                                continue;
144                            }
145
146                            // Check if this line closes the array
147                            if next_line == ")" {
148                                break;
149                            }
150
151                            // Check if this line contains a closing parenthesis (may be on same line as content)
152                            if let Some(paren_pos) = next_line.find(')') {
153                                // Extract content before the closing paren
154                                let content_before_paren = &next_line[..paren_pos].trim();
155                                if !content_before_paren.is_empty() {
156                                    array_lines.push((*content_before_paren).to_string());
157                                }
158                                break;
159                            }
160
161                            // Add this line to the array content
162                            array_lines.push(next_line.to_string());
163                        }
164
165                        // Parse all collected lines as array content
166                        // Ensure proper spacing between items (each line should be a separate item)
167                        let array_content = array_lines
168                            .iter()
169                            .map(|s| s.trim())
170                            .filter(|s| !s.is_empty())
171                            .collect::<Vec<_>>()
172                            .join(" ");
173                        tracing::debug!(
174                            "Collected {} lines for multi-line {} array: {}",
175                            array_lines.len(),
176                            key,
177                            array_content
178                        );
179                        let parsed = parse_array_content(&array_content);
180                        tracing::debug!("Parsed array content: {:?}", parsed);
181                        parsed
182                    },
183                    |closing_paren_pos| {
184                        // Single-line array (may have content after closing paren): depends=('foo' 'bar') or depends+=('foo' 'bar') other_code
185                        let array_content = &value[1..closing_paren_pos];
186                        tracing::debug!("Parsing single-line {} array: {}", key, array_content);
187                        let parsed = parse_array_content(array_content);
188                        tracing::debug!("Parsed array content: {:?}", parsed);
189                        parsed
190                    },
191                );
192
193                // Filter out invalid dependencies (.so files, invalid names, etc.)
194                let filtered_deps: Vec<String> = deps
195                    .into_iter()
196                    .filter_map(|dep| {
197                        let dep_trimmed = dep.trim();
198                        if dep_trimmed.is_empty() {
199                            return None;
200                        }
201
202                        // Filter out .so files (virtual packages)
203                        let dep_lower = dep_trimmed.to_lowercase();
204                        if std::path::Path::new(&dep_lower)
205                            .extension()
206                            .is_some_and(|ext| ext.eq_ignore_ascii_case("so"))
207                            || dep_lower.contains(".so.")
208                            || dep_lower.contains(".so=")
209                        {
210                            return None;
211                        }
212
213                        // Filter out names ending with ) - this is a parsing error
214                        // But first check if it's actually a valid name with version constraint ending in )
215                        // like "package>=1.0)" which would be a parsing error
216                        if dep_trimmed.ends_with(')') {
217                            // Check if it might be a valid version constraint that accidentally ends with )
218                            // If it contains version operators before the ), it's likely a parsing error
219                            if dep_trimmed.contains(">=")
220                                || dep_trimmed.contains("<=")
221                                || dep_trimmed.contains("==")
222                            {
223                                // This looks like "package>=1.0)" which is invalid
224                                return None;
225                            }
226                            // Otherwise, it might be "package)" which is also invalid
227                            return None;
228                        }
229
230                        // Filter out names that don't look like package names
231                        // Package names should start with alphanumeric or underscore
232                        let first_char = dep_trimmed.chars().next().unwrap_or(' ');
233                        if !first_char.is_alphanumeric() && first_char != '_' {
234                            return None;
235                        }
236
237                        // Filter out names that are too short
238                        if dep_trimmed.len() < 2 {
239                            return None;
240                        }
241
242                        // Filter out names containing invalid characters (but allow version operators)
243                        // Allow: alphanumeric, dash, underscore, and version operators (>=, <=, ==, >, <)
244                        let has_valid_chars = dep_trimmed
245                            .chars()
246                            .any(|c| c.is_alphanumeric() || c == '-' || c == '_');
247                        if !has_valid_chars {
248                            return None;
249                        }
250
251                        Some(dep_trimmed.to_string())
252                    })
253                    .collect();
254
255                // Add dependencies to the appropriate vector (using base_key to handle both = and +=)
256                match base_key {
257                    "depends" => depends.extend(filtered_deps),
258                    "makedepends" => makedepends.extend(filtered_deps),
259                    "checkdepends" => checkdepends.extend(filtered_deps),
260                    "optdepends" => optdepends.extend(filtered_deps),
261                    _ => {}
262                }
263            }
264        }
265    }
266
267    (depends, makedepends, checkdepends, optdepends)
268}
269
/// What: Locate the parenthesis that closes the first opened one.
///
/// Inputs:
/// - `s`: Text to scan, normally beginning with `(`.
///
/// Output:
/// - `Some(byte_index)` of the `)` that brings the nesting level back to zero,
///   `None` when no such parenthesis exists.
///
/// Details:
/// - Parentheses inside single- or double-quoted sections are ignored.
/// - A quoted section ends only at the same quote character that opened it.
/// - Nested parentheses are counted, so the returned index belongs to the outermost pair.
fn find_matching_closing_paren(s: &str) -> Option<usize> {
    // `Some(q)` while inside a section opened by quote character `q`
    let mut open_quote: Option<char> = None;
    let mut nesting: i32 = 0;

    for (index, c) in s.char_indices() {
        if let Some(quote) = open_quote {
            // Inside quotes everything is literal except the matching closing quote
            if c == quote {
                open_quote = None;
            }
            continue;
        }

        match c {
            '\'' | '"' => open_quote = Some(c),
            '(' => nesting += 1,
            ')' => {
                nesting -= 1;
                if nesting == 0 {
                    return Some(index);
                }
            }
            _ => {}
        }
    }

    None
}
310
/// What: Split the inside of a bash array into its individual items.
///
/// Inputs:
/// - `content`: Text between the array parentheses
///   (e.g., "'foo' 'bar>=1.2'" or "libcairo.so libdbus-1.so").
///
/// Output:
/// - The items in order of appearance, with surrounding quotes removed.
///
/// Details:
/// - Single- and double-quoted items may contain whitespace and the other quote character.
/// - Unquoted items are separated by whitespace.
/// - Empty items (such as `''`) are not emitted.
/// - Text left over at the end (unterminated quote or trailing unquoted item) is emitted too.
fn parse_array_content(content: &str) -> Vec<String> {
    let mut items = Vec::new();
    let mut token = String::new();
    // `Some(q)` while inside a section opened by quote character `q`
    let mut open_quote: Option<char> = None;

    for c in content.chars() {
        match open_quote {
            // Matching quote: the quoted section ends and the token is complete
            Some(quote) if c == quote => {
                if !token.is_empty() {
                    items.push(std::mem::take(&mut token));
                }
                open_quote = None;
            }
            // Anything else inside quotes is taken literally
            Some(_) => token.push(c),
            None if c == '\'' || c == '"' => open_quote = Some(c),
            // Whitespace outside quotes terminates the current unquoted token
            None if c.is_whitespace() => {
                if !token.is_empty() {
                    items.push(std::mem::take(&mut token));
                }
            }
            None => token.push(c),
        }
    }

    // Flush whatever remains (unclosed quote or trailing unquoted value)
    if !token.is_empty() {
        items.push(token);
    }

    items
}
369
370/// What: Parse conflicts from PKGBUILD content.
371///
372/// Inputs:
373/// - `pkgbuild`: Raw PKGBUILD file content.
374///
375/// Output:
376/// - Returns a vector of conflicting package names.
377///
378/// Details:
379/// - Parses bash array syntax: `conflicts=('foo' 'bar')` (single-line)
380/// - Also handles `conflicts+=` patterns used in functions like `package()`
381/// - Handles both quoted and unquoted conflicts
382/// - Also handles multi-line arrays:
383///   ```text
384///   conflicts=(
385///       'foo'
386///       'bar'
387///   )
388///   ```
389/// - Filters out .so files (virtual packages) and invalid package names
390/// - Extracts package names from version constraints (e.g., "jujutsu-git>=1.0" -> "jujutsu-git")
391pub fn parse_pkgbuild_conflicts(pkgbuild: &str) -> Vec<String> {
392    tracing::debug!(
393        "parse_pkgbuild_conflicts: Starting parse, PKGBUILD length={}",
394        pkgbuild.len()
395    );
396    let mut conflicts = Vec::new();
397
398    let lines: Vec<&str> = pkgbuild.lines().collect();
399    let mut i = 0;
400
401    while i < lines.len() {
402        let line = lines[i].trim();
403        i += 1;
404
405        if line.is_empty() || line.starts_with('#') {
406            continue;
407        }
408
409        // Parse array declarations: conflicts=('foo' 'bar') or conflicts=( or conflicts+=('foo' 'bar')
410        if let Some((key, value)) = line.split_once('=') {
411            let key = key.trim();
412            let value = value.trim();
413
414            // Handle both conflicts= and conflicts+= patterns
415            let base_key = key.strip_suffix('+').map_or(key, |stripped| stripped);
416
417            // Only parse conflicts field
418            if base_key != "conflicts" {
419                continue;
420            }
421
422            tracing::debug!(
423                "parse_pkgbuild_conflicts: Found key-value pair: key='{}', base_key='{}', value='{}'",
424                key,
425                base_key,
426                value.chars().take(100).collect::<String>()
427            );
428
429            // Check if this is an array declaration
430            if value.starts_with('(') {
431                tracing::debug!(
432                    "parse_pkgbuild_conflicts: Detected array declaration for key='{}'",
433                    key
434                );
435                let conflict_deps = find_matching_closing_paren(value).map_or_else(
436                    || {
437                        // Multi-line array: conflicts=(
438                        //     'foo'
439                        //     'bar'
440                        // )
441                        tracing::debug!("Parsing multi-line {} array", key);
442                        let mut array_lines = Vec::new();
443                        // Collect lines until we find the closing parenthesis
444                        while i < lines.len() {
445                            let next_line = lines[i].trim();
446                            i += 1;
447
448                            // Skip empty lines and comments
449                            if next_line.is_empty() || next_line.starts_with('#') {
450                                continue;
451                            }
452
453                            // Check if this line closes the array
454                            if next_line == ")" {
455                                break;
456                            }
457
458                            // Check if this line contains a closing parenthesis (may be on same line as content)
459                            if let Some(paren_pos) = next_line.find(')') {
460                                // Extract content before the closing paren
461                                let content_before_paren = &next_line[..paren_pos].trim();
462                                if !content_before_paren.is_empty() {
463                                    array_lines.push((*content_before_paren).to_string());
464                                }
465                                break;
466                            }
467
468                            // Add this line to the array content
469                            array_lines.push(next_line.to_string());
470                        }
471
472                        // Parse all collected lines as array content
473                        let array_content = array_lines
474                            .iter()
475                            .map(|s| s.trim())
476                            .filter(|s| !s.is_empty())
477                            .collect::<Vec<_>>()
478                            .join(" ");
479                        tracing::debug!(
480                            "Collected {} lines for multi-line {} array: {}",
481                            array_lines.len(),
482                            key,
483                            array_content
484                        );
485                        let parsed = parse_array_content(&array_content);
486                        tracing::debug!("Parsed array content: {:?}", parsed);
487                        parsed
488                    },
489                    |closing_paren_pos| {
490                        // Single-line array (may have content after closing paren): conflicts=('foo' 'bar') or conflicts+=('foo' 'bar') other_code
491                        let array_content = &value[1..closing_paren_pos];
492                        tracing::debug!("Parsing single-line {} array: {}", key, array_content);
493                        let parsed = parse_array_content(array_content);
494                        tracing::debug!("Parsed array content: {:?}", parsed);
495                        parsed
496                    },
497                );
498
499                // Filter out invalid conflicts (.so files, invalid names, etc.)
500                let filtered_conflicts: Vec<String> = conflict_deps
501                    .into_iter()
502                    .filter_map(|conflict| {
503                        let conflict_trimmed = conflict.trim();
504                        if conflict_trimmed.is_empty() {
505                            return None;
506                        }
507
508                        // Filter out .so files (virtual packages)
509                        let conflict_lower = conflict_trimmed.to_lowercase();
510                        if std::path::Path::new(&conflict_lower)
511                            .extension()
512                            .is_some_and(|ext| ext.eq_ignore_ascii_case("so"))
513                            || conflict_lower.contains(".so.")
514                            || conflict_lower.contains(".so=")
515                        {
516                            return None;
517                        }
518
519                        // Filter out names ending with ) - this is a parsing error
520                        if conflict_trimmed.ends_with(')') {
521                            return None;
522                        }
523
524                        // Filter out names that don't look like package names
525                        let first_char = conflict_trimmed.chars().next().unwrap_or(' ');
526                        if !first_char.is_alphanumeric() && first_char != '_' {
527                            return None;
528                        }
529
530                        // Filter out names that are too short
531                        if conflict_trimmed.len() < 2 {
532                            return None;
533                        }
534
535                        // Filter out names containing invalid characters (but allow version operators)
536                        let has_valid_chars = conflict_trimmed
537                            .chars()
538                            .any(|c| c.is_alphanumeric() || c == '-' || c == '_');
539                        if !has_valid_chars {
540                            return None;
541                        }
542
543                        // Extract package name (remove version constraints if present)
544                        // Use a simple approach: split on version operators
545                        let pkg_name = conflict_trimmed.find(['>', '<', '=']).map_or_else(
546                            || conflict_trimmed.to_string(),
547                            |pos| conflict_trimmed[..pos].trim().to_string(),
548                        );
549                        if pkg_name.is_empty() {
550                            None
551                        } else {
552                            Some(pkg_name)
553                        }
554                    })
555                    .collect();
556
557                // Add conflicts to the vector (using base_key to handle both = and +=)
558                conflicts.extend(filtered_conflicts);
559            }
560        }
561    }
562
563    conflicts
564}