pacsea/logic/sandbox/parse.rs
1//! Parsing functions for .SRCINFO and PKGBUILD dependency extraction.
2
3/// What: Parse dependencies from .SRCINFO content.
4///
5/// Inputs:
6/// - `srcinfo`: Raw .SRCINFO file content.
7///
8/// Output:
9/// - Returns a tuple of (depends, makedepends, checkdepends, optdepends) vectors.
10#[allow(clippy::case_sensitive_file_extension_comparisons)]
11pub(super) fn parse_srcinfo_deps(
12 srcinfo: &str,
13) -> (Vec<String>, Vec<String>, Vec<String>, Vec<String>) {
14 let mut depends = Vec::new();
15 let mut makedepends = Vec::new();
16 let mut checkdepends = Vec::new();
17 let mut optdepends = Vec::new();
18
19 for line in srcinfo.lines() {
20 let line = line.trim();
21 if line.is_empty() || line.starts_with('#') {
22 continue;
23 }
24
25 // .SRCINFO format: key = value
26 if let Some((key, value)) = line.split_once('=') {
27 let key = key.trim();
28 let value = value.trim();
29
30 // Filter out virtual packages (.so files)
31 let value_lower = value.to_lowercase();
32 if value_lower.ends_with(".so")
33 || value_lower.contains(".so.")
34 || value_lower.contains(".so=")
35 {
36 continue;
37 }
38
39 match key {
40 "depends" => depends.push(value.to_string()),
41 "makedepends" => makedepends.push(value.to_string()),
42 "checkdepends" => checkdepends.push(value.to_string()),
43 "optdepends" => optdepends.push(value.to_string()),
44 _ => {}
45 }
46 }
47 }
48
49 (depends, makedepends, checkdepends, optdepends)
50}
51
52/// What: Parse dependencies from PKGBUILD content.
53///
54/// Inputs:
55/// - `pkgbuild`: Raw PKGBUILD file content.
56///
57/// Output:
58/// - Returns a tuple of (depends, makedepends, checkdepends, optdepends) vectors.
59///
60/// Details:
61/// - Parses bash array syntax: `depends=('foo' 'bar>=1.2')` (single-line)
62/// - Also handles `depends+=` patterns used in functions like `package()`
63/// - Handles both quoted and unquoted dependencies
64/// - Also handles multi-line arrays:
65/// ```text
66/// depends=(
67/// 'foo'
68/// 'bar>=1.2'
69/// )
70/// ```
71/// - Filters out .so files (virtual packages) and invalid package names
72/// - Only parses specific dependency fields (depends, makedepends, checkdepends, optdepends)
73pub fn parse_pkgbuild_deps(pkgbuild: &str) -> (Vec<String>, Vec<String>, Vec<String>, Vec<String>) {
74 tracing::debug!(
75 "parse_pkgbuild_deps: Starting parse, PKGBUILD length={}, first 500 chars: {:?}",
76 pkgbuild.len(),
77 pkgbuild.chars().take(500).collect::<String>()
78 );
79 let mut depends = Vec::new();
80 let mut makedepends = Vec::new();
81 let mut checkdepends = Vec::new();
82 let mut optdepends = Vec::new();
83
84 let lines: Vec<&str> = pkgbuild.lines().collect();
85 tracing::debug!(
86 "parse_pkgbuild_deps: Total lines in PKGBUILD: {}",
87 lines.len()
88 );
89 let mut i = 0;
90
91 while i < lines.len() {
92 let line = lines[i].trim();
93 i += 1;
94
95 if line.is_empty() || line.starts_with('#') {
96 continue;
97 }
98
99 // Parse array declarations: depends=('foo' 'bar') or depends=( or depends+=('foo' 'bar')
100 if let Some((key, value)) = line.split_once('=') {
101 let key = key.trim();
102 let value = value.trim();
103
104 // Handle both depends= and depends+= patterns
105 let base_key = key.strip_suffix('+').map_or(key, |stripped| stripped);
106
107 // Only parse specific dependency fields, ignore other PKGBUILD fields
108 if !matches!(
109 base_key,
110 "depends" | "makedepends" | "checkdepends" | "optdepends"
111 ) {
112 continue;
113 }
114
115 tracing::debug!(
116 "parse_pkgbuild_deps: Found key-value pair: key='{}', base_key='{}', value='{}'",
117 key,
118 base_key,
119 value.chars().take(100).collect::<String>()
120 );
121
122 // Check if this is an array declaration
123 if value.starts_with('(') {
124 tracing::debug!(
125 "parse_pkgbuild_deps: Detected array declaration for key='{}'",
126 key
127 );
128 let deps = find_matching_closing_paren(value).map_or_else(
129 || {
130 // Multi-line array: depends=(
131 // 'foo'
132 // 'bar'
133 // )
134 tracing::debug!("Parsing multi-line {} array", key);
135 let mut array_lines = Vec::new();
136 // Collect lines until we find the closing parenthesis
137 while i < lines.len() {
138 let next_line = lines[i].trim();
139 i += 1;
140
141 // Skip empty lines and comments
142 if next_line.is_empty() || next_line.starts_with('#') {
143 continue;
144 }
145
146 // Check if this line closes the array
147 if next_line == ")" {
148 break;
149 }
150
151 // Check if this line contains a closing parenthesis (may be on same line as content)
152 if let Some(paren_pos) = next_line.find(')') {
153 // Extract content before the closing paren
154 let content_before_paren = &next_line[..paren_pos].trim();
155 if !content_before_paren.is_empty() {
156 array_lines.push((*content_before_paren).to_string());
157 }
158 break;
159 }
160
161 // Add this line to the array content
162 array_lines.push(next_line.to_string());
163 }
164
165 // Parse all collected lines as array content
166 // Ensure proper spacing between items (each line should be a separate item)
167 let array_content = array_lines
168 .iter()
169 .map(|s| s.trim())
170 .filter(|s| !s.is_empty())
171 .collect::<Vec<_>>()
172 .join(" ");
173 tracing::debug!(
174 "Collected {} lines for multi-line {} array: {}",
175 array_lines.len(),
176 key,
177 array_content
178 );
179 let parsed = parse_array_content(&array_content);
180 tracing::debug!("Parsed array content: {:?}", parsed);
181 parsed
182 },
183 |closing_paren_pos| {
184 // Single-line array (may have content after closing paren): depends=('foo' 'bar') or depends+=('foo' 'bar') other_code
185 let array_content = &value[1..closing_paren_pos];
186 tracing::debug!("Parsing single-line {} array: {}", key, array_content);
187 let parsed = parse_array_content(array_content);
188 tracing::debug!("Parsed array content: {:?}", parsed);
189 parsed
190 },
191 );
192
193 // Filter out invalid dependencies (.so files, invalid names, etc.)
194 let filtered_deps: Vec<String> = deps
195 .into_iter()
196 .filter_map(|dep| {
197 let dep_trimmed = dep.trim();
198 if dep_trimmed.is_empty() {
199 return None;
200 }
201
202 // Filter out .so files (virtual packages)
203 let dep_lower = dep_trimmed.to_lowercase();
204 if std::path::Path::new(&dep_lower)
205 .extension()
206 .is_some_and(|ext| ext.eq_ignore_ascii_case("so"))
207 || dep_lower.contains(".so.")
208 || dep_lower.contains(".so=")
209 {
210 return None;
211 }
212
213 // Filter out names ending with ) - this is a parsing error
214 // But first check if it's actually a valid name with version constraint ending in )
215 // like "package>=1.0)" which would be a parsing error
216 if dep_trimmed.ends_with(')') {
217 // Check if it might be a valid version constraint that accidentally ends with )
218 // If it contains version operators before the ), it's likely a parsing error
219 if dep_trimmed.contains(">=")
220 || dep_trimmed.contains("<=")
221 || dep_trimmed.contains("==")
222 {
223 // This looks like "package>=1.0)" which is invalid
224 return None;
225 }
226 // Otherwise, it might be "package)" which is also invalid
227 return None;
228 }
229
230 // Filter out names that don't look like package names
231 // Package names should start with alphanumeric or underscore
232 let first_char = dep_trimmed.chars().next().unwrap_or(' ');
233 if !first_char.is_alphanumeric() && first_char != '_' {
234 return None;
235 }
236
237 // Filter out names that are too short
238 if dep_trimmed.len() < 2 {
239 return None;
240 }
241
242 // Filter out names containing invalid characters (but allow version operators)
243 // Allow: alphanumeric, dash, underscore, and version operators (>=, <=, ==, >, <)
244 let has_valid_chars = dep_trimmed
245 .chars()
246 .any(|c| c.is_alphanumeric() || c == '-' || c == '_');
247 if !has_valid_chars {
248 return None;
249 }
250
251 Some(dep_trimmed.to_string())
252 })
253 .collect();
254
255 // Add dependencies to the appropriate vector (using base_key to handle both = and +=)
256 match base_key {
257 "depends" => depends.extend(filtered_deps),
258 "makedepends" => makedepends.extend(filtered_deps),
259 "checkdepends" => checkdepends.extend(filtered_deps),
260 "optdepends" => optdepends.extend(filtered_deps),
261 _ => {}
262 }
263 }
264 }
265 }
266
267 (depends, makedepends, checkdepends, optdepends)
268}
269
/// What: Locate the parenthesis that closes the first opened one.
///
/// Inputs:
/// - `s`: Text expected to begin with `(`.
///
/// Output:
/// - `Some(byte_offset)` of the `)` that brings the nesting level back to zero,
///   or `None` when no such parenthesis exists.
///
/// Details:
/// - Nested parentheses are counted.
/// - Parentheses inside single- or double-quoted sections are ignored; a quoted
///   section ends only at the same quote character that opened it.
fn find_matching_closing_paren(s: &str) -> Option<usize> {
    let mut open_quote: Option<char> = None;
    let mut nesting: i32 = 0;

    for (idx, c) in s.char_indices() {
        // Inside a quoted section only the matching quote character matters.
        if let Some(quote) = open_quote {
            if c == quote {
                open_quote = None;
            }
            continue;
        }

        match c {
            '\'' | '"' => open_quote = Some(c),
            '(' => nesting += 1,
            ')' => {
                nesting -= 1;
                if nesting == 0 {
                    return Some(idx);
                }
            }
            _ => {}
        }
    }

    None
}
310
/// What: Split the inside of a bash array into its individual elements.
///
/// Inputs:
/// - `content`: Text between the array parentheses
///   (e.g., "'foo' 'bar>=1.2'" or "libcairo.so libdbus-1.so").
///
/// Output:
/// - The elements in order of appearance, with surrounding quotes removed.
///
/// Details:
/// - Single- and double-quoted elements may contain whitespace and the other quote character.
/// - Outside quotes, whitespace separates elements.
/// - Empty elements are never emitted; an unterminated quote yields whatever was collected.
fn parse_array_content(content: &str) -> Vec<String> {
    let mut items: Vec<String> = Vec::new();
    let mut token = String::new();
    let mut open_quote: Option<char> = None;

    for c in content.chars() {
        match open_quote {
            // Closing quote: the quoted element is complete.
            Some(quote) if c == quote => {
                if !token.is_empty() {
                    items.push(std::mem::take(&mut token));
                }
                open_quote = None;
            }
            // Anything else inside quotes is literal text.
            Some(_) => token.push(c),
            None if c == '\'' || c == '"' => open_quote = Some(c),
            // Whitespace outside quotes ends the current unquoted element.
            None if c.is_whitespace() => {
                if !token.is_empty() {
                    items.push(std::mem::take(&mut token));
                }
            }
            None => token.push(c),
        }
    }

    // Trailing unquoted element, or text after an unterminated quote.
    if !token.is_empty() {
        items.push(token);
    }

    items
}
369
370/// What: Parse conflicts from PKGBUILD content.
371///
372/// Inputs:
373/// - `pkgbuild`: Raw PKGBUILD file content.
374///
375/// Output:
376/// - Returns a vector of conflicting package names.
377///
378/// Details:
379/// - Parses bash array syntax: `conflicts=('foo' 'bar')` (single-line)
380/// - Also handles `conflicts+=` patterns used in functions like `package()`
381/// - Handles both quoted and unquoted conflicts
382/// - Also handles multi-line arrays:
383/// ```text
384/// conflicts=(
385/// 'foo'
386/// 'bar'
387/// )
388/// ```
389/// - Filters out .so files (virtual packages) and invalid package names
390/// - Extracts package names from version constraints (e.g., "jujutsu-git>=1.0" -> "jujutsu-git")
391pub fn parse_pkgbuild_conflicts(pkgbuild: &str) -> Vec<String> {
392 tracing::debug!(
393 "parse_pkgbuild_conflicts: Starting parse, PKGBUILD length={}",
394 pkgbuild.len()
395 );
396 let mut conflicts = Vec::new();
397
398 let lines: Vec<&str> = pkgbuild.lines().collect();
399 let mut i = 0;
400
401 while i < lines.len() {
402 let line = lines[i].trim();
403 i += 1;
404
405 if line.is_empty() || line.starts_with('#') {
406 continue;
407 }
408
409 // Parse array declarations: conflicts=('foo' 'bar') or conflicts=( or conflicts+=('foo' 'bar')
410 if let Some((key, value)) = line.split_once('=') {
411 let key = key.trim();
412 let value = value.trim();
413
414 // Handle both conflicts= and conflicts+= patterns
415 let base_key = key.strip_suffix('+').map_or(key, |stripped| stripped);
416
417 // Only parse conflicts field
418 if base_key != "conflicts" {
419 continue;
420 }
421
422 tracing::debug!(
423 "parse_pkgbuild_conflicts: Found key-value pair: key='{}', base_key='{}', value='{}'",
424 key,
425 base_key,
426 value.chars().take(100).collect::<String>()
427 );
428
429 // Check if this is an array declaration
430 if value.starts_with('(') {
431 tracing::debug!(
432 "parse_pkgbuild_conflicts: Detected array declaration for key='{}'",
433 key
434 );
435 let conflict_deps = find_matching_closing_paren(value).map_or_else(
436 || {
437 // Multi-line array: conflicts=(
438 // 'foo'
439 // 'bar'
440 // )
441 tracing::debug!("Parsing multi-line {} array", key);
442 let mut array_lines = Vec::new();
443 // Collect lines until we find the closing parenthesis
444 while i < lines.len() {
445 let next_line = lines[i].trim();
446 i += 1;
447
448 // Skip empty lines and comments
449 if next_line.is_empty() || next_line.starts_with('#') {
450 continue;
451 }
452
453 // Check if this line closes the array
454 if next_line == ")" {
455 break;
456 }
457
458 // Check if this line contains a closing parenthesis (may be on same line as content)
459 if let Some(paren_pos) = next_line.find(')') {
460 // Extract content before the closing paren
461 let content_before_paren = &next_line[..paren_pos].trim();
462 if !content_before_paren.is_empty() {
463 array_lines.push((*content_before_paren).to_string());
464 }
465 break;
466 }
467
468 // Add this line to the array content
469 array_lines.push(next_line.to_string());
470 }
471
472 // Parse all collected lines as array content
473 let array_content = array_lines
474 .iter()
475 .map(|s| s.trim())
476 .filter(|s| !s.is_empty())
477 .collect::<Vec<_>>()
478 .join(" ");
479 tracing::debug!(
480 "Collected {} lines for multi-line {} array: {}",
481 array_lines.len(),
482 key,
483 array_content
484 );
485 let parsed = parse_array_content(&array_content);
486 tracing::debug!("Parsed array content: {:?}", parsed);
487 parsed
488 },
489 |closing_paren_pos| {
490 // Single-line array (may have content after closing paren): conflicts=('foo' 'bar') or conflicts+=('foo' 'bar') other_code
491 let array_content = &value[1..closing_paren_pos];
492 tracing::debug!("Parsing single-line {} array: {}", key, array_content);
493 let parsed = parse_array_content(array_content);
494 tracing::debug!("Parsed array content: {:?}", parsed);
495 parsed
496 },
497 );
498
499 // Filter out invalid conflicts (.so files, invalid names, etc.)
500 let filtered_conflicts: Vec<String> = conflict_deps
501 .into_iter()
502 .filter_map(|conflict| {
503 let conflict_trimmed = conflict.trim();
504 if conflict_trimmed.is_empty() {
505 return None;
506 }
507
508 // Filter out .so files (virtual packages)
509 let conflict_lower = conflict_trimmed.to_lowercase();
510 if std::path::Path::new(&conflict_lower)
511 .extension()
512 .is_some_and(|ext| ext.eq_ignore_ascii_case("so"))
513 || conflict_lower.contains(".so.")
514 || conflict_lower.contains(".so=")
515 {
516 return None;
517 }
518
519 // Filter out names ending with ) - this is a parsing error
520 if conflict_trimmed.ends_with(')') {
521 return None;
522 }
523
524 // Filter out names that don't look like package names
525 let first_char = conflict_trimmed.chars().next().unwrap_or(' ');
526 if !first_char.is_alphanumeric() && first_char != '_' {
527 return None;
528 }
529
530 // Filter out names that are too short
531 if conflict_trimmed.len() < 2 {
532 return None;
533 }
534
535 // Filter out names containing invalid characters (but allow version operators)
536 let has_valid_chars = conflict_trimmed
537 .chars()
538 .any(|c| c.is_alphanumeric() || c == '-' || c == '_');
539 if !has_valid_chars {
540 return None;
541 }
542
543 // Extract package name (remove version constraints if present)
544 // Use a simple approach: split on version operators
545 let pkg_name = conflict_trimmed.find(['>', '<', '=']).map_or_else(
546 || conflict_trimmed.to_string(),
547 |pos| conflict_trimmed[..pos].trim().to_string(),
548 );
549 if pkg_name.is_empty() {
550 None
551 } else {
552 Some(pkg_name)
553 }
554 })
555 .collect();
556
557 // Add conflicts to the vector (using base_key to handle both = and +=)
558 conflicts.extend(filtered_conflicts);
559 }
560 }
561 }
562
563 conflicts
564}