pacsea/util/curl.rs
1//! Curl-based HTTP utilities for fetching JSON and text content.
2//!
3//! This module provides functions for executing curl commands and handling
4//! common error cases with user-friendly error messages.
5//!
6//! # Security
7//! - Uses absolute paths for curl binary when available (defense-in-depth against PATH hijacking)
8//! - Redacts URL query parameters in debug logs to prevent potential secret leakage
9
10use super::curl_args;
11use chrono;
12use serde_json::Value;
13use std::sync::OnceLock;
14
/// What: Module-local `Result` shorthand shared by the curl helpers.
///
/// Inputs: None (type alias).
///
/// Output: `std::result::Result<T, E>` where `E` is a boxed, thread-safe error trait object.
///
/// Details: Boxing the error lets I/O, UTF-8, JSON and plain-string failures all be
/// returned through one signature via `?` / `.into()`.
type Result<T> = std::result::Result<T, Box<dyn std::error::Error + Send + Sync + 'static>>;
23
24/// Cached curl binary path for performance (computed once at first use).
25static CURL_PATH: OnceLock<String> = OnceLock::new();
26
27/// What: Find the curl binary path, preferring absolute paths for security.
28///
29/// Inputs: None
30///
31/// Output:
32/// - Path to curl binary (absolute path if found, otherwise "curl" for PATH lookup)
33///
34/// Details:
35/// - If `PACSEA_CURL_PATH` env var is set, returns "curl" to use PATH lookup (for testing)
36/// - On Unix: Checks `/usr/bin/curl`, `/bin/curl`, `/usr/local/bin/curl`
37/// - On Windows: Checks system paths (System32, Git, MSYS2, Cygwin, Chocolatey)
38/// and user paths (Scoop, `WinGet`, local installs)
39/// - Falls back to PATH lookup if no absolute path is found
40/// - Result is cached for performance using `OnceLock` (except when env override is set)
41/// - Defense-in-depth measure against PATH hijacking attacks
42fn get_curl_path() -> &'static str {
43 // Check for test override BEFORE using cache - allows tests to inject fake curl
44 // This check is outside OnceLock so it's evaluated on every call
45 if std::env::var("PACSEA_CURL_PATH").is_ok() {
46 // Leak a static string for the "curl" fallback in test mode
47 // This is intentional: tests need a consistent &'static str return type
48 return Box::leak(Box::new("curl".to_string()));
49 }
50
51 CURL_PATH.get_or_init(|| {
52 // Check common absolute paths first (defense-in-depth against PATH hijacking)
53 #[cfg(unix)]
54 {
55 for path in ["/usr/bin/curl", "/bin/curl", "/usr/local/bin/curl"] {
56 if std::path::Path::new(path).exists() {
57 tracing::trace!(curl_path = path, "Using absolute path for curl");
58 return path.to_string();
59 }
60 }
61 }
62
63 #[cfg(target_os = "windows")]
64 {
65 // On Windows, check common system installation paths first
66 let system_paths = [
67 r"C:\Windows\System32\curl.exe",
68 r"C:\Program Files\Git\mingw64\bin\curl.exe",
69 r"C:\Program Files (x86)\Git\mingw64\bin\curl.exe",
70 r"C:\Program Files\curl\bin\curl.exe",
71 r"C:\curl\bin\curl.exe",
72 r"C:\ProgramData\chocolatey\bin\curl.exe",
73 r"C:\msys64\usr\bin\curl.exe",
74 r"C:\msys64\mingw64\bin\curl.exe",
75 r"C:\cygwin64\bin\curl.exe",
76 r"C:\cygwin\bin\curl.exe",
77 ];
78
79 for path in system_paths {
80 if std::path::Path::new(path).exists() {
81 tracing::trace!(curl_path = path, "Using absolute path for curl on Windows");
82 return path.to_string();
83 }
84 }
85
86 // Check user-specific paths (Scoop, MSYS2, local installs)
87 if let Ok(user_profile) = std::env::var("USERPROFILE") {
88 let user_paths = [
89 // Scoop
90 format!(r"{user_profile}\scoop\shims\curl.exe"),
91 format!(r"{user_profile}\scoop\apps\curl\current\bin\curl.exe"),
92 format!(r"{user_profile}\scoop\apps\msys2\current\usr\bin\curl.exe"),
93 format!(r"{user_profile}\scoop\apps\msys2\current\mingw64\bin\curl.exe"),
94 // MSYS2 user installs
95 format!(r"{user_profile}\msys64\usr\bin\curl.exe"),
96 format!(r"{user_profile}\msys64\mingw64\bin\curl.exe"),
97 format!(r"{user_profile}\msys2\usr\bin\curl.exe"),
98 format!(r"{user_profile}\msys2\mingw64\bin\curl.exe"),
99 // Other user paths
100 format!(r"{user_profile}\.local\bin\curl.exe"),
101 format!(r"{user_profile}\AppData\Local\Microsoft\WinGet\Packages\curl.exe"),
102 ];
103
104 for path in user_paths {
105 if std::path::Path::new(&path).exists() {
106 tracing::trace!(
107 curl_path = %path,
108 "Using user-specific path for curl on Windows"
109 );
110 return path;
111 }
112 }
113 }
114 }
115
116 // Fallback to PATH lookup
117 tracing::trace!("No absolute curl path found, falling back to PATH lookup");
118 "curl".to_string()
119 })
120}
121
122/// What: Resolved curl executable path for subprocess or shell wrappers (e.g. privileged fetch).
123///
124/// Inputs:
125/// - None.
126///
127/// Output:
128/// - Path string matching the same resolution used by curl helpers (absolute binary when found).
129///
130/// Details:
131/// - Prefer this over hard-coding `curl` so planners can probe `--version` and build argv-safe commands.
132#[must_use]
133pub fn curl_binary_path() -> &'static str {
134 get_curl_path()
135}
136
/// What: Produce a log-safe form of a URL by hiding its query string.
///
/// Inputs:
/// - `url`: The full URL, which may or may not contain a `?`
///
/// Output:
/// - Everything before the first `?` followed by `?[REDACTED]`, or the URL unchanged
///   when it contains no `?`
///
/// Details:
/// - Keeps API keys or tokens passed as query parameters out of the logs.
/// - Only the query string is handled; the rest of the URL is emitted verbatim.
#[cfg(target_os = "windows")]
fn redact_url_for_logging(url: &str) -> String {
    match url.split_once('?') {
        Some((base, _query)) => format!("{base}?[REDACTED]"),
        None => url.to_string(),
    }
}
155
/// What: Extract HTTP code from curl's `-w` output format.
///
/// Inputs:
/// - `output`: The stdout output from curl that may contain `__HTTP_CODE__:XXX`
///
/// Output:
/// - Some(u16) if an HTTP code was found, None otherwise
///
/// Details:
/// - Looks for the `__HTTP_CODE__:` marker we add via the `-w` flag.
/// - curl writes the `-w` text after the response, so lines are scanned from the end:
///   the trailer wins over any body line that merely happens to start with the marker.
/// - Marker lines whose remainder is not a valid `u16` are skipped rather than ending
///   the search.
fn extract_http_code_from_output(output: &str) -> Option<u16> {
    output
        .lines()
        .rev()
        .find_map(|line| line.strip_prefix("__HTTP_CODE__:")?.trim().parse().ok())
}
173
/// What: Pull the HTTP status out of curl's stderr failure message.
///
/// Inputs:
/// - `stderr`: Text curl wrote to stderr
///
/// Output:
/// - `Some(code)` when the text contains `returned error: ` followed by digits that fit
///   in a `u16`; `None` otherwise
///
/// Details:
/// - Matches curl's message shape, e.g. `curl: (22) The requested URL returned error: 404`.
/// - Only the first occurrence of the marker is considered.
fn extract_http_code_from_stderr(stderr: &str) -> Option<u16> {
    const MARKER: &str = "returned error: ";

    let (_, tail) = stderr.split_once(MARKER)?;
    // Keep only the leading run of ASCII digits; anything after it (reason phrase,
    // newline, ...) is ignored.
    let digits_end = tail
        .find(|c: char| !c.is_ascii_digit())
        .unwrap_or(tail.len());
    tail[..digits_end].parse().ok()
}
195
196/// What: Maps curl exit code to a human-readable error message with HTTP code info.
197///
198/// Inputs:
199/// - `code`: Exit code from curl process.
200/// - `status`: The full process exit status for signal handling.
201/// - `http_code`: The actual HTTP status code from the server.
202///
203/// Output:
204/// - Human-readable error string describing the network issue with specific HTTP code.
205///
206/// Details:
207/// - Provides more specific error messages when HTTP code is known
208/// - 404 is "Resource not found", 429 is "Rate limited", etc.
209fn map_curl_error_with_http_code(
210 code: Option<i32>,
211 status: std::process::ExitStatus,
212 http_code: u16,
213) -> String {
214 // If we have the actual HTTP code, provide a more specific message
215 match http_code {
216 404 => "HTTP 404: Resource not found (package may not exist in repository)".to_string(),
217 429 => "HTTP 429: Rate limited by server".to_string(),
218 500 => "HTTP 500: Internal server error".to_string(),
219 502 => "HTTP 502: Bad gateway".to_string(),
220 503 => "HTTP 503: Service temporarily unavailable".to_string(),
221 504 => "HTTP 504: Gateway timeout".to_string(),
222 _ if (400..500).contains(&http_code) => {
223 format!("HTTP {http_code}: Client error")
224 }
225 _ if http_code >= 500 => {
226 format!("HTTP {http_code}: Server error (temporarily unavailable)")
227 }
228 _ => map_curl_error(code, status),
229 }
230}
231
/// What: Turn a curl process exit into a user-friendly error message.
///
/// Inputs:
/// - `code`: Exit code of the curl process, or `None` when it did not exit normally
/// - `status`: Full exit status, used when there is no exit code
///
/// Output:
/// - Descriptive message for exit codes 22, 6, 7 and 28; a generic message otherwise
///
/// Details:
/// - With no exit code on Unix, the terminating signal is reported when available.
/// - With no exit code elsewhere (or no signal), the debug form of `status` is reported.
fn map_curl_error(code: Option<i32>, status: std::process::ExitStatus) -> String {
    match code {
        Some(22) => String::from("HTTP error from server (code unknown)"),
        Some(6) => String::from("Could not resolve host (DNS/network issue)"),
        Some(7) => String::from("Failed to connect to host (network unreachable)"),
        Some(28) => String::from("Operation timeout"),
        Some(code) => format!("curl failed with exit code {code}"),
        None => {
            // No exit code: the process was terminated by a signal or failed some other way.
            #[cfg(unix)]
            {
                use std::os::unix::process::ExitStatusExt;
                match status.signal() {
                    Some(signal) => format!("curl process terminated by signal {signal}"),
                    None => format!("curl process failed: {status:?}"),
                }
            }
            #[cfg(not(unix))]
            {
                format!("curl process failed: {status:?}")
            }
        }
    }
}
270
271/// What: Fetch JSON from a URL using curl and parse into `serde_json::Value`.
272///
273/// Inputs:
274/// - `url`: HTTP(S) URL to request
275///
276/// Output:
277/// - `Ok(Value)` on success; `Err` if curl fails or the response is not valid JSON
278///
279/// # Errors
280/// - Returns `Err` when curl command execution fails (I/O error or curl not found)
281/// - Returns `Err` when curl exits with non-zero status (network errors, HTTP errors, timeouts)
282/// - Returns `Err` when response body cannot be decoded as UTF-8
283/// - Returns `Err` when response body cannot be parsed as JSON
284///
285/// Details:
286/// - Executes curl with appropriate flags and parses the UTF-8 body with `serde_json`.
287/// - On Windows, uses `-k` flag to skip SSL certificate verification.
288/// - Provides user-friendly error messages for common curl failure cases.
289/// - For HTTP errors, includes the actual status code in the error message when available.
290pub fn curl_json(url: &str) -> Result<Value> {
291 let mut args = curl_args(url, &[]);
292 // Add write-out format to capture HTTP status code on failure
293 // The %{http_code} is curl's write-out format, not a Rust format string
294 #[allow(clippy::literal_string_with_formatting_args)]
295 let write_out_format = "\n__HTTP_CODE__:%{http_code}".to_string();
296 args.push("-w".to_string());
297 args.push(write_out_format);
298 let curl_bin = get_curl_path();
299 #[cfg(target_os = "windows")]
300 {
301 // On Windows, log curl command for debugging (URL redacted for security)
302 let safe_url = redact_url_for_logging(url);
303 tracing::debug!(
304 curl_bin = %curl_bin,
305 url = %safe_url,
306 "Executing curl command on Windows"
307 );
308 }
309 let out = std::process::Command::new(curl_bin).args(&args).output()?;
310 if !out.status.success() {
311 // Try to extract HTTP status code from stderr or stdout
312 let stderr = String::from_utf8_lossy(&out.stderr);
313 let stdout = String::from_utf8_lossy(&out.stdout);
314
315 // Look for HTTP code in the output
316 let http_code = extract_http_code_from_output(&stdout)
317 .or_else(|| extract_http_code_from_stderr(&stderr));
318
319 let error_msg = if let Some(code) = http_code {
320 map_curl_error_with_http_code(out.status.code(), out.status, code)
321 } else {
322 map_curl_error(out.status.code(), out.status)
323 };
324
325 #[cfg(target_os = "windows")]
326 {
327 let safe_url = redact_url_for_logging(url);
328 // On Windows, also log stderr for debugging
329 if !stderr.is_empty() {
330 tracing::warn!(stderr = %stderr, url = %safe_url, "curl stderr output on Windows");
331 }
332 // Also log stdout in case there's useful info there
333 if !stdout.is_empty() {
334 tracing::debug!(stdout = %stdout, url = %safe_url, "curl stdout on Windows (non-success)");
335 }
336 }
337 return Err(error_msg.into());
338 }
339 let raw_body = String::from_utf8(out.stdout)?;
340 // Strip the __HTTP_CODE__:XXX suffix we added via -w flag
341 let body = raw_body
342 .lines()
343 .filter(|line| !line.starts_with("__HTTP_CODE__:"))
344 .collect::<Vec<_>>()
345 .join("\n");
346 #[cfg(target_os = "windows")]
347 {
348 // On Windows, log response details for debugging API issues (URL redacted)
349 let safe_url = redact_url_for_logging(url);
350 if body.len() < 500 {
351 tracing::debug!(
352 url = %safe_url,
353 response_length = body.len(),
354 "curl response received on Windows"
355 );
356 } else {
357 tracing::debug!(
358 url = %safe_url,
359 response_length = body.len(),
360 "curl response received on Windows (truncated)"
361 );
362 }
363 }
364 let v: Value = serde_json::from_str(&body)?;
365 Ok(v)
366}
367
368/// What: Fetch plain text from a URL using curl.
369///
370/// Inputs:
371/// - `url`: URL to request
372///
373/// Output:
374/// - `Ok(String)` with response body; `Err` if curl or UTF-8 decoding fails
375///
376/// # Errors
377/// - Returns `Err` when curl command execution fails (I/O error or curl not found)
378/// - Returns `Err` when curl exits with non-zero status (network errors, HTTP errors, timeouts)
379/// - Returns `Err` when response body cannot be decoded as UTF-8
380///
381/// Details:
382/// - Executes curl with appropriate flags and returns the raw body as a `String`.
383/// - On Windows, uses `-k` flag to skip SSL certificate verification.
384/// - Provides user-friendly error messages for common curl failure cases.
385pub fn curl_text(url: &str) -> Result<String> {
386 curl_text_with_args(url, &[])
387}
388
389/// What: Parse Retry-After header value into seconds.
390///
391/// Inputs:
392/// - `retry_after`: Retry-After header value (can be seconds as number or HTTP-date)
393///
394/// Output:
395/// - `Some(seconds)` if parsing succeeds, `None` otherwise
396///
397/// Details:
398/// - Supports both numeric format (seconds) and HTTP-date format (RFC 7231).
399/// - For HTTP-date, calculates seconds until that date.
400fn parse_retry_after(retry_after: &str) -> Option<u64> {
401 let trimmed = retry_after.trim();
402 // Try parsing as number (seconds)
403 if let Ok(seconds) = trimmed.parse::<u64>() {
404 return Some(seconds);
405 }
406 // Try parsing as HTTP-date (RFC 7231)
407 // Common formats: "Wed, 21 Oct 2015 07:28:00 GMT", "Wed, 21 Oct 2015 07:28:00 +0000"
408 if let Ok(dt) = chrono::DateTime::parse_from_rfc2822(trimmed) {
409 let now = chrono::Utc::now();
410 let retry_time = dt.with_timezone(&chrono::Utc);
411 if retry_time > now {
412 let duration = retry_time - now;
413 let seconds = duration.num_seconds().max(0);
414 // Safe: seconds is non-negative, and u64::MAX is much larger than any reasonable retry time
415 #[allow(clippy::cast_sign_loss)]
416 return Some(seconds as u64);
417 }
418 return Some(0);
419 }
420 // Try RFC 3339 format
421 if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(trimmed) {
422 let now = chrono::Utc::now();
423 let retry_time = dt.with_timezone(&chrono::Utc);
424 if retry_time > now {
425 let duration = retry_time - now;
426 let seconds = duration.num_seconds().max(0);
427 // Safe: seconds is non-negative, and u64::MAX is much larger than any reasonable retry time
428 #[allow(clippy::cast_sign_loss)]
429 return Some(seconds as u64);
430 }
431 return Some(0);
432 }
433 None
434}
435
/// What: Extract header value from HTTP response headers (case-insensitive).
///
/// Inputs:
/// - `headers_text`: Raw HTTP headers text (from curl -i output)
/// - `header_name`: Name of the header to extract (ASCII case-insensitive)
///
/// Output:
/// - `Some(value)` for the first matching header line, `None` if there is none
///
/// Details:
/// - Each line is split at its first `:`; the part before it (leading whitespace
///   ignored) must equal `header_name` ignoring ASCII case.
/// - Returns the trimmed text after the colon, which may itself contain colons.
/// - Lines without a colon (such as the status line) never match.
/// - The comparison borrows from the input, so no per-line allocation is made.
fn extract_header_value(headers_text: &str, header_name: &str) -> Option<String> {
    headers_text.lines().find_map(|line| {
        let (name, value) = line.split_once(':')?;
        name.trim_start()
            .eq_ignore_ascii_case(header_name)
            .then(|| value.trim().to_string())
    })
}
461
462/// What: Extract Retry-After header value from HTTP response headers.
463///
464/// Inputs:
465/// - `headers_text`: Raw HTTP headers text (from curl -i output)
466///
467/// Output:
468/// - `Some(seconds)` if Retry-After header found and parsed, `None` otherwise
469///
470/// Details:
471/// - Searches for "Retry-After:" header (case-insensitive).
472/// - Parses the value using `parse_retry_after()`.
473fn extract_retry_after(headers_text: &str) -> Option<u64> {
474 extract_header_value(headers_text, "Retry-After")
475 .as_deref()
476 .and_then(parse_retry_after)
477}
478
/// Response metadata including headers for parsing `Retry-After`, `ETag`, and `Last-Modified`.
///
/// `Default` yields an empty body with every optional field unset, which allows
/// struct-update construction (`CurlResponse { body, ..Default::default() }`);
/// `PartialEq`/`Eq` allow responses to be compared directly.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CurlResponse {
    /// Response body.
    pub body: String,
    /// HTTP status code, if one could be parsed from curl's output.
    pub status_code: Option<u16>,
    /// Retry-After header value in seconds, if present.
    pub retry_after_seconds: Option<u64>,
    /// `ETag` header value, if present.
    pub etag: Option<String>,
    /// Last-Modified header value, if present.
    pub last_modified: Option<String>,
}
493
494/// What: Fetch plain text from a URL using curl with custom arguments, including headers.
495///
496/// Inputs:
497/// - `url`: URL to request
498/// - `extra_args`: Additional curl arguments (e.g., `["--max-time", "10"]`)
499///
500/// Output:
501/// - `Ok(CurlResponse)` with response body, status code, and parsed headers; `Err` if curl or UTF-8 decoding fails
502///
503/// # Errors
504/// - Returns `Err` when curl command execution fails (I/O error or curl not found)
505/// - Returns `Err` when curl exits with non-zero status (network errors, HTTP errors, timeouts)
506/// - Returns `Err` when response body cannot be decoded as UTF-8
507///
508/// Details:
509/// - Executes curl with `-i` flag to include headers in output.
510/// - Uses `-w "\n%{http_code}\n"` to get HTTP status code at the end.
511/// - Parses Retry-After header from response headers.
512/// - Separates headers from body in the response.
513pub fn curl_text_with_args_headers(url: &str, extra_args: &[&str]) -> Result<CurlResponse> {
514 let mut args = curl_args(url, extra_args);
515 // Include headers in output (-i flag)
516 args.push("-i".to_string());
517 // Append write-out format to get HTTP status code at the end
518 args.push("-w".to_string());
519 args.push("\n%{http_code}\n".to_string());
520 let curl_bin = get_curl_path();
521 let out = std::process::Command::new(curl_bin)
522 .args(&args)
523 .output()
524 .map_err(|e| {
525 format!("curl command failed to execute: {e} (is curl installed and in PATH?)")
526 })?;
527
528 let stdout = String::from_utf8(out.stdout)?;
529
530 // Parse status code from the end of output (last line should be the status code)
531 let status_code = stdout
532 .lines()
533 .last()
534 .and_then(|line| line.trim().parse::<u16>().ok());
535
536 // Find the boundary between headers and body (empty line)
537 let lines: Vec<&str> = stdout.lines().collect();
538 let mut header_end = 0;
539 let mut found_empty_line = false;
540 for (i, line) in lines.iter().enumerate() {
541 if line.trim().is_empty() && i > 0 {
542 // Found empty line separating headers from body
543 header_end = i;
544 found_empty_line = true;
545 break;
546 }
547 }
548
549 // Extract headers and body
550 let (headers_text, body_lines) = if found_empty_line {
551 let headers: Vec<&str> = lines[..header_end].to_vec();
552 // Skip the empty line and status code line at the end
553 let body_end = lines.len().saturating_sub(1); // Exclude status code line
554 let body: Vec<&str> = if header_end + 1 < body_end {
555 lines[header_end + 1..body_end].to_vec()
556 } else {
557 vec![]
558 };
559 (headers.join("\n"), body.join("\n"))
560 } else {
561 // No headers found, treat entire output as body (minus status code)
562 let body_end = lines.len().saturating_sub(1);
563 let body: Vec<&str> = if body_end > 0 {
564 lines[..body_end].to_vec()
565 } else {
566 vec![]
567 };
568 (String::new(), body.join("\n"))
569 };
570
571 // Parse headers
572 let retry_after_seconds = (!headers_text.is_empty())
573 .then(|| extract_retry_after(&headers_text))
574 .flatten();
575 let etag = (!headers_text.is_empty())
576 .then(|| extract_header_value(&headers_text, "ETag"))
577 .flatten();
578 let last_modified = (!headers_text.is_empty())
579 .then(|| extract_header_value(&headers_text, "Last-Modified"))
580 .flatten();
581
582 Ok(CurlResponse {
583 body: body_lines,
584 status_code,
585 retry_after_seconds,
586 etag,
587 last_modified,
588 })
589}
590
591/// What: Fetch plain text from a URL using curl with custom arguments.
592///
593/// Inputs:
594/// - `url`: URL to request
595/// - `extra_args`: Additional curl arguments (e.g., `["--max-time", "10"]`)
596///
597/// Output:
598/// - `Ok(String)` with response body; `Err` if curl or UTF-8 decoding fails
599///
600/// # Errors
601/// - Returns `Err` when curl command execution fails (I/O error or curl not found)
602/// - Returns `Err` when curl exits with non-zero status (network errors, HTTP errors, timeouts)
603/// - Returns `Err` when response body cannot be decoded as UTF-8
604/// - Returns `Err` with message containing "429" when HTTP 429 (Too Many Requests) is received
605///
606/// Details:
607/// - Executes curl with appropriate flags plus extra arguments.
608/// - On Windows, uses `-k` flag to skip SSL certificate verification.
609/// - Uses `-i` flag to include headers for Retry-After parsing.
610/// - Uses `-w "\n%{http_code}\n"` to detect HTTP status codes, especially 429.
611/// - Provides user-friendly error messages for common curl failure cases.
612/// - HTTP 429/503 errors include Retry-After information when available.
613pub fn curl_text_with_args(url: &str, extra_args: &[&str]) -> Result<String> {
614 let mut args = curl_args(url, extra_args);
615 // Include headers in output (-i flag) for Retry-After parsing
616 args.push("-i".to_string());
617 // Append write-out format to get HTTP status code at the end
618 args.push("-w".to_string());
619 args.push("\n%{http_code}\n".to_string());
620 let curl_bin = get_curl_path();
621 let out = std::process::Command::new(curl_bin)
622 .args(&args)
623 .output()
624 .map_err(|e| {
625 format!("curl command failed to execute: {e} (is curl installed and in PATH?)")
626 })?;
627
628 let stdout = String::from_utf8(out.stdout)?;
629
630 // Parse status code from the end of output (last line should be the status code)
631 // Check if last line is a numeric status code (3 digits)
632 let lines: Vec<&str> = stdout.lines().collect();
633 let (status_code, body_end) = lines.last().map_or((None, lines.len()), |last_line| {
634 let trimmed = last_line.trim();
635 // Check if last line looks like an HTTP status code (3 digits)
636 if trimmed.len() == 3 && trimmed.chars().all(|c| c.is_ascii_digit()) {
637 (
638 trimmed.parse::<u16>().ok(),
639 lines.len().saturating_sub(1), // Exclude status code line
640 )
641 } else {
642 // Last line is not a status code, include it in body
643 (None, lines.len())
644 }
645 });
646
647 // Find the boundary between headers and body (empty line)
648 let mut header_end = 0;
649 let mut found_empty_line = false;
650 for (i, line) in lines.iter().enumerate() {
651 if line.trim().is_empty() && i > 0 {
652 // Found empty line separating headers from body
653 header_end = i;
654 found_empty_line = true;
655 break;
656 }
657 }
658
659 // Extract headers and body
660 let (headers_text, body_lines) = if found_empty_line {
661 let headers: Vec<&str> = lines[..header_end].to_vec();
662 // Check if headers section actually contains non-empty lines
663 // If not, treat as if there are no headers (empty line is just formatting)
664 let has_actual_headers = headers.iter().any(|h| !h.trim().is_empty());
665 if has_actual_headers {
666 // Skip the empty line and status code line at the end
667 let body: Vec<&str> = if header_end + 1 < body_end {
668 lines[header_end + 1..body_end].to_vec()
669 } else {
670 vec![]
671 };
672 (headers.join("\n"), body.join("\n"))
673 } else {
674 // No actual headers, treat entire output as body (up to body_end)
675 let body: Vec<&str> = if body_end > 0 {
676 // Include everything up to body_end, filtering out empty lines
677 lines[..body_end]
678 .iter()
679 .filter(|line| !line.trim().is_empty())
680 .copied()
681 .collect()
682 } else {
683 vec![]
684 };
685 (String::new(), body.join("\n"))
686 }
687 } else {
688 // No headers found, treat entire output as body (up to body_end)
689 let body: Vec<&str> = if body_end > 0 {
690 lines[..body_end].to_vec()
691 } else {
692 vec![]
693 };
694 (String::new(), body.join("\n"))
695 };
696
697 // Parse headers
698 let retry_after_seconds = if headers_text.is_empty() {
699 None
700 } else {
701 extract_retry_after(&headers_text)
702 };
703
704 // Check for HTTP errors
705 if let Some(code) = status_code
706 && code >= 400
707 {
708 // Check if we got HTTP 429 (Too Many Requests)
709 if code == 429 {
710 let mut error_msg = "HTTP 429 Too Many Requests - rate limited by server".to_string();
711 if let Some(retry_after) = retry_after_seconds {
712 error_msg.push_str(" (Retry-After: ");
713 error_msg.push_str(&retry_after.to_string());
714 error_msg.push_str("s)");
715 }
716 return Err(error_msg.into());
717 }
718 if code == 503 {
719 let mut error_msg = "HTTP 503 Service Unavailable".to_string();
720 if let Some(retry_after) = retry_after_seconds {
721 error_msg.push_str(" (Retry-After: ");
722 error_msg.push_str(&retry_after.to_string());
723 error_msg.push_str("s)");
724 }
725 return Err(error_msg.into());
726 }
727 }
728
729 // Check curl exit status for other errors
730 if !out.status.success() {
731 let error_msg = map_curl_error(out.status.code(), out.status);
732 return Err(error_msg.into());
733 }
734
735 Ok(body_lines)
736}
737
#[cfg(test)]
mod tests {
    use super::*;

    /// Mirror of the redaction logic in `redact_url_for_logging`. The real function is
    /// compiled only on Windows, so these platform-independent tests exercise a local
    /// copy of the same expression.
    fn redact_url(url: &str) -> String {
        url.find('?').map_or_else(
            || url.to_string(),
            |query_start| format!("{}?[REDACTED]", &url[..query_start]),
        )
    }

    #[test]
    fn test_get_curl_path_returns_valid_path() {
        let path = get_curl_path();
        // Should return either an absolute path (any root or drive) or "curl"
        assert!(
            path == "curl" || std::path::Path::new(path).is_absolute(),
            "Expected valid curl path, got: {path}"
        );
    }

    #[test]
    fn test_get_curl_path_is_cached() {
        // Calling get_curl_path twice should return the same value
        let path1 = get_curl_path();
        let path2 = get_curl_path();
        assert_eq!(path1, path2, "Curl path should be cached and consistent");
    }

    #[test]
    #[cfg(unix)]
    fn test_get_curl_path_prefers_absolute_on_unix() {
        // With the override exported, get_curl_path deliberately returns "curl".
        if std::env::var("PACSEA_CURL_PATH").is_ok() {
            return;
        }
        let path = get_curl_path();
        // On Unix systems where curl is installed in standard locations,
        // we should get an absolute path
        let installed_in_standard_location = ["/usr/bin/curl", "/bin/curl", "/usr/local/bin/curl"]
            .iter()
            .any(|candidate| std::path::Path::new(candidate).exists());
        if installed_in_standard_location {
            assert!(
                path.starts_with('/'),
                "Expected absolute path on Unix when curl is in standard location, got: {path}"
            );
        }
    }

    #[test]
    fn test_redact_url_for_logging_with_query_params() {
        // URL with query parameters should be redacted
        let url_with_params = "https://api.example.com/search?apikey=secret123&query=test";
        let redacted = redact_url(url_with_params);
        assert_eq!(redacted, "https://api.example.com/search?[REDACTED]");
        assert!(!redacted.contains("secret123"));
        assert!(!redacted.contains("apikey"));
    }

    #[test]
    fn test_redact_url_for_logging_without_query_params() {
        // URL without query parameters should remain unchanged
        let url_no_params = "https://archlinux.org/mirrors/status/json/";
        let redacted = redact_url(url_no_params);
        assert_eq!(redacted, url_no_params);
    }

    #[test]
    fn test_redact_url_for_logging_empty_query() {
        // URL with empty query string should still redact
        let url_empty_query = "https://example.com/path?";
        let redacted = redact_url(url_empty_query);
        assert_eq!(redacted, "https://example.com/path?[REDACTED]");
    }

    #[test]
    fn test_extract_http_code_from_output() {
        assert_eq!(
            extract_http_code_from_output("body\n__HTTP_CODE__:404"),
            Some(404)
        );
        assert_eq!(extract_http_code_from_output("no marker"), None);
        assert_eq!(extract_http_code_from_output(""), None);
    }

    #[test]
    fn test_extract_http_code_from_stderr() {
        assert_eq!(
            extract_http_code_from_stderr("curl: (22) The requested URL returned error: 404"),
            Some(404)
        );
        assert_eq!(
            extract_http_code_from_stderr("curl: (6) Could not resolve host"),
            None
        );
    }

    #[test]
    fn test_extract_header_value_is_case_insensitive() {
        let headers = "HTTP/1.1 200 OK\nETag: \"abc\"\nlast-modified: Wed, 21 Oct 2015 07:28:00 GMT";
        assert_eq!(
            extract_header_value(headers, "etag"),
            Some("\"abc\"".to_string())
        );
        assert_eq!(
            extract_header_value(headers, "Last-Modified"),
            Some("Wed, 21 Oct 2015 07:28:00 GMT".to_string())
        );
        assert_eq!(extract_header_value(headers, "Retry-After"), None);
    }

    #[test]
    fn test_parse_retry_after() {
        assert_eq!(parse_retry_after("120"), Some(120));
        assert_eq!(parse_retry_after(" 5 "), Some(5));
        assert_eq!(parse_retry_after("garbage"), None);
        // A date in the past means "retry now".
        assert_eq!(
            parse_retry_after("Wed, 21 Oct 2015 07:28:00 GMT"),
            Some(0)
        );
    }

    #[test]
    #[cfg(unix)]
    fn test_map_curl_error_common_codes() {
        use std::os::unix::process::ExitStatusExt;
        use std::process::ExitStatus;

        // Test exit code 22 (HTTP error)
        let status = ExitStatus::from_raw(22 << 8);
        let msg = map_curl_error(Some(22), status);
        assert!(msg.contains("HTTP error"));

        // Test exit code 6 (DNS error)
        let status = ExitStatus::from_raw(6 << 8);
        let msg = map_curl_error(Some(6), status);
        assert!(msg.contains("resolve host"));

        // Test exit code 7 (connection error)
        let status = ExitStatus::from_raw(7 << 8);
        let msg = map_curl_error(Some(7), status);
        assert!(msg.contains("connect"));

        // Test exit code 28 (timeout)
        let status = ExitStatus::from_raw(28 << 8);
        let msg = map_curl_error(Some(28), status);
        assert!(msg.contains("timeout"));
    }
}