pacsea/util/
curl.rs

1//! Curl-based HTTP utilities for fetching JSON and text content.
2//!
3//! This module provides functions for executing curl commands and handling
4//! common error cases with user-friendly error messages.
5//!
6//! # Security
7//! - Uses absolute paths for curl binary when available (defense-in-depth against PATH hijacking)
8//! - Redacts URL query parameters in debug logs to prevent potential secret leakage
9
10use super::curl_args;
11use chrono;
12use serde_json::Value;
13use std::sync::OnceLock;
14
/// What: Module-private shorthand for the fallible return type of the curl helpers.
///
/// Inputs: None (type alias).
///
/// Output: `std::result::Result` whose error side defaults to a boxed error trait object.
///
/// Details: The boxed `Error + Send + Sync` object lets `?` and `.into()` funnel I/O, UTF-8,
/// JSON, and plain string errors into a single error type.
type Result<T, E = Box<dyn std::error::Error + Send + Sync>> = std::result::Result<T, E>;
23
/// Process-wide cache of the resolved curl executable path.
///
/// Filled on the first cached lookup and reused afterwards, so the candidate locations are
/// probed at most once per process.
static CURL_PATH: OnceLock<String> = OnceLock::new();
26
27/// What: Find the curl binary path, preferring absolute paths for security.
28///
29/// Inputs: None
30///
31/// Output:
32/// - Path to curl binary (absolute path if found, otherwise "curl" for PATH lookup)
33///
34/// Details:
35/// - If `PACSEA_CURL_PATH` env var is set, returns "curl" to use PATH lookup (for testing)
36/// - On Unix: Checks `/usr/bin/curl`, `/bin/curl`, `/usr/local/bin/curl`
37/// - On Windows: Checks system paths (System32, Git, MSYS2, Cygwin, Chocolatey)
38///   and user paths (Scoop, `WinGet`, local installs)
39/// - Falls back to PATH lookup if no absolute path is found
40/// - Result is cached for performance using `OnceLock` (except when env override is set)
41/// - Defense-in-depth measure against PATH hijacking attacks
42fn get_curl_path() -> &'static str {
43    // Check for test override BEFORE using cache - allows tests to inject fake curl
44    // This check is outside OnceLock so it's evaluated on every call
45    if std::env::var("PACSEA_CURL_PATH").is_ok() {
46        // Leak a static string for the "curl" fallback in test mode
47        // This is intentional: tests need a consistent &'static str return type
48        return Box::leak(Box::new("curl".to_string()));
49    }
50
51    CURL_PATH.get_or_init(|| {
52        // Check common absolute paths first (defense-in-depth against PATH hijacking)
53        #[cfg(unix)]
54        {
55            for path in ["/usr/bin/curl", "/bin/curl", "/usr/local/bin/curl"] {
56                if std::path::Path::new(path).exists() {
57                    tracing::trace!(curl_path = path, "Using absolute path for curl");
58                    return path.to_string();
59                }
60            }
61        }
62
63        #[cfg(target_os = "windows")]
64        {
65            // On Windows, check common system installation paths first
66            let system_paths = [
67                r"C:\Windows\System32\curl.exe",
68                r"C:\Program Files\Git\mingw64\bin\curl.exe",
69                r"C:\Program Files (x86)\Git\mingw64\bin\curl.exe",
70                r"C:\Program Files\curl\bin\curl.exe",
71                r"C:\curl\bin\curl.exe",
72                r"C:\ProgramData\chocolatey\bin\curl.exe",
73                r"C:\msys64\usr\bin\curl.exe",
74                r"C:\msys64\mingw64\bin\curl.exe",
75                r"C:\cygwin64\bin\curl.exe",
76                r"C:\cygwin\bin\curl.exe",
77            ];
78
79            for path in system_paths {
80                if std::path::Path::new(path).exists() {
81                    tracing::trace!(curl_path = path, "Using absolute path for curl on Windows");
82                    return path.to_string();
83                }
84            }
85
86            // Check user-specific paths (Scoop, MSYS2, local installs)
87            if let Ok(user_profile) = std::env::var("USERPROFILE") {
88                let user_paths = [
89                    // Scoop
90                    format!(r"{user_profile}\scoop\shims\curl.exe"),
91                    format!(r"{user_profile}\scoop\apps\curl\current\bin\curl.exe"),
92                    format!(r"{user_profile}\scoop\apps\msys2\current\usr\bin\curl.exe"),
93                    format!(r"{user_profile}\scoop\apps\msys2\current\mingw64\bin\curl.exe"),
94                    // MSYS2 user installs
95                    format!(r"{user_profile}\msys64\usr\bin\curl.exe"),
96                    format!(r"{user_profile}\msys64\mingw64\bin\curl.exe"),
97                    format!(r"{user_profile}\msys2\usr\bin\curl.exe"),
98                    format!(r"{user_profile}\msys2\mingw64\bin\curl.exe"),
99                    // Other user paths
100                    format!(r"{user_profile}\.local\bin\curl.exe"),
101                    format!(r"{user_profile}\AppData\Local\Microsoft\WinGet\Packages\curl.exe"),
102                ];
103
104                for path in user_paths {
105                    if std::path::Path::new(&path).exists() {
106                        tracing::trace!(
107                            curl_path = %path,
108                            "Using user-specific path for curl on Windows"
109                        );
110                        return path;
111                    }
112                }
113            }
114        }
115
116        // Fallback to PATH lookup
117        tracing::trace!("No absolute curl path found, falling back to PATH lookup");
118        "curl".to_string()
119    })
120}
121
122/// What: Resolved curl executable path for subprocess or shell wrappers (e.g. privileged fetch).
123///
124/// Inputs:
125/// - None.
126///
127/// Output:
128/// - Path string matching the same resolution used by curl helpers (absolute binary when found).
129///
130/// Details:
131/// - Prefer this over hard-coding `curl` so planners can probe `--version` and build argv-safe commands.
132#[must_use]
133pub fn curl_binary_path() -> &'static str {
134    get_curl_path()
135}
136
/// What: Strip the query string from a URL before it is written to a log.
///
/// Inputs:
/// - `url`: Full URL, with or without a `?query` part
///
/// Output:
/// - Everything before the first `?` followed by `?[REDACTED]`, or the URL unchanged when it
///   contains no `?`
///
/// Details:
/// - Keeps API keys or tokens passed as query parameters out of log output
/// - Only compiled on Windows, where the curl helpers log the request URL
#[cfg(target_os = "windows")]
fn redact_url_for_logging(url: &str) -> String {
    url.split_once('?').map_or_else(
        || url.to_owned(),
        |(before_query, _)| format!("{before_query}?[REDACTED]"),
    )
}
155
/// What: Extract HTTP code from curl's `-w` output format.
///
/// Inputs:
/// - `output`: The stdout output from curl that may contain `__HTTP_CODE__:XXX`
///
/// Output:
/// - Some(u16) if an HTTP code was found, None otherwise
///
/// Details:
/// - Looks for the `__HTTP_CODE__:` marker we add via `-w` flag
/// - curl appends the write-out text after the response body, so lines are scanned from the
///   end: a body line that happens to start with the marker cannot shadow the real status
/// - Returns None when the last marker line does not carry a valid `u16`
fn extract_http_code_from_output(output: &str) -> Option<u16> {
    const MARKER: &str = "__HTTP_CODE__:";
    output
        .lines()
        .rev()
        .find_map(|line| line.strip_prefix(MARKER))
        .and_then(|code| code.trim().parse().ok())
}
173
/// What: Pull the HTTP status out of curl's stderr failure message.
///
/// Inputs:
/// - `stderr`: Text curl wrote to stderr
///
/// Output:
/// - Some(u16) when the digits following "returned error: " form a valid `u16`, None otherwise
///
/// Details:
/// - Targets curl's message shape: "curl: (22) The requested URL returned error: 404"
/// - Only the run of ASCII digits directly after the first occurrence of the phrase is parsed
fn extract_http_code_from_stderr(stderr: &str) -> Option<u16> {
    let (_, after_phrase) = stderr.split_once("returned error: ")?;
    // ASCII digits are single bytes, so the byte count is a valid char boundary for slicing.
    let digit_count = after_phrase
        .bytes()
        .take_while(u8::is_ascii_digit)
        .count();
    after_phrase[..digit_count].parse().ok()
}
195
196/// What: Maps curl exit code to a human-readable error message with HTTP code info.
197///
198/// Inputs:
199/// - `code`: Exit code from curl process.
200/// - `status`: The full process exit status for signal handling.
201/// - `http_code`: The actual HTTP status code from the server.
202///
203/// Output:
204/// - Human-readable error string describing the network issue with specific HTTP code.
205///
206/// Details:
207/// - Provides more specific error messages when HTTP code is known
208/// - 404 is "Resource not found", 429 is "Rate limited", etc.
209fn map_curl_error_with_http_code(
210    code: Option<i32>,
211    status: std::process::ExitStatus,
212    http_code: u16,
213) -> String {
214    // If we have the actual HTTP code, provide a more specific message
215    match http_code {
216        404 => "HTTP 404: Resource not found (package may not exist in repository)".to_string(),
217        429 => "HTTP 429: Rate limited by server".to_string(),
218        500 => "HTTP 500: Internal server error".to_string(),
219        502 => "HTTP 502: Bad gateway".to_string(),
220        503 => "HTTP 503: Service temporarily unavailable".to_string(),
221        504 => "HTTP 504: Gateway timeout".to_string(),
222        _ if (400..500).contains(&http_code) => {
223            format!("HTTP {http_code}: Client error")
224        }
225        _ if http_code >= 500 => {
226            format!("HTTP {http_code}: Server error (temporarily unavailable)")
227        }
228        _ => map_curl_error(code, status),
229    }
230}
231
/// What: Translate a curl exit code into a user-friendly error message.
///
/// Inputs:
/// - `code`: Exit code of the curl process, or `None` when the process produced no exit code
/// - `status`: Full exit status, consulted only when `code` is `None`
///
/// Output:
/// - Error message string
///
/// Details:
/// - Exit codes 22, 6, 7 and 28 have dedicated descriptions; any other code is reported
///   numerically
/// - Without an exit code, Unix builds report the terminating signal when there is one; every
///   other case prints the debug form of `status`
fn map_curl_error(code: Option<i32>, status: std::process::ExitStatus) -> String {
    match code {
        Some(22) => "HTTP error from server (code unknown)".to_string(),
        Some(6) => "Could not resolve host (DNS/network issue)".to_string(),
        Some(7) => "Failed to connect to host (network unreachable)".to_string(),
        Some(28) => "Operation timeout".to_string(),
        Some(other) => format!("curl failed with exit code {other}"),
        None => {
            // No exit code: the process was terminated by a signal or ended abnormally.
            #[cfg(unix)]
            {
                use std::os::unix::process::ExitStatusExt;
                status.signal().map_or_else(
                    || format!("curl process failed: {status:?}"),
                    |signal| format!("curl process terminated by signal {signal}"),
                )
            }
            #[cfg(not(unix))]
            {
                format!("curl process failed: {status:?}")
            }
        }
    }
}
270
271/// What: Fetch JSON from a URL using curl and parse into `serde_json::Value`.
272///
273/// Inputs:
274/// - `url`: HTTP(S) URL to request
275///
276/// Output:
277/// - `Ok(Value)` on success; `Err` if curl fails or the response is not valid JSON
278///
279/// # Errors
280/// - Returns `Err` when curl command execution fails (I/O error or curl not found)
281/// - Returns `Err` when curl exits with non-zero status (network errors, HTTP errors, timeouts)
282/// - Returns `Err` when response body cannot be decoded as UTF-8
283/// - Returns `Err` when response body cannot be parsed as JSON
284///
285/// Details:
286/// - Executes curl with appropriate flags and parses the UTF-8 body with `serde_json`.
287/// - On Windows, uses `-k` flag to skip SSL certificate verification.
288/// - Provides user-friendly error messages for common curl failure cases.
289/// - For HTTP errors, includes the actual status code in the error message when available.
290pub fn curl_json(url: &str) -> Result<Value> {
291    let mut args = curl_args(url, &[]);
292    // Add write-out format to capture HTTP status code on failure
293    // The %{http_code} is curl's write-out format, not a Rust format string
294    #[allow(clippy::literal_string_with_formatting_args)]
295    let write_out_format = "\n__HTTP_CODE__:%{http_code}".to_string();
296    args.push("-w".to_string());
297    args.push(write_out_format);
298    let curl_bin = get_curl_path();
299    #[cfg(target_os = "windows")]
300    {
301        // On Windows, log curl command for debugging (URL redacted for security)
302        let safe_url = redact_url_for_logging(url);
303        tracing::debug!(
304            curl_bin = %curl_bin,
305            url = %safe_url,
306            "Executing curl command on Windows"
307        );
308    }
309    let out = std::process::Command::new(curl_bin).args(&args).output()?;
310    if !out.status.success() {
311        // Try to extract HTTP status code from stderr or stdout
312        let stderr = String::from_utf8_lossy(&out.stderr);
313        let stdout = String::from_utf8_lossy(&out.stdout);
314
315        // Look for HTTP code in the output
316        let http_code = extract_http_code_from_output(&stdout)
317            .or_else(|| extract_http_code_from_stderr(&stderr));
318
319        let error_msg = if let Some(code) = http_code {
320            map_curl_error_with_http_code(out.status.code(), out.status, code)
321        } else {
322            map_curl_error(out.status.code(), out.status)
323        };
324
325        #[cfg(target_os = "windows")]
326        {
327            let safe_url = redact_url_for_logging(url);
328            // On Windows, also log stderr for debugging
329            if !stderr.is_empty() {
330                tracing::warn!(stderr = %stderr, url = %safe_url, "curl stderr output on Windows");
331            }
332            // Also log stdout in case there's useful info there
333            if !stdout.is_empty() {
334                tracing::debug!(stdout = %stdout, url = %safe_url, "curl stdout on Windows (non-success)");
335            }
336        }
337        return Err(error_msg.into());
338    }
339    let raw_body = String::from_utf8(out.stdout)?;
340    // Strip the __HTTP_CODE__:XXX suffix we added via -w flag
341    let body = raw_body
342        .lines()
343        .filter(|line| !line.starts_with("__HTTP_CODE__:"))
344        .collect::<Vec<_>>()
345        .join("\n");
346    #[cfg(target_os = "windows")]
347    {
348        // On Windows, log response details for debugging API issues (URL redacted)
349        let safe_url = redact_url_for_logging(url);
350        if body.len() < 500 {
351            tracing::debug!(
352                url = %safe_url,
353                response_length = body.len(),
354                "curl response received on Windows"
355            );
356        } else {
357            tracing::debug!(
358                url = %safe_url,
359                response_length = body.len(),
360                "curl response received on Windows (truncated)"
361            );
362        }
363    }
364    let v: Value = serde_json::from_str(&body)?;
365    Ok(v)
366}
367
368/// What: Fetch plain text from a URL using curl.
369///
370/// Inputs:
371/// - `url`: URL to request
372///
373/// Output:
374/// - `Ok(String)` with response body; `Err` if curl or UTF-8 decoding fails
375///
376/// # Errors
377/// - Returns `Err` when curl command execution fails (I/O error or curl not found)
378/// - Returns `Err` when curl exits with non-zero status (network errors, HTTP errors, timeouts)
379/// - Returns `Err` when response body cannot be decoded as UTF-8
380///
381/// Details:
382/// - Executes curl with appropriate flags and returns the raw body as a `String`.
383/// - On Windows, uses `-k` flag to skip SSL certificate verification.
384/// - Provides user-friendly error messages for common curl failure cases.
385pub fn curl_text(url: &str) -> Result<String> {
386    curl_text_with_args(url, &[])
387}
388
389/// What: Parse Retry-After header value into seconds.
390///
391/// Inputs:
392/// - `retry_after`: Retry-After header value (can be seconds as number or HTTP-date)
393///
394/// Output:
395/// - `Some(seconds)` if parsing succeeds, `None` otherwise
396///
397/// Details:
398/// - Supports both numeric format (seconds) and HTTP-date format (RFC 7231).
399/// - For HTTP-date, calculates seconds until that date.
400fn parse_retry_after(retry_after: &str) -> Option<u64> {
401    let trimmed = retry_after.trim();
402    // Try parsing as number (seconds)
403    if let Ok(seconds) = trimmed.parse::<u64>() {
404        return Some(seconds);
405    }
406    // Try parsing as HTTP-date (RFC 7231)
407    // Common formats: "Wed, 21 Oct 2015 07:28:00 GMT", "Wed, 21 Oct 2015 07:28:00 +0000"
408    if let Ok(dt) = chrono::DateTime::parse_from_rfc2822(trimmed) {
409        let now = chrono::Utc::now();
410        let retry_time = dt.with_timezone(&chrono::Utc);
411        if retry_time > now {
412            let duration = retry_time - now;
413            let seconds = duration.num_seconds().max(0);
414            // Safe: seconds is non-negative, and u64::MAX is much larger than any reasonable retry time
415            #[allow(clippy::cast_sign_loss)]
416            return Some(seconds as u64);
417        }
418        return Some(0);
419    }
420    // Try RFC 3339 format
421    if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(trimmed) {
422        let now = chrono::Utc::now();
423        let retry_time = dt.with_timezone(&chrono::Utc);
424        if retry_time > now {
425            let duration = retry_time - now;
426            let seconds = duration.num_seconds().max(0);
427            // Safe: seconds is non-negative, and u64::MAX is much larger than any reasonable retry time
428            #[allow(clippy::cast_sign_loss)]
429            return Some(seconds as u64);
430        }
431        return Some(0);
432    }
433    None
434}
435
/// What: Extract header value from HTTP response headers (case-insensitive).
///
/// Inputs:
/// - `headers_text`: Raw HTTP headers text (from curl -i output)
/// - `header_name`: Name of the header to extract (ASCII case-insensitive)
///
/// Output:
/// - `Some(value)` for the first matching header line, `None` when no line matches
///
/// Details:
/// - Each line is split at its first colon; the part before it (ignoring leading whitespace)
///   must equal `header_name` ignoring ASCII case.
/// - Returns the trimmed text after that colon, so values containing colons (dates, URLs) are
///   kept intact.
/// - Lines without a colon (such as the status line) are skipped.
/// - Comparison is allocation-free; no per-line lowercase copies are made.
fn extract_header_value(headers_text: &str, header_name: &str) -> Option<String> {
    headers_text.lines().find_map(|line| {
        let (name, value) = line.split_once(':')?;
        name.trim_start()
            .eq_ignore_ascii_case(header_name)
            .then(|| value.trim().to_string())
    })
}
461
462/// What: Extract Retry-After header value from HTTP response headers.
463///
464/// Inputs:
465/// - `headers_text`: Raw HTTP headers text (from curl -i output)
466///
467/// Output:
468/// - `Some(seconds)` if Retry-After header found and parsed, `None` otherwise
469///
470/// Details:
471/// - Searches for "Retry-After:" header (case-insensitive).
472/// - Parses the value using `parse_retry_after()`.
473fn extract_retry_after(headers_text: &str) -> Option<u64> {
474    extract_header_value(headers_text, "Retry-After")
475        .as_deref()
476        .and_then(parse_retry_after)
477}
478
/// Parsed result of a curl request made with headers included in the output.
///
/// Bundles the body with the status code and the `Retry-After`, `ETag`, and `Last-Modified`
/// header values extracted from the response.
#[derive(Clone, Debug)]
pub struct CurlResponse {
    /// Body text of the response.
    pub body: String,
    /// HTTP status code, when one could be read from curl's output.
    pub status_code: Option<u16>,
    /// Delay requested by the Retry-After header, in seconds, when present.
    pub retry_after_seconds: Option<u64>,
    /// Value of the `ETag` header, when present.
    pub etag: Option<String>,
    /// Value of the Last-Modified header, when present.
    pub last_modified: Option<String>,
}
493
494/// What: Fetch plain text from a URL using curl with custom arguments, including headers.
495///
496/// Inputs:
497/// - `url`: URL to request
498/// - `extra_args`: Additional curl arguments (e.g., `["--max-time", "10"]`)
499///
500/// Output:
501/// - `Ok(CurlResponse)` with response body, status code, and parsed headers; `Err` if curl or UTF-8 decoding fails
502///
503/// # Errors
504/// - Returns `Err` when curl command execution fails (I/O error or curl not found)
505/// - Returns `Err` when curl exits with non-zero status (network errors, HTTP errors, timeouts)
506/// - Returns `Err` when response body cannot be decoded as UTF-8
507///
508/// Details:
509/// - Executes curl with `-i` flag to include headers in output.
510/// - Uses `-w "\n%{http_code}\n"` to get HTTP status code at the end.
511/// - Parses Retry-After header from response headers.
512/// - Separates headers from body in the response.
513pub fn curl_text_with_args_headers(url: &str, extra_args: &[&str]) -> Result<CurlResponse> {
514    let mut args = curl_args(url, extra_args);
515    // Include headers in output (-i flag)
516    args.push("-i".to_string());
517    // Append write-out format to get HTTP status code at the end
518    args.push("-w".to_string());
519    args.push("\n%{http_code}\n".to_string());
520    let curl_bin = get_curl_path();
521    let out = std::process::Command::new(curl_bin)
522        .args(&args)
523        .output()
524        .map_err(|e| {
525            format!("curl command failed to execute: {e} (is curl installed and in PATH?)")
526        })?;
527
528    let stdout = String::from_utf8(out.stdout)?;
529
530    // Parse status code from the end of output (last line should be the status code)
531    let status_code = stdout
532        .lines()
533        .last()
534        .and_then(|line| line.trim().parse::<u16>().ok());
535
536    // Find the boundary between headers and body (empty line)
537    let lines: Vec<&str> = stdout.lines().collect();
538    let mut header_end = 0;
539    let mut found_empty_line = false;
540    for (i, line) in lines.iter().enumerate() {
541        if line.trim().is_empty() && i > 0 {
542            // Found empty line separating headers from body
543            header_end = i;
544            found_empty_line = true;
545            break;
546        }
547    }
548
549    // Extract headers and body
550    let (headers_text, body_lines) = if found_empty_line {
551        let headers: Vec<&str> = lines[..header_end].to_vec();
552        // Skip the empty line and status code line at the end
553        let body_end = lines.len().saturating_sub(1); // Exclude status code line
554        let body: Vec<&str> = if header_end + 1 < body_end {
555            lines[header_end + 1..body_end].to_vec()
556        } else {
557            vec![]
558        };
559        (headers.join("\n"), body.join("\n"))
560    } else {
561        // No headers found, treat entire output as body (minus status code)
562        let body_end = lines.len().saturating_sub(1);
563        let body: Vec<&str> = if body_end > 0 {
564            lines[..body_end].to_vec()
565        } else {
566            vec![]
567        };
568        (String::new(), body.join("\n"))
569    };
570
571    // Parse headers
572    let retry_after_seconds = (!headers_text.is_empty())
573        .then(|| extract_retry_after(&headers_text))
574        .flatten();
575    let etag = (!headers_text.is_empty())
576        .then(|| extract_header_value(&headers_text, "ETag"))
577        .flatten();
578    let last_modified = (!headers_text.is_empty())
579        .then(|| extract_header_value(&headers_text, "Last-Modified"))
580        .flatten();
581
582    Ok(CurlResponse {
583        body: body_lines,
584        status_code,
585        retry_after_seconds,
586        etag,
587        last_modified,
588    })
589}
590
591/// What: Fetch plain text from a URL using curl with custom arguments.
592///
593/// Inputs:
594/// - `url`: URL to request
595/// - `extra_args`: Additional curl arguments (e.g., `["--max-time", "10"]`)
596///
597/// Output:
598/// - `Ok(String)` with response body; `Err` if curl or UTF-8 decoding fails
599///
600/// # Errors
601/// - Returns `Err` when curl command execution fails (I/O error or curl not found)
602/// - Returns `Err` when curl exits with non-zero status (network errors, HTTP errors, timeouts)
603/// - Returns `Err` when response body cannot be decoded as UTF-8
604/// - Returns `Err` with message containing "429" when HTTP 429 (Too Many Requests) is received
605///
606/// Details:
607/// - Executes curl with appropriate flags plus extra arguments.
608/// - On Windows, uses `-k` flag to skip SSL certificate verification.
609/// - Uses `-i` flag to include headers for Retry-After parsing.
610/// - Uses `-w "\n%{http_code}\n"` to detect HTTP status codes, especially 429.
611/// - Provides user-friendly error messages for common curl failure cases.
612/// - HTTP 429/503 errors include Retry-After information when available.
613pub fn curl_text_with_args(url: &str, extra_args: &[&str]) -> Result<String> {
614    let mut args = curl_args(url, extra_args);
615    // Include headers in output (-i flag) for Retry-After parsing
616    args.push("-i".to_string());
617    // Append write-out format to get HTTP status code at the end
618    args.push("-w".to_string());
619    args.push("\n%{http_code}\n".to_string());
620    let curl_bin = get_curl_path();
621    let out = std::process::Command::new(curl_bin)
622        .args(&args)
623        .output()
624        .map_err(|e| {
625            format!("curl command failed to execute: {e} (is curl installed and in PATH?)")
626        })?;
627
628    let stdout = String::from_utf8(out.stdout)?;
629
630    // Parse status code from the end of output (last line should be the status code)
631    // Check if last line is a numeric status code (3 digits)
632    let lines: Vec<&str> = stdout.lines().collect();
633    let (status_code, body_end) = lines.last().map_or((None, lines.len()), |last_line| {
634        let trimmed = last_line.trim();
635        // Check if last line looks like an HTTP status code (3 digits)
636        if trimmed.len() == 3 && trimmed.chars().all(|c| c.is_ascii_digit()) {
637            (
638                trimmed.parse::<u16>().ok(),
639                lines.len().saturating_sub(1), // Exclude status code line
640            )
641        } else {
642            // Last line is not a status code, include it in body
643            (None, lines.len())
644        }
645    });
646
647    // Find the boundary between headers and body (empty line)
648    let mut header_end = 0;
649    let mut found_empty_line = false;
650    for (i, line) in lines.iter().enumerate() {
651        if line.trim().is_empty() && i > 0 {
652            // Found empty line separating headers from body
653            header_end = i;
654            found_empty_line = true;
655            break;
656        }
657    }
658
659    // Extract headers and body
660    let (headers_text, body_lines) = if found_empty_line {
661        let headers: Vec<&str> = lines[..header_end].to_vec();
662        // Check if headers section actually contains non-empty lines
663        // If not, treat as if there are no headers (empty line is just formatting)
664        let has_actual_headers = headers.iter().any(|h| !h.trim().is_empty());
665        if has_actual_headers {
666            // Skip the empty line and status code line at the end
667            let body: Vec<&str> = if header_end + 1 < body_end {
668                lines[header_end + 1..body_end].to_vec()
669            } else {
670                vec![]
671            };
672            (headers.join("\n"), body.join("\n"))
673        } else {
674            // No actual headers, treat entire output as body (up to body_end)
675            let body: Vec<&str> = if body_end > 0 {
676                // Include everything up to body_end, filtering out empty lines
677                lines[..body_end]
678                    .iter()
679                    .filter(|line| !line.trim().is_empty())
680                    .copied()
681                    .collect()
682            } else {
683                vec![]
684            };
685            (String::new(), body.join("\n"))
686        }
687    } else {
688        // No headers found, treat entire output as body (up to body_end)
689        let body: Vec<&str> = if body_end > 0 {
690            lines[..body_end].to_vec()
691        } else {
692            vec![]
693        };
694        (String::new(), body.join("\n"))
695    };
696
697    // Parse headers
698    let retry_after_seconds = if headers_text.is_empty() {
699        None
700    } else {
701        extract_retry_after(&headers_text)
702    };
703
704    // Check for HTTP errors
705    if let Some(code) = status_code
706        && code >= 400
707    {
708        // Check if we got HTTP 429 (Too Many Requests)
709        if code == 429 {
710            let mut error_msg = "HTTP 429 Too Many Requests - rate limited by server".to_string();
711            if let Some(retry_after) = retry_after_seconds {
712                error_msg.push_str(" (Retry-After: ");
713                error_msg.push_str(&retry_after.to_string());
714                error_msg.push_str("s)");
715            }
716            return Err(error_msg.into());
717        }
718        if code == 503 {
719            let mut error_msg = "HTTP 503 Service Unavailable".to_string();
720            if let Some(retry_after) = retry_after_seconds {
721                error_msg.push_str(" (Retry-After: ");
722                error_msg.push_str(&retry_after.to_string());
723                error_msg.push_str("s)");
724            }
725            return Err(error_msg.into());
726        }
727    }
728
729    // Check curl exit status for other errors
730    if !out.status.success() {
731        let error_msg = map_curl_error(out.status.code(), out.status);
732        return Err(error_msg.into());
733    }
734
735    Ok(body_lines)
736}
737
/// Unit tests for curl path resolution, URL redaction, and the pure parsing helpers.
#[cfg(test)]
mod tests {
    use super::*;

    /// Redaction logic under test: the real helper where it is compiled (Windows).
    #[cfg(target_os = "windows")]
    fn redact_url(url: &str) -> String {
        redact_url_for_logging(url)
    }

    /// Redaction logic under test: `redact_url_for_logging` is Windows-only, so other
    /// platforms exercise a mirror of its logic to keep the behavior covered everywhere.
    #[cfg(not(target_os = "windows"))]
    fn redact_url(url: &str) -> String {
        url.find('?').map_or_else(
            || url.to_string(),
            |query_start| format!("{}?[REDACTED]", &url[..query_start]),
        )
    }

    #[test]
    fn test_get_curl_path_returns_valid_path() {
        let path = get_curl_path();
        // Should return either an absolute path or "curl"
        assert!(
            path == "curl" || std::path::Path::new(path).is_absolute(),
            "Expected valid curl path, got: {path}"
        );
    }

    #[test]
    fn test_get_curl_path_is_cached() {
        // Calling get_curl_path twice should return the same value
        let path1 = get_curl_path();
        let path2 = get_curl_path();
        assert_eq!(path1, path2, "Curl path should be cached and consistent");
    }

    #[test]
    #[cfg(unix)]
    fn test_get_curl_path_prefers_absolute_on_unix() {
        // The test override forces "curl"; the preference only applies without it.
        if std::env::var_os("PACSEA_CURL_PATH").is_some() {
            return;
        }
        let path = get_curl_path();
        // On Unix systems where curl is installed in standard locations,
        // we should get an absolute path
        let installed_in_standard_location = ["/usr/bin/curl", "/bin/curl", "/usr/local/bin/curl"]
            .iter()
            .any(|candidate| std::path::Path::new(candidate).exists());
        if installed_in_standard_location {
            assert!(
                path.starts_with('/'),
                "Expected absolute path on Unix when curl is in standard location, got: {path}"
            );
        }
    }

    #[test]
    fn test_redact_url_for_logging_with_query_params() {
        // URL with query parameters should be redacted
        let url_with_params = "https://api.example.com/search?apikey=secret123&query=test";
        let redacted = redact_url(url_with_params);
        assert_eq!(redacted, "https://api.example.com/search?[REDACTED]");
        assert!(!redacted.contains("secret123"));
        assert!(!redacted.contains("apikey"));
    }

    #[test]
    fn test_redact_url_for_logging_without_query_params() {
        // URL without query parameters should remain unchanged
        let url_no_params = "https://archlinux.org/mirrors/status/json/";
        let redacted = redact_url(url_no_params);
        assert_eq!(redacted, url_no_params);
    }

    #[test]
    fn test_redact_url_for_logging_empty_query() {
        // URL with empty query string should still redact
        let url_empty_query = "https://example.com/path?";
        let redacted = redact_url(url_empty_query);
        assert_eq!(redacted, "https://example.com/path?[REDACTED]");
    }

    #[test]
    #[cfg(unix)]
    fn test_map_curl_error_common_codes() {
        use std::os::unix::process::ExitStatusExt;
        use std::process::ExitStatus;

        // Test exit code 22 (HTTP error)
        let status = ExitStatus::from_raw(22 << 8);
        let msg = map_curl_error(Some(22), status);
        assert!(msg.contains("HTTP error"));

        // Test exit code 6 (DNS error)
        let status = ExitStatus::from_raw(6 << 8);
        let msg = map_curl_error(Some(6), status);
        assert!(msg.contains("resolve host"));

        // Test exit code 7 (connection error)
        let status = ExitStatus::from_raw(7 << 8);
        let msg = map_curl_error(Some(7), status);
        assert!(msg.contains("connect"));

        // Test exit code 28 (timeout)
        let status = ExitStatus::from_raw(28 << 8);
        let msg = map_curl_error(Some(28), status);
        assert!(msg.contains("timeout"));
    }

    #[test]
    fn test_extract_http_code_from_output() {
        assert_eq!(extract_http_code_from_output("{}\n__HTTP_CODE__:404"), Some(404));
        assert_eq!(extract_http_code_from_output("no marker"), None);
        assert_eq!(extract_http_code_from_output("__HTTP_CODE__:abc"), None);
    }

    #[test]
    fn test_extract_http_code_from_stderr() {
        assert_eq!(
            extract_http_code_from_stderr("curl: (22) The requested URL returned error: 404"),
            Some(404)
        );
        assert_eq!(extract_http_code_from_stderr("curl: (6) Could not resolve host"), None);
    }

    #[test]
    fn test_extract_header_value_is_case_insensitive() {
        let headers = "HTTP/1.1 200 OK\r\netag: \"abc\"\r\nRetry-After: 30";
        assert_eq!(extract_header_value(headers, "ETag"), Some("\"abc\"".to_string()));
        assert_eq!(extract_header_value(headers, "retry-after"), Some("30".to_string()));
        assert_eq!(extract_header_value(headers, "Last-Modified"), None);
    }

    #[test]
    fn test_parse_retry_after_numeric_and_invalid() {
        assert_eq!(parse_retry_after(" 120 "), Some(120));
        assert_eq!(parse_retry_after("not-a-date"), None);
    }
}
pacsea/util/curl.rs

pacsea/util/
curl.rs