Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions CIDR_EXAMPLES.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# CIDR Range Scanning Examples

This file demonstrates how to use CIDR range scanning in proxy-spider.

## Basic CIDR Examples

# Single IP (equivalent to 192.168.1.100:8080)

192.168.1.100/32:8080

# Small subnet (4 IPs: .0, .1, .2, .3)

192.168.1.0/30:3128

# Medium subnet (8 IPs: .0 through .7)

10.0.0.0/29:1080

# Larger subnet (16 IPs: .240 through .255)

172.16.1.240/28:8888

# Class C subnet (256 IPs: .0 through .255)

203.0.113.0/24:9090

## Mixed with Regular Entries

# You can mix CIDR ranges with regular IP:port entries:

192.168.1.0/30:8080
127.0.0.1:8888
10.0.0.0/31:3128
8.8.8.8:53

## Comments and Invalid Lines

# Lines starting with # are treated as comments and ignored

# Invalid CIDR ranges are preserved as-is for the regular parser

invalid-cidr-range:1234
not-an-ip:port

# The same entry format works for every protocol

# Put these entries in a source file and list that file in the matching protocol section of your config (for example `[scraping.http]`)
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ futures = { version = "=0.3.31", optional = true }
hickory-resolver = "=0.25.2"
http = "=1.3.1"
httpdate = "=1.0.3"
ipnetwork = "=0.21.1"
itertools = "=0.14"
log = { version = "=0.4.28", features = [
"max_level_debug",
Expand Down
20 changes: 18 additions & 2 deletions config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ connect_timeout = 5.0
proxy = ""

# User-Agent header for scraping requests
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36"
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36"


[checking]
Expand All @@ -42,7 +42,7 @@ timeout = 60.0
connect_timeout = 5.0

# User-Agent header for proxy check requests
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36"
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36"


[output]
Expand Down Expand Up @@ -74,6 +74,16 @@ include_geolocation = true
# Proxy sources configuration
# Add local files: ["./my_proxies.txt"] or URLs
# Sources are fetched in parallel for speed
#
# CIDR Range Support:
# You can now use CIDR notation to scan entire network ranges!
# Format: IP/prefix:port (e.g., "192.168.1.0/24:8080")
# Each range expands to every IP address it contains (the first and last
# address included), each paired with the specified port.
# Examples:
# 192.168.1.0/30:8080 → 4 IPs (.0, .1, .2, .3)
# 10.0.0.0/24:3128 → 256 IPs (.0 through .255)
# 203.0.113.0/29:1080 → 8 IPs (.0 through .7)
# Mix CIDR ranges with individual IP:port entries in the same file.

[scraping.http]
enabled = true
Expand All @@ -84,6 +94,12 @@ urls = [
# "C:/Users/user/Desktop/my_http_proxies.txt",
# "file:///home/user/my_http_proxies.txt",

# CIDR range file example:
# File containing: 192.168.1.0/24:8080
# 10.0.0.0/29:3128
# 203.0.113.1:1080
# "./my_cidr_ranges.txt",

# Advanced URL configuration examples (with basic auth or custom headers):
# HTTP Basic Auth example:
# { url = "https://some.api/endpoint", basic_auth = { username = "user", password = "password123" } },
Expand Down
1 change: 0 additions & 1 deletion out/.gitkeep
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@

197 changes: 197 additions & 0 deletions src/parsers.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use std::sync::LazyLock;

use ipnetwork::IpNetwork;

pub static PROXY_REGEX: LazyLock<fancy_regex::Regex> = LazyLock::new(|| {
let pattern = r"(?:^|[^0-9A-Za-z])(?:(?P<protocol>https?|socks[45]):\/\/)?(?:(?P<username>[0-9A-Za-z]{1,64}):(?P<password>[0-9A-Za-z]{1,64})@)?(?P<host>[A-Za-z][\-\.A-Za-z]{0,251}[A-Za-z]|[A-Za-z]|(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(?:\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])){3}):(?P<port>[0-9]|[1-9][0-9]{1,3}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])(?=[^0-9A-Za-z]|$)";
fancy_regex::RegexBuilder::new(pattern)
Expand All @@ -13,10 +15,205 @@
fancy_regex::Regex::new(pattern).unwrap()
});

/// Matches CIDR proxy entries written as `A.B.C.D/prefix:port`.
///
/// Named capture groups:
/// - `network`: dotted-quad IPv4 address, each octet limited to 0-255
/// - `prefix`: prefix length, limited to 0-32
/// - `port`: port number, limited to 0-65535 (no leading zeros)
///
/// An entry must be preceded by the start of the input or by one character
/// that is not an ASCII letter or digit; that character is consumed and is
/// therefore part of the overall match. It must be followed by the end of
/// the input or by such a character, which is only looked at, not consumed.
static CIDR_REGEX: LazyLock<fancy_regex::Regex> = LazyLock::new(|| {
let pattern = r"(?:^|[^0-9A-Za-z])(?P<network>(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(?:\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])){3})/(?P<prefix>[0-9]|[12][0-9]|3[0-2]):(?P<port>[0-9]|[1-9][0-9]{1,3}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])(?=[^0-9A-Za-z]|$)";
// The pattern is a constant, so a compile failure is a programming error.
fancy_regex::Regex::new(pattern).unwrap()
});

/// Returns the `host` capture group of the first `IPV4_REGEX` match in `s`.
///
/// Yields `None` when the regex does not match, when the regex engine
/// reports an error, or when the `host` group did not take part in the
/// match.
pub fn parse_ipv4(s: &str) -> Option<String> {
    let found = IPV4_REGEX.captures(s).ok().flatten()?;
    let host = found.name("host")?;
    Some(host.as_str().to_owned())
}

Check warning on line 29 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / rustfmt

Diff in /home/runner/work/proxy-spider/proxy-spider/src/parsers.rs

/// Expands CIDR ranges in text into individual IP:port entries
/// Supports format like "192.168.1.0/24:8080" which expands to all IPs in the range
/// Handles various separators (spaces, commas, newlines, etc.) between entries
pub fn expand_cidr_ranges(text: &str) -> String {
let mut result = text.to_string();

Check failure on line 35 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / clippy (aarch64-unknown-linux-gnu,x86_64-unknown-linux-gnu,armv7-unknown-linux-gnueabihf,i686-unk...

`to_string()` called on a `&str`

Check failure on line 35 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / clippy (--features tui, aarch64-apple-darwin,aarch64-pc-windows-msvc,aarch64-unknown-linux-gnu,i6...

`to_string()` called on a `&str`
let mut offset: i32 = 0;

// Find all CIDR matches and expand them
let captures: Vec<_> =
CIDR_REGEX.captures_iter(text).filter_map(|m| m.ok()).collect();

Check failure on line 40 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / clippy (aarch64-unknown-linux-gnu,x86_64-unknown-linux-gnu,armv7-unknown-linux-gnueabihf,i686-unk...

redundant closure

Check failure on line 40 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / clippy (--features tui, aarch64-apple-darwin,aarch64-pc-windows-msvc,aarch64-unknown-linux-gnu,i6...

redundant closure

for capture in captures {
if let (Some(network), Some(prefix), Some(port)) = (
capture.name("network"),
capture.name("prefix"),
capture.name("port"),
) {
let cidr_str = format!("{}/{}", network.as_str(), prefix.as_str());

match cidr_str.parse::<IpNetwork>() {
Ok(network) => {
// Generate expanded IPs
let expanded_ips: Vec<String> = network
.iter()
.filter(|ip| ip.is_ipv4())

Check failure on line 55 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / clippy (aarch64-unknown-linux-gnu,x86_64-unknown-linux-gnu,armv7-unknown-linux-gnueabihf,i686-unk...

redundant closure

Check failure on line 55 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / clippy (--features tui, aarch64-apple-darwin,aarch64-pc-windows-msvc,aarch64-unknown-linux-gnu,i6...

redundant closure
.map(|ip| format!("{}:{}", ip, port.as_str()))
.collect();

Check warning on line 57 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / rustfmt

Diff in /home/runner/work/proxy-spider/proxy-spider/src/parsers.rs

if !expanded_ips.is_empty() {
// Get the full match including any leading non-alphanumeric character
let full_match = capture.get(0).unwrap();
let match_start =
(full_match.start() as i32 + offset) as usize;

Check failure on line 63 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / clippy (aarch64-unknown-linux-gnu,x86_64-unknown-linux-gnu,armv7-unknown-linux-gnueabihf,i686-unk...

casting `usize` to `i32` may wrap around the value on targets with 32-bit wide pointers

Check failure on line 63 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / clippy (aarch64-unknown-linux-gnu,x86_64-unknown-linux-gnu,armv7-unknown-linux-gnueabihf,i686-unk...

casting `usize` to `i32` may truncate the value on targets with 64-bit wide pointers

Check failure on line 63 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / clippy (aarch64-unknown-linux-gnu,x86_64-unknown-linux-gnu,armv7-unknown-linux-gnueabihf,i686-unk...

arithmetic operation that can potentially result in unexpected side-effects

Check failure on line 63 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / clippy (aarch64-unknown-linux-gnu,x86_64-unknown-linux-gnu,armv7-unknown-linux-gnueabihf,i686-unk...

casting `i32` to `usize` may lose the sign of the value

Check failure on line 63 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / clippy (--features tui, aarch64-apple-darwin,aarch64-pc-windows-msvc,aarch64-unknown-linux-gnu,i6...

casting `usize` to `i32` may wrap around the value on targets with 32-bit wide pointers

Check failure on line 63 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / clippy (--features tui, aarch64-apple-darwin,aarch64-pc-windows-msvc,aarch64-unknown-linux-gnu,i6...

casting `usize` to `i32` may truncate the value on targets with 64-bit wide pointers

Check failure on line 63 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / clippy (--features tui, aarch64-apple-darwin,aarch64-pc-windows-msvc,aarch64-unknown-linux-gnu,i6...

arithmetic operation that can potentially result in unexpected side-effects

Check failure on line 63 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / clippy (--features tui, aarch64-apple-darwin,aarch64-pc-windows-msvc,aarch64-unknown-linux-gnu,i6...

casting `i32` to `usize` may lose the sign of the value
let match_end =

Check warning on line 64 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / rustfmt

Diff in /home/runner/work/proxy-spider/proxy-spider/src/parsers.rs
(full_match.end() as i32 + offset) as usize;

Check failure on line 65 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / clippy (aarch64-unknown-linux-gnu,x86_64-unknown-linux-gnu,armv7-unknown-linux-gnueabihf,i686-unk...

arithmetic operation that can potentially result in unexpected side-effects

Check failure on line 65 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / clippy (aarch64-unknown-linux-gnu,x86_64-unknown-linux-gnu,armv7-unknown-linux-gnueabihf,i686-unk...

casting `i32` to `usize` may lose the sign of the value

Check failure on line 65 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / clippy (--features tui, aarch64-apple-darwin,aarch64-pc-windows-msvc,aarch64-unknown-linux-gnu,i6...

arithmetic operation that can potentially result in unexpected side-effects

Check failure on line 65 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / clippy (--features tui, aarch64-apple-darwin,aarch64-pc-windows-msvc,aarch64-unknown-linux-gnu,i6...

casting `i32` to `usize` may lose the sign of the value

// Determine what separator to use by checking what follows
let separator = if match_end < result.len() {
let next_char = result.chars().nth(match_end);
match next_char {
Some('\n') => "\n",
Some('\t') => "\t",
Some(',') => ",",
_ => " ",
}
} else {
"\n"
};

// Join expanded IPs with the detected separator

Check warning on line 80 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / rustfmt

Diff in /home/runner/work/proxy-spider/proxy-spider/src/parsers.rs
let replacement = expanded_ips.join(separator);

// Handle case where match starts with a delimiter character
let (_actual_start, prefix_char) = if match_start > 0 {
let prev_char = result.chars().nth(match_start);
if prev_char
.map_or(false, |c| !c.is_ascii_alphanumeric())
{
(
match_start + 1,
result
.chars()
.nth(match_start)
.unwrap()
.to_string(),
)
} else {
(match_start, String::new())
}
} else {
(match_start, String::new())
};

let final_replacement =
format!("{}{}", prefix_char, replacement);

// Replace the CIDR pattern with expanded IPs
result.replace_range(
match_start..match_end,
&final_replacement,
);

// Update offset for subsequent replacements
let len_diff = final_replacement.len() as i32
- (match_end - match_start) as i32;
offset += len_diff;
}
}
Err(_) => {
// If parsing fails, leave the original text unchanged
continue;

Check failure on line 121 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / clippy (aarch64-unknown-linux-gnu,x86_64-unknown-linux-gnu,armv7-unknown-linux-gnueabihf,i686-unk...

this `continue` expression is redundant

Check failure on line 121 in src/parsers.rs

View workflow job for this annotation

GitHub Actions / clippy (--features tui, aarch64-apple-darwin,aarch64-pc-windows-msvc,aarch64-unknown-linux-gnu,i6...

this `continue` expression is redundant
}
}
}
}

result
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `output`, trimmed and split on newlines, has exactly
    /// `count` lines and that each of `entries` is one of those lines.
    fn assert_lines(output: &str, count: usize, entries: &[&str]) {
        let lines: Vec<&str> = output.trim().split('\n').collect();

        assert_eq!(lines.len(), count);
        for entry in entries {
            assert!(lines.contains(entry), "missing line: {entry}");
        }
    }

    /// Asserts that each of `entries` occurs somewhere in `output`.
    fn assert_contains_all(output: &str, entries: &[&str]) {
        for &entry in entries {
            assert!(output.contains(entry), "missing entry: {entry}");
        }
    }

    /// A lone /30 range expands to its four addresses.
    #[test]
    fn test_cidr_expansion() {
        assert_lines(
            &expand_cidr_ranges("192.168.1.0/30:8080"),
            4,
            &[
                "192.168.1.0:8080",
                "192.168.1.1:8080",
                "192.168.1.2:8080",
                "192.168.1.3:8080",
            ],
        );
    }

    /// Two expanded addresses, one regular proxy and one invalid line.
    #[test]
    fn test_mixed_input() {
        let text = "192.168.1.0/31:8080\n127.0.0.1:9090\ninvalid-line";

        assert_lines(
            &expand_cidr_ranges(text),
            4,
            &[
                "192.168.1.0:8080",
                "192.168.1.1:8080",
                "127.0.0.1:9090",
                "invalid-line",
            ],
        );
    }

    /// A /32 range is a single address.
    #[test]
    fn test_single_ip_cidr() {
        let output = expand_cidr_ranges("10.0.0.1/32:3128");

        assert_eq!(output.trim(), "10.0.0.1:3128");
    }

    /// A range followed by a space-separated regular proxy.
    #[test]
    fn test_non_newline_separated_behavior() {
        assert_contains_all(
            &expand_cidr_ranges("192.168.1.0/31:8080 127.0.0.1:9090"),
            &["192.168.1.0:8080", "192.168.1.1:8080", "127.0.0.1:9090"],
        );
    }

    /// Two ranges on the same line, separated by a space.
    #[test]
    fn test_multiple_cidr_same_line_behavior() {
        assert_contains_all(
            &expand_cidr_ranges("192.168.1.0/31:8080 10.0.0.0/31:3128"),
            &[
                "192.168.1.0:8080",
                "192.168.1.1:8080",
                "10.0.0.0:3128",
                "10.0.0.1:3128",
            ],
        );
    }

    /// Two ranges separated by a comma.
    #[test]
    fn test_comma_separated_cidr() {
        assert_contains_all(
            &expand_cidr_ranges("192.168.1.0/31:8080,10.0.0.0/31:3128"),
            &[
                "192.168.1.0:8080",
                "192.168.1.1:8080",
                "10.0.0.0:3128",
                "10.0.0.1:3128",
            ],
        );
    }

    /// Ranges and regular proxies separated by tab, comma and space.
    #[test]
    fn test_mixed_separators() {
        let text =
            "192.168.1.0/31:8080\t10.0.0.1:3128,203.0.113.0/31:1080 127.0.0.1:9090";

        assert_contains_all(
            &expand_cidr_ranges(text),
            &[
                "192.168.1.0:8080",
                "192.168.1.1:8080",
                "10.0.0.1:3128",
                "203.0.113.0:1080",
                "203.0.113.1:1080",
                "127.0.0.1:9090",
            ],
        );
    }
}
11 changes: 8 additions & 3 deletions src/scraper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::event::{AppEvent, Event};
use crate::{
HashSet,
config::{Config, Source},
parsers::PROXY_REGEX,
parsers::{PROXY_REGEX, expand_cidr_ranges},
proxy::{Proxy, ProxyType},
utils::pretty_error,
};
Expand Down Expand Up @@ -68,12 +68,17 @@ async fn scrape_one(
}
};

// Expand CIDR ranges to individual IP:port entries
let expanded_text = expand_cidr_ranges(&text);

#[cfg(feature = "tui")]
let mut seen_protocols = HashSet::new();

let mut new_proxies = HashSet::new();

for maybe_capture in PROXY_REGEX.captures_iter(&text) {
for (_, maybe_capture) in
PROXY_REGEX.captures_iter(&expanded_text).enumerate()
{
if config.scraping.max_proxies_per_source != 0
&& new_proxies.len() >= config.scraping.max_proxies_per_source
{
Expand Down Expand Up @@ -121,7 +126,7 @@ async fn scrape_one(
}

drop(config);
drop(text);
drop(expanded_text);

if new_proxies.is_empty() {
tracing::warn!("{}: no proxies found", source.url);
Expand Down
Loading
Loading