Proxy Rotation
The proxy rotation module provides utilities for searching, validating, and managing proxy servers for web scraping operations.

Type Definitions

ProxyBrokerCriteria

class ProxyBrokerCriteria(TypedDict, total=False):
    anonymous: bool
    countryset: Set[str]
    secure: bool
    timeout: float
    search_outside_if_empty: bool
Criteria for searching proxy servers.

ProxySettings

class ProxySettings(TypedDict, total=False):
    server: str
    bypass: str
    username: str
    password: str
Proxy server configuration settings.

Proxy

class Proxy(ProxySettings):
    criteria: ProxyBrokerCriteria
Complete proxy configuration including server settings and search criteria.

Functions

search_proxy_servers

search_proxy_servers(
    anonymous: bool = True,
    countryset: Optional[Set[str]] = None,
    secure: bool = False,
    timeout: float = 5.0,
    max_shape: int = 5,
    search_outside_if_empty: bool = True,
) -> List[str]
Search for proxy servers that match the specified broker criteria.
anonymous
bool
default:"True"
Whether proxy servers should have minimum level-1 anonymity.
countryset
Optional[Set[str]]
default:"None"
Set of country codes for admissible proxy server locations (e.g., {"US", "GB"}).
secure
bool
default:"False"
Whether proxy servers should support HTTPS. Defaults to HTTP.
timeout
float
default:"5.0"
The maximum timeout for proxy responses in seconds.
max_shape
int
default:"5"
The maximum number of proxy servers to return.
search_outside_if_empty
bool
default:"True"
Whether to extend the countryset search if no proxies are found in specified countries.
proxies
List[str]
A list of proxy server URLs matching the criteria (e.g., ["http://103.10.63.135:8080", "http://113.20.31.250:8080"]).

Example: Basic Usage

from scrapegraphai.utils import search_proxy_servers

# Search for 3 anonymous proxies
proxies = search_proxy_servers(
    anonymous=True,
    max_shape=3,
    timeout=5.0
)

print(f"Found {len(proxies)} proxies:")
for proxy in proxies:
    print(f"  - {proxy}")

Example: Country-Specific Proxies

from scrapegraphai.utils import search_proxy_servers

# Search for proxies in specific countries
proxies = search_proxy_servers(
    anonymous=True,
    countryset={"US", "GB", "CA"},
    secure=True,  # HTTPS proxies
    timeout=3.0,
    max_shape=5
)

for proxy in proxies:
    print(proxy)

Example: With ScrapeGraphAI

from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import search_proxy_servers

# Get a list of proxies
proxies = search_proxy_servers(
    anonymous=True,
    countryset={"US"},
    max_shape=1
)

# Use the first proxy in your scraper configuration
graph_config = {
    "llm": {"model": "openai/gpt-4o-mini"},
    "loader_kwargs": {
        "proxy": {
            "server": proxies[0],
        }
    }
}

smart_scraper = SmartScraperGraph(
    prompt="Extract product information",
    source="https://example.com",
    config=graph_config,
)

result = smart_scraper.run()

parse_or_search_proxy

parse_or_search_proxy(proxy: Proxy) -> ProxySettings
Parses a proxy configuration or searches for a matching one via broker. This function automatically determines whether to parse an existing proxy configuration or search for a new one.
proxy
Proxy
required
The proxy configuration to parse or search for. Must include a "server" field.
settings
ProxySettings
A Playwright-compliant proxy configuration ready to use.

Example: Parse Existing Proxy

from scrapegraphai.utils import parse_or_search_proxy

# Parse a known proxy server
proxy_config = {
    "server": "http://103.10.63.135:8080",
    "username": "myuser",
    "password": "mypass"
}

parsed_proxy = parse_or_search_proxy(proxy_config)
print(parsed_proxy)
# Output: {'server': 'http://103.10.63.135:8080', 'username': 'myuser', 'password': 'mypass'}

Example: Search for Proxy via Broker

from scrapegraphai.utils import parse_or_search_proxy

# Search for a proxy using broker criteria
proxy_config = {
    "server": "broker",
    "criteria": {
        "anonymous": True,
        "countryset": {"US", "GB"},
        "secure": True,
        "timeout": 5.0
    }
}

proxy_settings = parse_or_search_proxy(proxy_config)
print(proxy_settings)
# Output: {'server': 'http://123.45.67.89:8080'}

is_ipv4_address

is_ipv4_address(address: str) -> bool
Check if a given address conforms to a valid IPv4 address format.
address
str
required
The address string to validate.
is_valid
bool
True if the address is a valid IPv4 address, False otherwise.

Example

from scrapegraphai.utils import is_ipv4_address

print(is_ipv4_address("192.168.1.1"))        # True
print(is_ipv4_address("256.1.1.1"))          # False
print(is_ipv4_address("example.com"))        # False
print(is_ipv4_address("103.10.63.135"))      # True

Complete Example: Rotating Proxies

from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import search_proxy_servers
import random

# Get a pool of proxies
proxy_pool = search_proxy_servers(
    anonymous=True,
    countryset={"US", "GB", "CA"},
    secure=True,
    max_shape=10,
    timeout=5.0
)

print(f"Proxy pool contains {len(proxy_pool)} proxies")

# Function to scrape with random proxy
def scrape_with_rotation(url: str, prompt: str):
    # Select a random proxy from the pool
    proxy = random.choice(proxy_pool)
    
    graph_config = {
        "llm": {"model": "openai/gpt-4o-mini"},
        "loader_kwargs": {
            "proxy": {"server": proxy}
        }
    }
    
    smart_scraper = SmartScraperGraph(
        prompt=prompt,
        source=url,
        config=graph_config,
    )
    
    print(f"Using proxy: {proxy}")
    return smart_scraper.run()

# Scrape multiple pages with rotation
urls = [
    "https://example.com/page1",
    "https://example.com/page2",
    "https://example.com/page3",
]

results = []
for url in urls:
    result = scrape_with_rotation(url, "Extract main content")
    results.append(result)
    print(f"Scraped {url}")

print(f"Total results: {len(results)}")

Error Handling

from scrapegraphai.utils import search_proxy_servers
from fp.errors import FreeProxyException

try:
    proxies = search_proxy_servers(
        anonymous=True,
        countryset={"XX"},  # Invalid country code
        max_shape=5,
        search_outside_if_empty=False  # Don't search outside
    )
except FreeProxyException as e:
    print(f"Failed to find proxies: {e}")
    # Fallback: try without country restriction
    proxies = search_proxy_servers(
        anonymous=True,
        max_shape=5
    )

Best Practices

  1. Proxy Pool Management: Create a pool of proxies and rotate through them to avoid rate limiting.
  2. Timeout Configuration: Set appropriate timeouts based on your needs:
    # Fast proxies for quick operations
    proxies = search_proxy_servers(timeout=2.0)
    
    # More lenient for slower connections
    proxies = search_proxy_servers(timeout=10.0)
    
  3. Authentication: Always provide username and password together:
    proxy_config = {
        "server": "http://proxy.example.com:8080",
        "username": "user",
        "password": "pass"  # Both must be provided
    }
    
  4. Country Selection: Choose countries closer to your target website for better performance:
    # For US-based websites
    proxies = search_proxy_servers(countryset={"US", "CA"})
    
    # For EU-based websites
    proxies = search_proxy_servers(countryset={"GB", "DE", "FR"})
    
  5. Secure Proxies: Use HTTPS proxies for sensitive operations:
    proxies = search_proxy_servers(secure=True, anonymous=True)