57 changes: 0 additions & 57 deletions src/scraperapi_mcp_server/scrape.py

This file was deleted.

src/scraperapi_mcp_server/scraping/models.py
@@ -1,6 +1,7 @@
from typing import Annotated, Optional
from pydantic import BaseModel, Field, AnyUrl


class Scrape(BaseModel):
"""Parameters for scraping a URL."""

@@ -9,4 +10,4 @@ class Scrape(BaseModel):
    country_code: Annotated[Optional[str], Field(default=None, description="Country code to scrape from")]
    premium: Annotated[bool, Field(default=False, description="Whether to use premium scraping")]
    ultra_premium: Annotated[bool, Field(default=False, description="Whether to use ultra premium scraping")]
    device_type: Annotated[Optional[str], Field(default=None, description="Device type to scrape from. Set to `mobile` or `desktop` to request the corresponding user agent.")]
34 changes: 34 additions & 0 deletions src/scraperapi_mcp_server/scraping/scrape.py
@@ -0,0 +1,34 @@
from typing import Optional

from scraperapi_mcp_server.config import settings
from scraperapi_mcp_server.utils.make_request import make_request


def basic_scrape(
    url: str,
    render: Optional[bool] = None,
    country_code: Optional[str] = None,
    premium: Optional[bool] = None,
    ultra_premium: Optional[bool] = None,
    device_type: Optional[str] = None
) -> str:
    """Scrape a URL through the API and return the response body as markdown."""
payload = {
'api_key': settings.API_KEY,
'url': url,
'output_format': 'markdown'
}

optional_params = {
'render': (render, lambda v: str(v).lower()),
'country_code': (country_code, str),
'premium': (premium, lambda v: str(v).lower()),
'ultra_premium': (ultra_premium, lambda v: str(v).lower()),
'device_type': (device_type, str)
}

for key, (value, formatter) in optional_params.items():
if value is not None:
payload[key] = formatter(value)

return make_request(
url=settings.API_URL,
params=payload,
context=f"scraping '{url}'"
)
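
For reference, a minimal usage sketch (not part of the diff). It assumes `settings.API_KEY` and `settings.API_URL` are already configured; the URL and parameter values are illustrative only.

from scraperapi_mcp_server.scraping.scrape import basic_scrape

# Hypothetical call: render the page with JavaScript and route through a US proxy.
markdown = basic_scrape(
    url="https://example.com",
    render=True,
    country_code="us",
)
print(markdown)
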
35 changes: 35 additions & 0 deletions src/scraperapi_mcp_server/sdes/amazon.py
@@ -0,0 +1,35 @@
from .base import ScraperEndpoint

amazon_product = ScraperEndpoint(
endpoint_path="/structured/amazon/product",
context_template="fetching Amazon product '{asin}'"
)

amazon_search = ScraperEndpoint(
endpoint_path="/structured/amazon/search",
context_template="fetching Amazon search results for '{query}'"
)

amazon_offers = ScraperEndpoint(
endpoint_path="/structured/amazon/offers",
context_template="fetching Amazon offers for '{asin}'"
)

def scrape_amazon_product(asin: str, tld: str, country: str, output_format: str) -> str:
return amazon_product.call(asin=asin, tld=tld, country=country, output_format=output_format)

def scrape_amazon_search(query: str, tld: str, country: str, output_format: str, page: int) -> str:
return amazon_search.call(query=query, tld=tld, country=country, output_format=output_format, page=page)

def scrape_amazon_offers(
    asin: str,
    tld: str,
    country: str,
    output_format: str,
    f_new: bool,
    f_used_good: bool,
    f_used_like_new: bool,
    f_used_very_good: bool,
    f_used_acceptable: bool
) -> str:
return amazon_offers.call(
asin=asin,
tld=tld,
country=country,
output_format=output_format,
f_new=f_new,
f_used_good=f_used_good,
f_used_like_new=f_used_like_new,
f_used_very_good=f_used_very_good,
f_used_acceptable=f_used_acceptable
)
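
A usage sketch for the wrappers above (values are illustrative; the ASIN shown is a placeholder, not a real product):

from scraperapi_mcp_server.sdes import amazon

# Fetch a single product page as JSON from amazon.com via a US proxy.
product_json = amazon.scrape_amazon_product(
    asin="B000000000",
    tld="com",
    country="us",
    output_format="json",
)
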
14 changes: 14 additions & 0 deletions src/scraperapi_mcp_server/sdes/base.py
@@ -0,0 +1,14 @@
from scraperapi_mcp_server.config import settings
from scraperapi_mcp_server.utils.make_request import make_request

class ScraperEndpoint:
    """A structured-data endpoint that forwards keyword arguments to the API."""

    def __init__(self, endpoint_path: str, context_template: str):
        self.endpoint_path = endpoint_path
        self.context_template = context_template

    def call(self, **params) -> str:
        # Every keyword argument becomes a query parameter alongside the API key.
        payload = {'api_key': settings.API_KEY}
        payload.update(params)
        url = f"{settings.API_URL}{self.endpoint_path}"
        context = self.context_template.format(**params)
        return make_request(url=url, params=payload, context=context)
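
Adding a new structured endpoint then reduces to one declaration plus a thin wrapper. A sketch under the assumption that a `/structured/ebay/product` route exists (the path and `product_id` parameter here are hypothetical):

from scraperapi_mcp_server.sdes.base import ScraperEndpoint

ebay_product = ScraperEndpoint(
    endpoint_path="/structured/ebay/product",
    context_template="fetching eBay product '{product_id}'",
)

# Keyword arguments are forwarded as query parameters and are also
# available to the context template for error messages.
result = ebay_product.call(product_id="123456789")
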
34 changes: 34 additions & 0 deletions src/scraperapi_mcp_server/sdes/models.py
@@ -0,0 +1,34 @@
from typing import Annotated
from pydantic import BaseModel, Field


# Amazon
class ScrapeAmazonProductParams(BaseModel):
"""Parameters for scraping an Amazon product."""

asin: Annotated[str, Field(description="ASIN of the Amazon product page.")]
tld: Annotated[str, Field(description="Top-level domain to scrape.")]
country: Annotated[str, Field(description="Country to scrape from.")]
    output_format: Annotated[str, Field(description="Output format of the response. We offer 'csv' and 'json' output; JSON is the default if the parameter is not provided.")]


class ScrapeAmazonSearchParams(BaseModel):
"""Parameters for scraping an Amazon search."""

query: Annotated[str, Field(description="Query to scrape.")]
tld: Annotated[str, Field(description="Top-level domain to scrape.")]
country: Annotated[str, Field(description="Country to scrape from.")]
    output_format: Annotated[str, Field(description="Output format of the response. We offer 'csv' and 'json' output; JSON is the default if the parameter is not provided.")]
    page: Annotated[int, Field(default=1, description="Page number of the search results. Defaults to 1.")]


class ScrapeAmazonOffersParams(BaseModel):
"""Parameters for scraping an Amazon offers."""
asin: Annotated[str, Field(description="ASIN of the Amazon product page.")]
tld: Annotated[str, Field(description="Top-level domain to scrape.")]
country: Annotated[str, Field(description="Country to scrape from.")]
    output_format: Annotated[str, Field(description="Output format of the response. We offer 'csv' and 'json' output; JSON is the default if the parameter is not provided.")]
f_new: Annotated[bool, Field(description="Whether to scrape new offers.")]
f_used_good: Annotated[bool, Field(description="Whether to scrape used good offers.")]
f_used_like_new: Annotated[bool, Field(description="Whether to scrape used like new offers.")]
f_used_very_good: Annotated[bool, Field(description="Whether to scrape used very good offers.")]
f_used_acceptable: Annotated[bool, Field(description="Whether to scrape used acceptable offers.")]
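
Since these are plain Pydantic models, validation happens at construction time. A small sketch (assuming Pydantic v2 for `model_dump()`; field values are illustrative):

from scraperapi_mcp_server.sdes.models import ScrapeAmazonProductParams

params = ScrapeAmazonProductParams(
    asin="B000000000",
    tld="com",
    country="us",
    output_format="json",
)
print(params.model_dump())  # {'asin': 'B000000000', 'tld': 'com', ...}
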
87 changes: 81 additions & 6 deletions src/scraperapi_mcp_server/server.py
@@ -5,9 +5,11 @@
ErrorData,
INTERNAL_ERROR,
)
from scraperapi_mcp_server.model import Scrape
from scraperapi_mcp_server.country_codes import COUNTRY_CODES
from scraperapi_mcp_server.scrape import basic_scrape
from scraperapi_mcp_server.scraping.models import Scrape
from scraperapi_mcp_server.scraping.scrape import basic_scrape
from scraperapi_mcp_server.utils.country_codes import COUNTRY_CODES
from scraperapi_mcp_server.sdes.models import ScrapeAmazonProductParams, ScrapeAmazonSearchParams, ScrapeAmazonOffersParams
from scraperapi_mcp_server.sdes import amazon


mcp = FastMCP("mcp-scraperapi")
@@ -21,8 +23,7 @@ def scrape(params: Scrape) -> str:
Args:
params: A Scrape model instance containing all scraping parameters
- url: The URL to scrape (required)
        - render: Set to True ONLY if the page requires JavaScript to load content. Default is False, which is sufficient for most static websites.
- country_code: Two-letter country code to scrape from (optional)
- premium: Whether to use premium proxies (optional)
- ultra_premium: Whether to use ultra premium proxies (optional)
@@ -95,4 +96,78 @@ def scrape_prompt(params: str) -> str:
scrape_params.country_code = code
break

    return scrape(scrape_params)


# SDEs


# Amazon
@mcp.tool()
def scrape_amazon_product(params: ScrapeAmazonProductParams) -> str:
"""
Scrape a product from Amazon.

Args:
params:
- asin: The ASIN of the product to scrape
- tld: The top-level domain to scrape
            - country: The country to scrape from
            - output_format: The output format of the response; 'csv' and 'json' are supported, and JSON is the default if the parameter is not provided
"""
return amazon.scrape_amazon_product(
asin=params.asin,
tld=params.tld,
country=params.country,
output_format=params.output_format,
)


@mcp.tool()
def scrape_amazon_search(params: ScrapeAmazonSearchParams) -> str:
"""
    Scrape Amazon search results.

    Args:
        params:
            - query: The search query
            - tld: The top-level domain to scrape
            - country: The country to scrape from
            - output_format: The output format of the response; 'csv' and 'json' are supported, and JSON is the default if the parameter is not provided
            - page: The page of search results to fetch
    """
    return amazon.scrape_amazon_search(
        query=params.query,
        tld=params.tld,
        country=params.country,
        output_format=params.output_format,
        page=params.page
    )


@mcp.tool()
def scrape_amazon_offers(params: ScrapeAmazonOffersParams) -> str:
"""
Scrape offers from Amazon.

Args:
params:
- asin: The ASIN of the product to scrape
- tld: The top-level domain to scrape
            - country: The country to scrape from
            - output_format: The output format of the response; 'csv' and 'json' are supported, and JSON is the default if the parameter is not provided
- f_new: Whether to scrape new offers
- f_used_good: Whether to scrape used good offers
- f_used_like_new: Whether to scrape used like new offers
- f_used_very_good: Whether to scrape used very good offers
- f_used_acceptable: Whether to scrape used acceptable offers
"""
return amazon.scrape_amazon_offers(
asin=params.asin,
tld=params.tld,
country=params.country,
output_format=params.output_format,
f_new=params.f_new,
f_used_good=params.f_used_good,
f_used_like_new=params.f_used_like_new,
f_used_very_good=params.f_used_very_good,
f_used_acceptable=params.f_used_acceptable
)
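
For local testing, the tool functions can be exercised directly, since FastMCP's `tool()` decorator returns the original function in the reference SDK (an assumption worth verifying against the pinned SDK version; values below are illustrative):

from scraperapi_mcp_server.server import scrape_amazon_search
from scraperapi_mcp_server.sdes.models import ScrapeAmazonSearchParams

results = scrape_amazon_search(ScrapeAmazonSearchParams(
    query="usb c cable",
    tld="com",
    country="us",
    output_format="json",
    page=1,
))
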
1 change: 1 addition & 0 deletions src/scraperapi_mcp_server/utils/__init__.py
@@ -0,0 +1 @@
# Package marker
40 changes: 40 additions & 0 deletions src/scraperapi_mcp_server/utils/make_request.py
@@ -0,0 +1,40 @@
import requests
from scraperapi_mcp_server.config import settings
from requests.exceptions import RequestException, HTTPError as RequestsHTTPError
from mcp.shared.exceptions import McpError
from mcp.types import ErrorData, INTERNAL_ERROR


def make_request(url: str, params: dict, context: str = "request") -> str:
"""
Make an HTTP GET request with unified error handling.

Args:
url (str): The URL to request.
params (dict): Query parameters for the request.
        context (str): Context string for error messages (e.g., 'scraping', 'fetching eBay product').

Returns:
str: The response text.
"""
try:
response = requests.get(url, params=params, timeout=settings.API_TIMEOUT_SECONDS)
response.raise_for_status()
return response.text
    except RequestsHTTPError as e:
        # RequestException always defines .response, but it may be None.
        status_code = e.response.status_code if e.response is not None else 500
        error_message = f"HTTP error {status_code} when {context}: {e}"
raise McpError(ErrorData(
code=INTERNAL_ERROR,
message=error_message,
))
except RequestException as e:
raise McpError(ErrorData(
code=INTERNAL_ERROR,
message=f"Connection error when {context}: {str(e)}",
))
except Exception as e:
raise McpError(ErrorData(
code=INTERNAL_ERROR,
message=f"Unexpected error when {context}: {str(e)}",
))
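
Callers see a single failure type regardless of what went wrong underneath. A minimal sketch of consuming this helper (parameter values are illustrative):

from mcp.shared.exceptions import McpError
from scraperapi_mcp_server.config import settings
from scraperapi_mcp_server.utils.make_request import make_request

try:
    body = make_request(
        url=settings.API_URL,
        params={"api_key": settings.API_KEY, "url": "https://example.com"},
        context="scraping 'https://example.com'",
    )
except McpError as err:
    # err.error carries the ErrorData (code and message) raised above.
    print(f"request failed: {err}")
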