
Introduction

WebScrapingAPI.com is a simple and effective REST API interface for scraping web pages at scale.

The API automatically takes care of tasks that would otherwise have to be handled programmatically, such as rotating proxies, rendering JavaScript, and managing sessions.

In this documentation you will find detailed usage guides and code examples in different programming languages that will help you get up and running in the shortest time possible. If the articles below leave any questions unanswered, please feel free to contact our technical support team.

Getting Started

API Access Key & Authentication

import requests

url = "https://api.webscrapingapi.com/v1"

params = {
"api_key":"XXXXXX",
}

response = requests.request("GET", url, params=params)

print(response.text)
curl "https://api.webscrapingapi.com/v1?api_key=XXXXXX"
const got = require('got');

(async () => {
const params = {
api_key: 'XXXXXX',
};

const response = await got('https://api.webscrapingapi.com/v1', {searchParams: params});
console.log(response.body);
})();
$request = new HttpRequest();
$request->setUrl('https://api.webscrapingapi.com/v1');
$request->setMethod(HTTP_METH_GET);

$request->setQueryData([
'api_key' => 'XXXXXX',
]);

try {
$response = $request->send();

echo $response->getBody();
} catch (HttpException $ex) {
echo $ex;
}
OkHttpClient client = new OkHttpClient();

Request request = new Request.Builder()
.url("https://api.webscrapingapi.com/v1?api_key=XXXXXX")
.get()
.build();

Response response = client.newCall(request).execute();
var client = new RestClient("https://api.webscrapingapi.com/v1?api_key=XXXXXX");
var request = new RestRequest(Method.GET);
IRestResponse response = client.Execute(request);
require 'uri'
require 'net/http'
require 'openssl'

url = URI("https://api.webscrapingapi.com/v1?api_key=XXXXXX")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_NONE

request = Net::HTTP::Get.new(url)

response = http.request(request)
puts response.read_body
package main

import (
"fmt"
"net/http"
"io/ioutil"
)

func main() {
url := "https://api.webscrapingapi.com/v1?api_key=XXXXXX"

req, _ := http.NewRequest("GET", url, nil)

res, _ := http.DefaultClient.Do(req)

defer res.Body.Close()
body, _ := ioutil.ReadAll(res.Body)

fmt.Println(res)
fmt.Println(string(body))
}

To authorize your API call, append the api_key parameter to the URL:

https://api.webscrapingapi.com/v1?api_key=YOUR_API_KEY

After you create a WebScrapingAPI account, your account dashboard will reveal the unique API access key you can use to authenticate with the API. To do so, simply attach the api_key parameter to the API’s base URL and set it to your API access key.

Example:

https://api.webscrapingapi.com/v1?api_key=YOUR_API_KEY&url=https://httpbin.org/get

In the following code examples, we use a dummy API key of XXXXXX. Please replace it with your real API key.
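
Hardcoding keys in source files makes them easy to leak. As a precaution, you may prefer to load the key from an environment variable; below is a minimal Python sketch (the WSA_API_KEY variable name is our own choice, not something the API requires):

import os
import requests

# Hypothetical environment variable; set it first, e.g.:
# export WSA_API_KEY="XXXXXX"
api_key = os.environ["WSA_API_KEY"]

response = requests.get(
    "https://api.webscrapingapi.com/v1",
    params={"api_key": api_key, "url": "https://httpbin.org/get"},
)

print(response.text)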

API Error Codes

Example Error when submitting an incorrect API key

{
"success": false,
"error": {
"code": 403,
"error": "You have provided an invalid API key. Get a free API Key at https://app.webscrapingapi.com/sign-up or upgrade your plan"
}
}

If your request fails, the API will return an error in JSON format. To the right you will find an example of the error returned when an invalid API key is submitted.
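
In code, you can distinguish a failed scrape from a successful one by the HTTP status code and the JSON body shown above. A minimal Python sketch under that assumption (the field names are taken from the example error):

import requests

response = requests.get(
    "https://api.webscrapingapi.com/v1",
    params={"api_key": "XXXXXX", "url": "https://httpbin.org/get"},
)

# Successful scrapes return the raw HTML of the target page; failures
# return a JSON body like the example above.
if response.status_code != 200:
    error = response.json().get("error", {})
    print("Request failed with code", error.get("code"))
    print(error.get("error"))
else:
    print(response.text)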

API Features

Basic Request

import requests

url = "https://api.webscrapingapi.com/v1"

params = {
"api_key":"XXXXXX",
"url":"https://httpbin.org/get"
}

response = requests.request("GET", url, params=params)

print(response.text)
curl "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https://httpbin.org/get"
const got = require('got');

(async () => {
const params = {
api_key: 'XXXXXX',
url: 'https://httpbin.org/get'
};

const response = await got('https://api.webscrapingapi.com/v1', {searchParams: params});
console.log(response.body);
})();
$request = new HttpRequest();
$request->setUrl('https://api.webscrapingapi.com/v1');
$request->setMethod(HTTP_METH_GET);

$request->setQueryData([
'api_key' => 'XXXXXX',
'url' => 'https://httpbin.org/get'
]);

try {
$response = $request->send();

echo $response->getBody();
} catch (HttpException $ex) {
echo $ex;
}
OkHttpClient client = new OkHttpClient();

Request request = new Request.Builder()
.url("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget")
.get()
.build();

Response response = client.newCall(request).execute();
var client = new RestClient("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget");
var request = new RestRequest(Method.GET);
IRestResponse response = client.Execute(request);
require 'uri'
require 'net/http'
require 'openssl'

url = URI("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_NONE

request = Net::HTTP::Get.new(url)

response = http.request(request)
puts response.read_body
package main

import (
"fmt"
"net/http"
"io/ioutil"
)

func main() {
url := "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget"

req, _ := http.NewRequest("GET", url, nil)

res, _ := http.DefaultClient.Do(req)

defer res.Body.Close()
body, _ := ioutil.ReadAll(res.Body)

fmt.Println(res)
fmt.Println(string(body))
}

To scrape a web page using the WebScrapingAPI, simply use the API’s base endpoint and append the URL you would like to scrape, as well as your API access key, as GET parameters. There is also a series of optional parameters you can choose from. In the right-side box you will find an example request used to scrape the URL https://httpbin.org/get.

Request Parameters

Object Default Value Description
api_key - [Required] Specify your unique API access key to authenticate with the API. Your API access key can be found in your account dashboard.
url - [Required] Specify the URL of the web page you would like to scrape.
render_js 0 [optional] Set to 0 (off, default) or 1 (on) depending on whether or not to render JavaScript on the target web page. JavaScript rendering is done by using a browser. When render_js is enabled, we charge 5 API requests for a datacenter request and 25 API requests for a residential request.
proxy_type datacenter [optional] Set to datacenter (default) or residential depending on which proxy type you want to use for your scraping request. Please note that a single residential proxy API request is counted as 10 API requests when render_js is off and 25 API requests when render_js is on.
country - [optional] Specify the 2-letter code of the country you would like to use as a proxy geolocation for your scraping API request. Supported countries differ by proxy type; please refer to the Proxy Locations section for details.
headers - [optional] Specify custom HTTP headers to be passed to the request. For example, in order to override the User-Agent header, use {"User-Agent": "Teapot"}
cookies - [optional] Specify custom cookies to be passed to the request.
session - [optional] Specify an integer to reuse the same proxy address for multiple requests (see the Sessions section).
timeout 10000 [optional] Specify the maximum timeout in milliseconds you would like to use for your scraping API request. In order to force a timeout, you can specify a number such as 1000. This will abort the request after 1000ms and return whatever HTML response was obtained until that point in time. The maximum value for this parameter is 14000.
device desktop [optional] Set to desktop (default), mobile, or tablet, depending on the device type you want to use for your scraping request.
wait_until domcontentloaded [optional] {for advanced users} Specify the load event you would like to use as the completion condition for your scraping API request. Can only be used when render_js=1 is set.
wait_for 0 [optional] {for advanced users} Some websites use JavaScript frameworks that may require a few extra seconds to load their content. This parameter specifies the time in milliseconds to wait for the website. Recommended values are in the interval 5000-10000.
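
The optional parameters above can be combined freely in a single request. A short Python sketch using only values documented in the table (whether a given combination is available depends on your plan):

import requests

params = {
    "api_key": "XXXXXX",
    "url": "https://httpbin.org/get",
    "render_js": "1",             # render the page in a headless browser
    "proxy_type": "residential",  # counted as 25 API requests with render_js on
    "country": "us",              # 2-letter proxy geolocation code
    "device": "mobile",           # desktop (default), mobile, or tablet
    "timeout": "10000",           # maximum wait in milliseconds (max 14000)
    "wait_for": "5000",           # extra milliseconds for JS frameworks to load
}

response = requests.get("https://api.webscrapingapi.com/v1", params=params)
print(response.text)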

Example API Request

The API request below is the simplest invocation you can make, specifying only the url parameter

import requests

url = "https://api.webscrapingapi.com/v1"

params = {
"api_key":"XXXXXX",
"url":"https://bing.com"
}

response = requests.request("GET", url, params=params)

print(response.text)
curl "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https://bing.com"
const got = require('got');

(async () => {
const params = {
api_key: 'XXXXXX',
url: 'https://bing.com'
};

const response = await got('https://api.webscrapingapi.com/v1', {searchParams: params});
console.log(response.body);
})();
$request = new HttpRequest();
$request->setUrl('https://api.webscrapingapi.com/v1');
$request->setMethod(HTTP_METH_GET);

$request->setQueryData([
'api_key' => 'XXXXXX',
'url' => 'https://bing.com'
]);

try {
$response = $request->send();

echo $response->getBody();
} catch (HttpException $ex) {
echo $ex;
}
OkHttpClient client = new OkHttpClient();

Request request = new Request.Builder()
.url("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fbing.com")
.get()
.build();

Response response = client.newCall(request).execute();
var client = new RestClient("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fbing.com");
var request = new RestRequest(Method.GET);
IRestResponse response = client.Execute(request);
require 'uri'
require 'net/http'
require 'openssl'

url = URI("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fbing.com")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_NONE

request = Net::HTTP::Get.new(url)

response = http.request(request)
puts response.read_body
package main

import (
"fmt"
"net/http"
"io/ioutil"
)

func main() {
url := "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fbing.com"

req, _ := http.NewRequest("GET", url, nil)

res, _ := http.DefaultClient.Do(req)

defer res.Body.Close()
body, _ := ioutil.ReadAll(res.Body)

fmt.Println(res)
fmt.Println(string(body))
}

The above command returns raw HTML like this:

<!doctype html><html lang="de" dir="ltr"> ...

If your scraping request was successful, the API will respond with the raw HTML data of your target web page. If you have passed custom HTTP headers, your API response will also contain the HTTP headers sent along with your original API request.
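
Because the response body is the raw HTML itself, you can persist it directly; a minimal sketch:

import requests

response = requests.get(
    "https://api.webscrapingapi.com/v1",
    params={"api_key": "XXXXXX", "url": "https://bing.com"},
)

# Save the scraped HTML for later parsing.
with open("page.html", "w", encoding="utf-8") as f:
    f.write(response.text)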

Rendering JavaScript

In order to allow rendering of JavaScript, pass render_js=1

import requests

url = "https://api.webscrapingapi.com/v1"

params = {
"api_key":"XXXXXX",
"url":"https://bing.com",
"render_js":"1"
}

response = requests.request("GET", url, params=params)

print(response.text)
curl "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https://httpbin.org/get&render_js=1"
const got = require('got');

(async () => {
const params = {
api_key: 'XXXXXX',
url: 'https://httpbin.org/get',
render_js: '1'
};

const response = await got('https://api.webscrapingapi.com/v1', {searchParams: params});
console.log(response.body);
})();
$request = new HttpRequest();
$request->setUrl('https://api.webscrapingapi.com/v1');
$request->setMethod(HTTP_METH_GET);

$request->setQueryData([
'api_key' => 'XXXXXX',
'url' => 'https://httpbin.org/get',
'render_js' => '1'
]);

try {
$response = $request->send();

echo $response->getBody();
} catch (HttpException $ex) {
echo $ex;
}
OkHttpClient client = new OkHttpClient();

Request request = new Request.Builder()
.url("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1")
.get()
.build();

Response response = client.newCall(request).execute();
var client = new RestClient("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1");
var request = new RestRequest(Method.GET);
IRestResponse response = client.Execute(request);
require 'uri'
require 'net/http'
require 'openssl'

url = URI("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_NONE

request = Net::HTTP::Get.new(url)

response = http.request(request)
puts response.read_body
package main

import (
"fmt"
"net/http"
"io/ioutil"
)

func main() {
url := "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1"

req, _ := http.NewRequest("GET", url, nil)

res, _ := http.DefaultClient.Do(req)

defer res.Body.Close()
body, _ := ioutil.ReadAll(res.Body)

fmt.Println(res)
fmt.Println(string(body))
}

Some web pages render essential page elements using JavaScript, which means that some content is not present (and therefore not scrapable) on the initial page load. With the render_js parameter enabled, the WebScrapingAPI accesses the target web page using a headless browser and allows JavaScript page elements to render before delivering the final scraping result. To enable JavaScript rendering, simply append the render_js HTTP GET parameter to your API request URL and set it to 1. By default, this parameter is set to 0 (off).
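
A quick way to check whether a target page actually needs JavaScript rendering is to compare the HTML returned with and without the flag; a sketch (remember that render_js requests are billed at a higher rate, as described in the parameter table):

import requests

API = "https://api.webscrapingapi.com/v1"
base = {"api_key": "XXXXXX", "url": "https://httpbin.org/get"}

plain = requests.get(API, params={**base, "render_js": "0"})
rendered = requests.get(API, params={**base, "render_js": "1"})

# A large difference suggests the page builds its content client-side.
print(len(plain.text), "bytes without rendering")
print(len(rendered.text), "bytes with rendering")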

Custom Headers

curl --request GET \
--url "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=http%3A%2F%2Fhttpbin.org%2Fheaders%3Fjson" \
--header "My-header: test" \
--header "Accept: application/json" \
--header "User-Agent: potato" \
--header "Cookie: name1=value1; name2=value2"
import http.client

conn = http.client.HTTPSConnection("api.webscrapingapi.com")
api_key = "XXXXXX"
url = "http%3A%2F%2Fhttpbin.org%2Fheaders"
full_url = f"/v1?api_key={api_key}&url={url}"

my_headers = {
"My-header": "test",
"Accept": "application/json",
"User-Agent": "potato",
"Cookie": "name1=value1; name2=value2"
}

conn.request("GET", full_url, headers=my_headers)

res = conn.getresponse()
data = res.read()

print(data.decode("utf-8"))
const http = require("https");

const api_key = "XXXXXX";
const url = "http%3A%2F%2Fhttpbin.org%2Fheaders";

const myHeaders = {
"My-header": "test",
"Accept": "application/json",
"User-Agent": "potato",
"Cookie": "name1=value1; name2=value2"
};

const options = {
"method": "GET",
"hostname": "api.webscrapingapi.com",
"port": null,
"path": `/v1?api_key=${api_key}&url=${url}`,
"headers": myHeaders
};

const req = http.request(options, function (res) {
const chunks = [];

res.on("data", function (chunk) {
chunks.push(chunk);
});

res.on("end", function () {
const body = Buffer.concat(chunks);
console.log(body.toString());
});
});

req.end();
<?php

$curl = curl_init();

$hostname = "api.webscrapingapi.com";
$api_key = "XXXXXX";
$url = "http%3A%2F%2Fhttpbin.org%2Fheaders";

curl_setopt_array($curl, [
CURLOPT_URL => "https://{$hostname}/v1?api_key={$api_key}&url={$url}",
CURLOPT_RETURNTRANSFER => true,
CURLOPT_ENCODING => "",
CURLOPT_MAXREDIRS => 10,
CURLOPT_TIMEOUT => 30,
CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
CURLOPT_CUSTOMREQUEST => "GET",
CURLOPT_HTTPHEADER => [
"Accept: application/json",
"My-header: test",
"User-Agent: potato",
"Cookie": "name1=value1; name2=value2"
],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
echo "cURL Error #:" . $err;
} else {
echo $response;
}
HttpResponse<String> response = Unirest.get("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=http%3A%2F%2Fhttpbin.org%2Fheaders")
.header("My-header", "test")
.header("Accept", "application/json")
.header("User-Agent", "potato")
.header("Cookie", "name1=value1; name2=value2")
.asString();
var client = new RestClient("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=http%3A%2F%2Fhttpbin.org%2Fheaders");
var request = new RestRequest(Method.GET);
request.AddHeader("My-header", "test");
request.AddHeader("Accept", "application/json");
request.AddHeader("User-Agent", "potato");
request.AddHeader("Cookie", "name1=value1; name2=value2");
IRestResponse response = client.Execute(request);
require 'uri'
require 'net/http'
require 'openssl'

url = URI("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=http%3A%2F%2Fhttpbin.org%2Fheaders")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_NONE

request = Net::HTTP::Get.new(url)
request["My-header"] = 'test'
request["Accept"] = 'application/json'
request["User-Agent"] = 'potato'
request["Cookie"] = "name1=value1; name2=value2"

response = http.request(request)
puts response.read_body
package main

import (
"fmt"
"net/http"
"io/ioutil"
)

func main() {

url := "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=http%3A%2F%2Fhttpbin.org%2Fheaders"

req, _ := http.NewRequest("GET", url, nil)

req.Header.Add("My-header", "test")
req.Header.Add("Accept", "application/json")
req.Header.Add("User-Agent", "potato")
req.Header.Add("Cookie", "name1=value1; name2=value2")

res, _ := http.DefaultClient.Do(req)

defer res.Body.Close()
body, _ := ioutil.ReadAll(res.Body)

fmt.Println(res)
fmt.Println(string(body))

}

If you would like to add custom/additional headers (user agents, cookies, etc.), simply add them as parameters to the request.

Only use this feature to get customized results; do not use it to avoid blocks. WebScrapingAPI has a powerful internal engine that takes care of everything for you.

In the right-side box, you will find an example request used to scrape the URL https://httpbin.org/headers, which will mirror the headers sent.

Custom Cookies

curl --request GET \
--url "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=http%3A%2F%2Fhttpbin.org%2Fcookies" \
--cookie "name1=value1; name2=value2"

curl --request GET \
--url "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=http%3A%2F%2Fhttpbin.org%2Fcookies" \
-b "name1=value1; name2=value2"
import http.client

conn = http.client.HTTPSConnection("api.webscrapingapi.com")
api_key = "XXXXXX"
url = "http%3A%2F%2Fhttpbin.org%2Fcookies"
full_url = f"/v1?api_key={api_key}&url={url}"

my_headers = {
"Accept": "application/json",
"Cookie": "name1=value1; name2=value2"
}

conn.request("GET", full_url, headers=my_headers)

res = conn.getresponse()
data = res.read()

print(data.decode("utf-8"))
const http = require("https");

const api_key = "XXXXXX";
const url = "http%3A%2F%2Fhttpbin.org%2Fcookies";

const myHeaders = {
"Accept": "application/json",
"Cookie": "name1=value1; name2=value2"
};

const options = {
"method": "GET",
"hostname": "api.webscrapingapi.com",
"path": `/v1?api_key=${api_key}&url=${url}`,
"headers": myHeaders
};

const req = http.request(options, function (res) {
const chunks = [];

res.on("data", function (chunk) {
chunks.push(chunk);
});

res.on("end", function () {
const body = Buffer.concat(chunks);
console.log(body.toString());
});
});

req.end();
<?php

$curl = curl_init();

$hostname = "api.webscrapingapi.com";
$api_key = "XXXXXX";
$url = "http%3A%2F%2Fhttpbin.org%2Fcookies";

curl_setopt_array($curl, [
CURLOPT_URL => "https://{$hostname}/v1?api_key={$api_key}&url={$url}",
CURLOPT_RETURNTRANSFER => true,
CURLOPT_ENCODING => "",
CURLOPT_MAXREDIRS => 10,
CURLOPT_TIMEOUT => 30,
CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
CURLOPT_CUSTOMREQUEST => "GET",
CURLOPT_HTTPHEADER => [
"Accept: application/json",
"Cookie: name1=value1; name2=value2"
],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
echo "cURL Error #:" . $err;
} else {
echo $response;
}
HttpResponse<String> response = Unirest.get("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=http%3A%2F%2Fhttpbin.org%2Fcookies")
.header("Accept", "application/json")
.header("Cookie", "name1=value1; name2=value2")
.asString();
var client = new RestClient("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=http%3A%2F%2Fhttpbin.org%2Fcookies");
var request = new RestRequest(Method.GET);
request.AddHeader("Accept", "application/json");
request.AddHeader("Cookie", "name1=value1; name2=value2");
IRestResponse response = client.Execute(request);
require 'uri'
require 'net/http'
require 'openssl'

url = URI("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=http%3A%2F%2Fhttpbin.org%2Fcookies")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_NONE

request = Net::HTTP::Get.new(url)
request["Accept"] = 'application/json'
request["Cookie"] = "name1=value1; name2=value2"

response = http.request(request)
puts response.read_body
package main

import (
"fmt"
"net/http"
"io/ioutil"
)

func main() {

url := "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=http%3A%2F%2Fhttpbin.org%2Fcookies"

req, _ := http.NewRequest("GET", url, nil)

req.Header.Add("Accept", "application/json")
req.Header.Add("Cookie", "name1=value1; name2=value2")

res, _ := http.DefaultClient.Do(req)

defer res.Body.Close()
body, _ := ioutil.ReadAll(res.Body)

fmt.Println(res)
fmt.Println(string(body))
}

If you would like to send custom cookies to the target website, simply pass them via the cookies request parameter or via the “Cookie” header (see the Custom Headers section).

In the right-side box, you will find an example request used to scrape the URL https://httpbin.org/cookies, which will mirror the cookies sent.

Proxies

The WebScrapingAPI makes use of a pool of 100+ million IP addresses worldwide, making your requests virtually impossible to block. We maintain two separate pools: one of private datacenter IPs, and one of residential and mobile IPs.

Across both data center and residential proxies, the WebScrapingAPI supports more than 195 global geolocations your scraping request can be sent from.

Specify the proxy type you want to use for your request with the proxy_type parameter: proxy_type=datacenter for datacenter proxies, or proxy_type=residential for residential proxies.

What are Data Center proxies?

Datacenter proxies can be selected by passing the parameter proxy_type=datacenter

import requests

url = "https://api.webscrapingapi.com/v1"

params = {
"api_key":"XXXXXX",
"url":"https://httpbin.org/get",
"proxy_type":"datacenter"
}

response = requests.request("GET", url, params=params)

print(response.text)
curl "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https://httpbin.org/get&proxy_type=datacenter"
const got = require('got');

(async () => {
const params = {
api_key: 'XXXXXX',
url: 'https://httpbin.org/get',
proxy_type: 'datacenter',
};

const response = await got('https://api.webscrapingapi.com/v1', {searchParams: params});
console.log(response.body);
})();
$request = new HttpRequest();
$request->setUrl('https://api.webscrapingapi.com/v1');
$request->setMethod(HTTP_METH_GET);

$request->setQueryData([
'api_key' => 'XXXXXX',
'url' => 'https://httpbin.org/get',
'proxy_type' => 'datacenter'
]);

try {
$response = $request->send();

echo $response->getBody();
} catch (HttpException $ex) {
echo $ex;
}
OkHttpClient client = new OkHttpClient();

Request request = new Request.Builder()
.url("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fip")
.get()
.build();

Response response = client.newCall(request).execute();
var client = new RestClient("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&proxy_type=datacenter");
var request = new RestRequest(Method.GET);
IRestResponse response = client.Execute(request);
require 'uri'
require 'net/http'
require 'openssl'

url = URI("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&proxy_type=datacenter")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_NONE

request = Net::HTTP::Get.new(url)

response = http.request(request)
puts response.read_body
package main

import (
"fmt"
"net/http"
"io/ioutil"
)

func main() {
url := "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&proxy_type=datacenter"

req, _ := http.NewRequest("GET", url, nil)

res, _ := http.DefaultClient.Do(req)

defer res.Body.Close()
body, _ := ioutil.ReadAll(res.Body)

fmt.Println(res)
fmt.Println(string(body))
}

Datacenter proxies are proxies that are not affiliated with an Internet Service Provider (ISP). They come from a secondary corporation and provide you with completely private IP authentication and anonymity.

However, datacenter proxies usually come from cloud server providers and are used by many at the same time. Since they are not listed as ISP providers, these IPs can already be flagged by some targets and certain precautionary measures might be taken. But keep in mind that this is not the case with WebScrapingAPI datacenter proxies. All WebScrapingAPI datacenter proxies are private proxies, and ensure little to no IP blacklisting.

What are Residential Proxies?

Residential proxies can be selected by passing the parameter proxy_type=residential

import requests

url = "https://api.webscrapingapi.com/v1"

params = {
"api_key":"XXXXXX",
"url":"https://httpbin.org/get",
"proxy_type":"residential"
}

response = requests.request("GET", url, params=params)

print(response.text)
curl "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https://httpbin.org/get&proxy_type=residential"
const got = require('got');

(async () => {
const params = {
api_key: 'XXXXXX',
url: 'https://httpbin.org/get',
proxy_type: 'residential'
};

const response = await got('https://api.webscrapingapi.com/v1', {searchParams: params});
console.log(response.body);
})();
$request = new HttpRequest();
$request->setUrl('https://api.webscrapingapi.com/v1');
$request->setMethod(HTTP_METH_GET);

$request->setQueryData([
'api_key' => 'XXXXXX',
'url' => 'https://httpbin.org/get',
'proxy_type' => 'residential'
]);

try {
$response = $request->send();

echo $response->getBody();
} catch (HttpException $ex) {
echo $ex;
}
OkHttpClient client = new OkHttpClient();

Request request = new Request.Builder()
.url("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&proxy_type=residential")
.get()
.build();

Response response = client.newCall(request).execute();
var client = new RestClient("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&proxy_type=residential");
var request = new RestRequest(Method.GET);
IRestResponse response = client.Execute(request);
require 'uri'
require 'net/http'
require 'openssl'

url = URI("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&proxy_type=residential")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_NONE

request = Net::HTTP::Get.new(url)

response = http.request(request)
puts response.read_body
package main

import (
"fmt"
"net/http"
"io/ioutil"
)

func main() {
url := "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&proxy_type=residential"

req, _ := http.NewRequest("GET", url, nil)

res, _ := http.DefaultClient.Do(req)

defer res.Body.Close()
body, _ := ioutil.ReadAll(res.Body)

fmt.Println(res)
fmt.Println(string(body))
}

A dedicated residential proxy is an IP address provided by an ISP (Internet Service Provider) to a homeowner. It is a real IP address attached to a physical location. So basically, whenever you move to a new location and set up your internet, your ISP will provide you with an IP address.

While these proxies are the most common proxies used on the internet, they are also much more likely to get blocked on an IP basis when attempting to scrape data.

The main difference between datacenter and residential proxies lies in the source of the IP. All other differences that arise rely on the differences between household machines and servers.

Geolocation

Using the API’s country HTTP GET parameter you can choose a specific country by its 2-letter country code for your scraping request. Please find an example API request on the right side, which specifies US (for United States) as a proxy location using the default data center proxies.

Geolocation can be specified with the country parameter

import requests

url = "https://api.webscrapingapi.com/v1"

params = {
"api_key":"XXXXXX",
"url":"https://httpbin.org/get",
"proxy_type":"datacenter",
"country":"us"
}

response = requests.request("GET", url, params=params)

print(response.text)
curl "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https://httpbin.org/get&proxy_type=datacenter&country=us"
const got = require('got');

(async () => {
const params = {
api_key: 'XXXXXX',
url: 'https://httpbin.org/get',
proxy_type: 'datacenter',
country: 'us'
};

const response = await got('https://api.webscrapingapi.com/v1', {searchParams: params});
console.log(response.body);
})();
$request = new HttpRequest();
$request->setUrl('https://api.webscrapingapi.com/v1');
$request->setMethod(HTTP_METH_GET);

$request->setQueryData([
'api_key' => 'XXXXXX',
'url' => 'https://httpbin.org/get',
'proxy_type' => 'datacenter',
'country' => 'us'
]);

try {
$response = $request->send();

echo $response->getBody();
} catch (HttpException $ex) {
echo $ex;
}
OkHttpClient client = new OkHttpClient();

Request request = new Request.Builder()
.url("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&proxy_type=datacenter&country=us")
.get()
.build();

Response response = client.newCall(request).execute();
var client = new RestClient("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&proxy_type=datacenter&country=us");
var request = new RestRequest(Method.GET);
IRestResponse response = client.Execute(request);
require 'uri'
require 'net/http'
require 'openssl'

url = URI("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&proxy_type=datacenter&country=us")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_NONE

request = Net::HTTP::Get.new(url)

response = http.request(request)
puts response.read_body
package main

import (
"fmt"
"net/http"
"io/ioutil"
)

func main() {
url := "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&proxy_type=datacenter&country=us"

req, _ := http.NewRequest("GET", url, nil)

res, _ := http.DefaultClient.Do(req)

defer res.Body.Close()
body, _ := ioutil.ReadAll(res.Body)

fmt.Println(res)
fmt.Println(string(body))
}

Free plan users don’t have access to geolocation targeting; a random-country datacenter IP is assigned to the request.

Start plan users can only select US (United States) geolocation targeting; if no country is specified, a random-country datacenter IP is assigned to the request.

Grow, Business, and Enterprise plan users can access all geolocation targeting options.
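
If you request a geolocation that your plan does not include, the API answers with a 403 Forbidden error (see API Errors below). A hedged Python sketch that falls back to an unpinned request in that case:

import requests

API = "https://api.webscrapingapi.com/v1"
params = {
    "api_key": "XXXXXX",
    "url": "https://httpbin.org/get",
    "proxy_type": "datacenter",
    "country": "de",  # not available on the Free or Start plans
}

response = requests.get(API, params=params)
if response.status_code == 403:
    # Geolocation targeting is not included in this plan: retry without
    # the country parameter and accept a random-country datacenter IP.
    params.pop("country")
    response = requests.get(API, params=params)

print(response.text)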

Data Center Proxies Supported Countries

For datacenter proxies, the API currently supports a total of 12 global geolocations: United States (us), Canada (ca), United Kingdom (uk), Germany (de), France (fr), Spain (es), Brazil (br), Mexico (mx), India (in), Japan (jp), China (cn), and Australia (au).

Residential Proxies Supported Countries

For premium (residential) proxies, the API currently supports a total of 40 global geolocations. You can download a full list of supported countries and 2-letter country codes using the following link.

Access to 195 countries is available to Enterprise customers upon request.

POST Requests

curl -X "POST" "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fpost" \
-H 'Content-Type: application/x-www-form-urlencoded; charset=utf-8' \
--data-urlencode "foo=bar"
import requests

url = "https://api.webscrapingapi.com/v1"

params = {
"api_key":"XXXXXX",
"url":"https://httpbin.org/post"
}

headers = {
"Content-Type": "application/x-www-form-urlencoded; charset=utf-8"
}

data = {
"foo": "bar"
}

response = requests.request("POST", url, params=params, headers=headers, data=data)

print(response.text)
const got = require('got');

(async () => {
const params = {
api_key: 'XXXXXX',
url: 'https://httpbin.org/post'
};

const headers = {
"Content-Type": "application/x-www-form-urlencoded; charset=utf-8"
}

const data = {
"foo": "bar"
}

/**
* Use -form parameter for x-www-form-urlencoded body
* -body parameter for form-data body
* -json parameter for raw body
*/

const response = await got('https://api.webscrapingapi.com/v1', {
method: "POST",
searchParams: params,
headers: headers,
form: data
});
console.log(response.body);
})();
require 'uri'
require 'net/http'
require 'openssl'

url = URI("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fpost")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
data = {
"foo" => "bar"
}
body = URI.encode_www_form(data)

request = Net::HTTP::Post.new(url)
request.add_field "Content-Type", "application/x-www-form-urlencoded; charset=utf-8"
request.body = body

response = http.request(request)
puts response.read_body
<?php
$request = new HttpRequest();
$request->setUrl('https://api.webscrapingapi.com/v1');
$request->setMethod(HTTP_METH_POST);

$request->setQueryData([
'api_key' => 'XXXXXX',
'url' => 'https://httpbin.org/post'
]);

$request->setHeaders(
array('Content-Type' => 'application/x-www-form-urlencoded')
);

$request->setPostFields(
array('foo' => 'bar')
);

try {
$response = $request->send();

echo $response->getBody();
} catch (HttpException $ex) {
echo $ex;
}
?>
OkHttpClient client = new OkHttpClient();

RequestBody formBody = new FormBody.Builder()
.add("foo", "bar")
.build();

Request request = new Request.Builder()
.url("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fpost")
.addHeader("Content-Type", "application/x-www-form-urlencoded")
.post(formBody)
.build();

Response response = client.newCall(request).execute();
var client = new RestClient("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fpost");

var request = new RestRequest(Method.POST);
request.AddHeader("Content-Type","application/x-www-form-urlencoded");
request.AddParameter("foo", "bar");

IRestResponse response = client.Execute(request);
package main

import (
"io/ioutil"
"log"
"net/http"
"net/url"
"strings"
)

func main() {
url := "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fpost"
data := url.Values{}
data.Set("foo", "bar")

req, _ := http.NewRequest("POST", url, strings.NewReader(data.Encode()))
req.Header.Add("Content-Type", "application/x-www-form-urlencoded")

res, _ := http.DefaultClient.Do(req)

defer res.Body.Close()
body, _ := ioutil.ReadAll(res.Body)

log.Println(res)
log.Println(string(body))
}

The result will be:

{
  "args": {},
  "data": "",
  "files": {},
  "form": {
    "foo": "bar"
  },
  "headers": {
    "date": "Sat, 20 Mar 2021 20:17:27 GMT",
    "content-type": "application/json",
    "content-length": "566",
    "connection": "close",
    "server": "gunicorn/19.9.0",
    "access-control-allow-origin": "*",
    "access-control-allow-credentials": "true"
  },
  "json": null,
  "origin": "23.92.126.215",
  "url": "https://httpbin.org/post"
}

(BETA) It is now possible to scrape forms or API endpoints directly. You can do this by sending a POST request to WebScrapingAPI with your api_key and url parameters.

In the right-side box, you will find an example request used to scrape the URL https://httpbin.org/post.

Sessions

In order to reuse the same proxy (and thus IP address), you can specify a session by passing the session parameter

import requests

url = "https://api.webscrapingapi.com/v1"

params = {
"api_key":"XXXXXX",
"url":"https://httpbin.org/get",
"proxy_type":"datacenter",
"country":"us",
"session":"100"
}

response = requests.request("GET", url, params=params)

print(response.text)
curl "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https://httpbin.org/get&proxy_type=datacenter&country=us&session=100"
const got = require('got');

(async () => {
const params = {
api_key: 'XXXXXX',
url: 'https://httpbin.org/get',
proxy_type: 'datacenter',
country: 'us',
session: '100'
};

const response = await got('https://api.webscrapingapi.com/v1', {searchParams: params});
console.log(response.body);
})();
$request = new HttpRequest();
$request->setUrl('https://api.webscrapingapi.com/v1');
$request->setMethod(HTTP_METH_GET);

$request->setQueryData([
'api_key' => 'XXXXXX',
'url' => 'https://httpbin.org/get',
'proxy_type' => 'datacenter',
'country' => 'us',
'session' => '100'
]);

try {
$response = $request->send();

echo $response->getBody();
} catch (HttpException $ex) {
echo $ex;
}
OkHttpClient client = new OkHttpClient();

Request request = new Request.Builder()
.url("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&proxy_type=datacenter&country=us&session=100")
.get()
.build();

Response response = client.newCall(request).execute();
var client = new RestClient("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&proxy_type=datacenter&country=us&session=100");
var request = new RestRequest(Method.GET);
IRestResponse response = client.Execute(request);
require 'uri'
require 'net/http'
require 'openssl'

url = URI("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&proxy_type=datacenter&country=us&session=100")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_NONE

request = Net::HTTP::Get.new(url)

response = http.request(request)
puts response.read_body
package main

import (
"fmt"
"net/http"
"io/ioutil"
)

func main() {
url := "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&proxy_type=datacenter&country=us&session=100"

req, _ := http.NewRequest("GET", url, nil)

res, _ := http.DefaultClient.Do(req)

defer res.Body.Close()
body, _ := ioutil.ReadAll(res.Body)

fmt.Println(res)
fmt.Println(string(body))
}

To reuse the same proxy for multiple requests, simply use the session parameter (e.g. session=123). The value of session can be any integer, simply send a new integer to create a new session (this will allow you to continue using the same proxy for each request with that session number). Sessions expire 15 minutes after the last usage.
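
You can verify that a session pins the proxy by scraping https://httpbin.org/ip twice with the same session value; a minimal sketch:

import requests

API = "https://api.webscrapingapi.com/v1"
params = {
    "api_key": "XXXXXX",
    "url": "https://httpbin.org/ip",
    "session": "123",  # any integer; reuse it to keep the same proxy
}

# Both responses should report the same origin IP while the session is
# alive (sessions expire 15 minutes after their last use).
first = requests.get(API, params=params).json()
second = requests.get(API, params=params).json()
print(first["origin"], second["origin"])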

Forcing Timeouts

You can specify the maximum time the engine is allowed to spend rendering by passing the timeout parameter

import requests

url = "https://api.webscrapingapi.com/v1"

params = {
"api_key":"XXXXXX",
"url":"https://httpbin.org/get",
"render_js":"1",
"timeout":"200"
}

response = requests.request("GET", url, params=params)

print(response.text)
curl "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https://httpbin.org/get&render_js=1&timeout=200"
const got = require('got');

(async () => {
const params = {
api_key: 'XXXXXX',
url: 'https://httpbin.org/get',
render_js: '1',
timeout: '200'
};

const response = await got('https://api.webscrapingapi.com/v1', {searchParams: params});
console.log(response.body);
})();
$request = new HttpRequest();
$request->setUrl('https://api.webscrapingapi.com/v1');
$request->setMethod(HTTP_METH_GET);

$request->setQueryData([
'api_key' => 'XXXXXX',
'url' => 'https://httpbin.org/get',
'render_js' => '1',
'timeout' => '200'
]);

try {
$response = $request->send();

echo $response->getBody();
} catch (HttpException $ex) {
echo $ex;
}
OkHttpClient client = new OkHttpClient();

Request request = new Request.Builder()
.url("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1&timeout=200")
.get()
.build();

Response response = client.newCall(request).execute();
var client = new RestClient("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1&timeout=200");
var request = new RestRequest(Method.GET);
IRestResponse response = client.Execute(request);
require 'uri'
require 'net/http'
require 'openssl'

url = URI("https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1&timeout=200")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_NONE

request = Net::HTTP::Get.new(url)

response = http.request(request)
puts response.read_body
package main

import (
"fmt"
"net/http"
"io/ioutil"
)

func main() {
url := "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1&timeout=200"

req, _ := http.NewRequest("GET", url, nil)

res, _ := http.DefaultClient.Do(req)

defer res.Body.Close()
body, _ := ioutil.ReadAll(res.Body)

fmt.Println(res)
fmt.Println(string(body))
}

When using a browser to make requests, some URLs can take a long time to load because some obscure part of the DOM is still loading. For that reason, WebScrapingAPI returns all the HTML that could be gathered before the timeout was triggered. The example to the right demonstrates how to force a timeout after 200 milliseconds (timeout=200). The maximum value that can be set for this parameter is 14000.
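
Because a forced timeout returns whatever HTML was gathered so far, it is worth checking that the content you need actually made it into the partial response; a sketch (the "headers" marker is just the field httpbin echoes back, chosen for illustration):

import requests

response = requests.get(
    "https://api.webscrapingapi.com/v1",
    params={
        "api_key": "XXXXXX",
        "url": "https://httpbin.org/get",
        "render_js": "1",
        "timeout": "200",  # abort rendering after 200 ms
    },
)

# The body may be incomplete; verify your marker is present before parsing.
if "headers" in response.text:
    print("partial response already contains the data we need")
else:
    print("timed out too early; retry with a larger timeout value")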

Binary Files

The following API request scrapes a binary file by specifying only the url parameter.

curl "https://api.webscrapingapi.com/v1?api_key=XXXXXX&url=https://i.imgur.com/bHkmaqm.jpeg"

To scrape any type of file within the limit of 2MB, simply set the url parameter to the URL of that specific file. The response will include an object with the key base64_string, whose value is the file converted to a base64 string.

The result will be:

{
    "base64_string": "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD[...]oFcmgqvH0f+hxwQMj/9k="
}
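
Since the file arrives as a data-URL-style base64 string, decoding it back to bytes takes one call; a sketch that saves the image from the example above:

import base64
import requests

response = requests.get(
    "https://api.webscrapingapi.com/v1",
    params={"api_key": "XXXXXX", "url": "https://i.imgur.com/bHkmaqm.jpeg"},
)

# Strip the "data:image/jpeg;base64," prefix, then decode the payload.
b64 = response.json()["base64_string"]
payload = b64.split(",", 1)[1]

with open("image.jpeg", "wb") as f:
    f.write(base64.b64decode(payload))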

Conclusion

WebScrapingAPI.com is a fundamental building block in your scraping pipeline. It provides the essentials and a highly scalable service to developers who want to scrape en masse.

API Errors

The WebScrapingAPI uses the following HTTP error codes:

Error Code Meaning
400 Bad Request – Your request is invalid or has invalid parameters. This error code, along with an error message, is also sent when the URL could not be properly scraped.
401 Unauthorized – Your API key is missing or invalid, or your credit quota has been exceeded.
403 Forbidden – You do not have sufficient rights to a resource such as JavaScript rendering or country specific geolocation. Please upgrade your subscription plan accordingly.
406 Not Acceptable – You requested a format that is not supported.
429 Too Many Requests – You’re making too many requests in short succession or too many parallel requests.
500 Internal Server Error – We had a problem with our server. Try again later.
503 Service Unavailable – We’re temporarily offline for maintenance. Please try again later.
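
Codes 429, 500, and 503 are transient, so a simple retry loop with exponential backoff is usually enough; a minimal Python sketch:

import time
import requests

def scrape(target_url, api_key="XXXXXX", retries=3):
    params = {"api_key": api_key, "url": target_url}
    for attempt in range(retries):
        response = requests.get("https://api.webscrapingapi.com/v1", params=params)
        # Retry only the transient errors listed in the table above.
        if response.status_code not in (429, 500, 503):
            return response
        time.sleep(2 ** attempt)  # back off: 1s, 2s, 4s
    return response

print(scrape("https://httpbin.org/get").text)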