Hi all,
I've been looking at Apifier and was wondering if there is a way to send a POST request from a web script. This would allow mp3tag to outsource the crawling and data extraction to an online service that works with HTTPS and with services where you have to log in.
Here's an example using cURL.
curl --include \
--request PUT \
--header "Content-Type: application/json" \
--data-binary "{
\"customId\": \"My_crawler\",
\"comments\": \"My testing crawler\",
\"startUrls\": [
{
\"key\": \"START\",
\"value\": \"http://example.com\"
}
],
\"crawlPurls\": [
{
\"key\": \"PAGE\",
\"value\": \"http://example.com/test-2/[.*]\"
}
],
\"pageFunction\": \"function(context) { /* ... */ }\",
\"clickableElementsSelector\": \"#article a\",
\"interceptRequest\": \"function interceptRequest(context, newRequest) { return newRequest; }\",
\"considerUrlFragment\": true,
\"loadImages\": true,
\"loadCss\": true,
\"injectJQuery\": true,
\"injectUnderscoreJs\": true,
\"ignoreRobotsTxt\": false,
\"skipLoadingFrames\": true,
\"verboseLog\": true,
\"disableWebSecurity\": true,
\"maxCrawledPages\": 60,
\"maxOutputPages\": 60,
\"maxCrawlDepth\": 10,
\"timeout\": 300,
\"resourceTimeout\": 3000,
\"pageLoadTimeout\": 3000,
\"pageFunctionTimeout\": 3000,
\"maxInfiniteScrollHeight\": 600,
\"randomWaitBetweenRequests\": 1000,
\"maxCrawledPagesPerSlave\": 20,
\"maxParallelRequests\": 10,
\"customHttpHeaders\": [
{
\"key\": \"X-My-Header\",
\"value\": \"my value\"
}
],
\"customProxies\": \"http://username:password@myproxy.com:8080\",
\"cookies\": [
{
\"domain\": \".example.com\",
\"expires\": \"Thu, 01 Jun 2017 16:14:38 GMT\",
\"expiry\": \"1496333678\",
\"httponly\": \"true\",
\"name\": \"NAME\",
\"path\": \"/\",
\"secure\": \"false\",
\"value\": \"Some value\"
}
],
\"cookiesPersistence\": \"PER_PROCESS\",
\"customData\": \"some custom content\",
\"finishWebhookUrl\": \"http://example.com/some/path\"
}" \
'https://api.apifier.com/v1/hNNQbYhnwafECWc8f/crawlers/CwNxxSNdBYw7NWLjb?token=[token]'