php - Even the cURL functions can't scrape some URLs
I am using cURL to scrape the HTML of a URL. It works for about 80% of the URLs I try, but some URLs cannot be scraped: when I try, the page keeps loading and in the end no result is returned. What is the problem? This is my code:
/**
 * Resolve the value of a "Location" response header against the URL that
 * produced it.
 *
 * Absolute http(s) URLs are returned unchanged. Scheme-relative ("//host/x"),
 * root-relative ("/x") and document-relative ("x") values are rebuilt from the
 * scheme, host and port of $base instead of being appended to the full base
 * URL, which would yield addresses such as "http://host/a/b/c" for a redirect
 * from "/a/b" to "/c".
 *
 * @param string $base     URL of the request that returned the redirect.
 * @param string $location Raw value of the Location header.
 *
 * @return string URL to request next.
 */
function curl_resolve_redirect_url($base, $location)
{
    if (preg_match('/^https?:/i', $location)) {
        return $location;
    }

    $parts = parse_url($base);
    if ($parts === false || !isset($parts['scheme'], $parts['host'])) {
        // No usable origin to resolve against; fall back to plain concatenation.
        return $base . $location;
    }

    if (strncmp($location, '//', 2) === 0) {
        return $parts['scheme'] . ':' . $location;
    }

    $origin = $parts['scheme'] . '://' . $parts['host'];
    if (isset($parts['port'])) {
        $origin .= ':' . $parts['port'];
    }

    if (strncmp($location, '/', 1) === 0) {
        return $origin . $location;
    }

    // Document-relative: keep the directory part of the base path.
    $path = isset($parts['path']) ? $parts['path'] : '/';
    $slash = strrpos($path, '/');
    $dir = $slash === false ? '/' : substr($path, 0, $slash + 1);

    return $origin . $dir . $location;
}

/**
 * Execute a cURL handle and follow HTTP redirects, even when PHP refuses to
 * enable CURLOPT_FOLLOWLOCATION (open_basedir / safe_mode restrictions).
 *
 * When native following is allowed it is simply switched on. Otherwise the
 * redirect chain is walked manually with header-only requests on a copy of the
 * handle, and the final URL is then fetched with the original handle.
 *
 * @param resource|CurlHandle $ch          Initialised cURL handle with its URL set.
 * @param int|null            $maxredirect Maximum number of redirects to follow
 *                                         (null means 5). Passed by reference:
 *                                         it is set to 0 when the limit is hit
 *                                         in the manual branch.
 *
 * @return string|bool Result of curl_exec() on $ch, or false when the redirect
 *                     limit was exceeded.
 */
function curl_exec_follow($ch, &$maxredirect = null)
{
    $mr = $maxredirect === null ? 5 : intval($maxredirect);

    // safe_mode no longer exists on current PHP, where ini_get() returns false;
    // treat false/''/'0'/'off' as "disabled".
    $safeMode = strtolower((string) ini_get('safe_mode'));
    $nativeFollowAllowed = (string) ini_get('open_basedir') === ''
        && in_array($safeMode, ['', '0', 'off'], true);

    if ($nativeFollowAllowed) {
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, $mr > 0);
        curl_setopt($ch, CURLOPT_MAXREDIRS, $mr);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // NOTE(review): kept from the original snippet. Disabling peer
        // verification permits man-in-the-middle attacks; prefer configuring a
        // CA bundle (CURLOPT_CAINFO) and removing this line.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);

        return curl_exec($ch);
    }

    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);

    if ($mr > 0) {
        $newurl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);

        // Probe the redirect chain with header-only requests on a copy so the
        // caller's handle keeps its own options.
        $rch = curl_copy_handle($ch);
        curl_setopt($rch, CURLOPT_HEADER, true);
        curl_setopt($rch, CURLOPT_NOBODY, true);
        curl_setopt($rch, CURLOPT_FORBID_REUSE, false);
        curl_setopt($rch, CURLOPT_RETURNTRANSFER, true);

        do {
            curl_setopt($rch, CURLOPT_URL, $newurl);
            $header = curl_exec($rch);
            $code = 0; // 0 ends the loop: error, non-redirect, or no Location header.

            if (!curl_errno($rch) && is_string($header)) {
                $status = (int) curl_getinfo($rch, CURLINFO_HTTP_CODE);
                $isRedirect = in_array($status, [301, 302, 303, 307, 308], true);

                if ($isRedirect
                    && preg_match('/^Location:[ \t]*(\S.*?)[ \t\r]*$/mi', $header, $matches)
                ) {
                    $newurl = curl_resolve_redirect_url($newurl, $matches[1]);
                    $code = $status;
                }
            }
        } while ($code && --$mr);

        curl_close($rch);

        if (!$mr) {
            if ($maxredirect === null) {
                trigger_error('Too many redirects.', E_USER_WARNING);
            } else {
                $maxredirect = 0;
            }

            return false;
        }

        curl_setopt($ch, CURLOPT_URL, $newurl);
    }

    return curl_exec($ch);
}

if (!empty($_POST['submit'])) {
    $url = isset($_POST['form_url']) && is_string($_POST['form_url'])
        ? trim($_POST['form_url'])
        : '';

    // The URL comes straight from the form: accept only http(s) so the page
    // cannot be used to read file:// or other non-web schemes.
    if (!preg_match('/^https?:\/\//i', $url)) {
        echo 'Please enter a valid http:// or https:// URL.';
    } else {
        $ch = curl_init($url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

        $data = curl_exec_follow($ch);

        if ($data === false) {
            // Report the failure instead of printing an empty page.
            $error = curl_error($ch);
            echo 'Request failed: '
                . htmlspecialchars($error !== '' ? $error : 'too many redirects', ENT_QUOTES, 'UTF-8');
        } else {
            echo $data;
        }

        curl_close($ch);
    }
}
Try this; hopefully it helps:
Curl, CURLOPT_HTTPHEADER, $ header); Curl_setopt ($ this-> Curl, CURLOPT_COOKIEJAR, $ cookiejar); Curl_setopt ($ this-> Curl, CURLOPT_COOKIEFILE, $ cookiejar); Curl_setopt ($ this-> Curl, CURLOPT_AUTOREFERER, true); Curl_setopt ($ this-> Curl, CURLOPT_FOLLOWLOCATION, true); Curl_setopt ($ this-> Curl, CURLOPT_RETURNTRANSFER, is true); } Find function ($ url) {$ this- & gt; Curl = curl_init ($ url); $ This- & gt; Set-up (); Return $ $-> request (); } GetAll function ($ reg, $ str) {preg_match_all ($ reg, $ str, $ matches); Returns $ matches [1]; } Festival postForm ($ url, $ fields, $ referer = '') {$ this- & gt; Curl = curl_init ($ url); $ This- & gt; Set-up (); Curl_setopt ($ this-> Curl, CURLOPT_URL, $ url); Curl_setopt ($ this-> Curl, CURLOPT_POST, 1); Curl_setopt ($ this-> Curl, CURLOPT_REFERER, $ referer); Curl_setopt ($ this- & gt; curl, CURLOPT_POSTFIELDS, $ fields); Return $ $-> request (); } Function getInfo ($ info) {$ info = ($ info == 'last')? Curl_getinfo ($ this-> Curl, CURLINFO_EFFECTIVE_URL): curl_getinfo ($ this-> Curl, $ info); Return $ info; } Function request () {return curl_exec ($ this-> curl); }} {$ Curl = new curls (); $ Html = $ curl- & gt; Get ("http://www.thefancy.com"); Echo "$ html"; }? & Gt;
Comments
Post a Comment