Я отправляю cURL-запросы примерно к 50 сайтам из задания cron, чтобы проверять их доступность. Однако каждый раз я получаю ответ только от первых 10–15 сайтов. Полагаю, что после этого сервер перегружается и скрипт прекращает выполнение.
Пример кода:
// Probe every configured site; $sites is expected to hold one URL per entry.
foreach ($sites as $site) {
    // Pass the current entry: the previous version probed the same
    // hard-coded URL on every iteration and never looked at $site.
    if (Visit($site)) {
        // saving in database
    }
}

/**
 * Checks whether a URL answers with a successful (2xx) HTTP status.
 *
 * Performs a blocking GET request with a 5 second overall timeout.
 * Redirects are not followed, so a 301/302 response counts as a failure.
 *
 * @param string $url URL to request.
 *
 * @return bool True when the response status is in the 200-299 range,
 *              false otherwise (including timeouts, connection errors and
 *              failure to create the cURL handle).
 */
function Visit($url)
{
    $ch = curl_init();
    if ($ch === false) {
        // Without a handle there is nothing to query; report the site as down.
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)");
    // Capture the body instead of echoing it; only the status code matters here.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_VERBOSE, false);
    curl_setopt($ch, CURLOPT_TIMEOUT, 5);

    curl_exec($ch);
    // A failed transfer leaves the code at 0, which falls outside the 2xx range.
    $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    return $httpcode >= 200 && $httpcode < 300;
}
Пожалуйста, подскажите, как избежать перегрузки сервера и получить ответ от всех сайтов.
Вы можете попробовать следующий вариант на основе curl_multi_init.
Проверка 100 разных доменов заняла всего 15.519232988358 секунды.
// Benchmark driver: checks 100 domains in parallel and prints the elapsed time.

// Domains to check, separated by whitespace (spaces and/or newlines).
$domainList = <<<'DOMAINS'
google.com facebook.com youtube.com yahoo.com baidu.com wikipedia.org live.com twitter.com qq.com amazon.com
taobao.com linkedin.com blogspot.com google.co.in yahoo.co.jp sina.com.cn google.de yandex.ru msn.com wordpress.com
google.co.jp google.com.hk bing.com ebay.com google.co.uk google.fr vk.com microsoft.com babylon.com weibo.com
googleusercontent.com 163.com tumblr.com apple.com mail.ru pinterest.com soso.com google.com.br tmall.com google.es
paypal.com google.ru google.it xhamster.com craigslist.org sohu.com blogger.com fc2.com imdb.com go.com
ebay.de google.co.id zedo.com alibaba.com mywebsearch.com google.com.tr adf.ly stackoverflow.com blogspot.in redtube.com
amazon.co.uk 360buy.com google.com.au alipay.com sogou.com about.com instagram.com ebay.co.uk nytimes.com livedoor.com
google.pl netflix.com imgur.com uol.com.br dailymotion.com wordpress.org 360.cn cnet.com godaddy.com youporn.com
bp.blogspot.com ameblo.jp mediafire.com adcash.com globo.com chinaz.com weather.com incredibar.com neobux.com xnxx.com
google.nl ehow.com douban.com google.com.sa 4shared.com vimeo.com livejournal.com dropbox.com renren.com doubleclick.com
DOMAINS;

echo "<pre>";

// Split on any run of whitespace and drop empty tokens. Splitting on "\n"
// alone turns a space-separated list into one giant bogus "domain".
// The array keeps the name $url that the original script used.
$url = preg_split('/\s+/', $domainList, -1, PREG_SPLIT_NO_EMPTY);

// The whole batch may take longer than the default execution limit.
set_time_limit(0);

$start = microtime(true);
multiplePost($url);
echo PHP_EOL, microtime(true) - $start;
Вывод
Status 301 for : http://www.google.com/ Error (x) Status 302 for : https://facebook.com/ Error (x) Status ok for : http://www.youtube.com/ Done (^) Status ok for : http://www.yahoo.com/ Done (^) Status ok for : http://baidu.com Done (^) Status ok for : http://www.wikipedia.org/ Done (^) Status 301 for : https://home.live.com/ Error (x) Status ok for : http://twitter.com Done (^) Status ok for : http://www.qq.com/ Done (^) Status ok for : http://www.amazon.com/ Done (^) Status ok for : http://www.taobao.com/ Done (^) Status ok for : http://www.linkedin.com/ Done (^) Status 302 for : https://accounts.google.com/ServiceLogin?service=blogger&passive=1209600&continue=http://www.blogger.com/home&followup=http://www.blogger.com/home<mpl=start Error (x) Status ok for : http://www.google.co.in/ Done (^) Status ok for : http://www.yahoo.co.jp/ Done (^) Status 0 for : http://sina.com.cn Error (x) Status ok for : http://www.google.de/ Done (^) Status 301 for : http://www.yandex.ru/ Error (x) Status ok for : http://www.msn.com/ Done (^) Status ok for : http://wordpress.com Done (^) Status ok for : http://www.google.co.jp/ Done (^) Status ok for : http://www.google.com.hk/ Done (^) Status ok for : http://www.bing.com/ Done (^) Status 301 for : http://www.ebay.com Error (x) Status ok for : http://www.google.co.uk/ Done (^) Status ok for : http://www.google.fr/ Done (^) Status ok for : http://vk.com Done (^) Status ok for : http://www.microsoft.com/en-ng/default.aspx Done (^) Status ok for : http://www.babylon.com/ Done (^) Status ok for : http://weibo.com Done (^) Status 0 for : http://googleusercontent.com Error (x) Status 0 for : http://163.com Error (x) Status 302 for : https://www.tumblr.com/ Error (x) Status ok for : http://www.apple.com/ Done (^) Status ok for : http://mail.ru Done (^) Status ok for : http://pinterest.com Done (^) Status 0 for : http://soso.com Error (x) Status ok for : http://www.google.com.br/ Done (^) Status ok for : http://www.tmall.com/ Done (^) Status 
ok for : http://www.google.es/ Done (^) Status 302 for : https://paypal.com/ Error (x) Status ok for : http://www.google.ru/ Done (^) Status ok for : http://www.google.it/ Done (^) Status ok for : http://xhamster.com Done (^) Status ok for : http://www.craigslist.org/about/sites/ Done (^) Status 302 for : http://www.sohu.com/ Error (x) Status 302 for : https://accounts.google.com/ServiceLogin?service=blogger&passive=1209600&continue=http://www.blogger.com/home&followup=http://www.blogger.com/home<mpl=start Error (x) Status ok for : http://fc2.com Done (^) Status ok for : http://www.imdb.com/ Done (^) Status ok for : http://go.com Done (^) Status 301 for : http://www.ebay.de Error (x) Status ok for : http://www.google.co.id/ Done (^) Status ok for : http://www.zedo.com/ Done (^) Status ok for : http://www.alibaba.com/ Done (^) Status ok for : http://home.mywebsearch.com/ Done (^) Status ok for : http://www.google.com.tr/ Done (^) Status ok for : http://adf.ly Done (^) Status ok for : http://stackoverflow.com Done (^) Status 302 for : http://www.google.com/ Error (x) Status ok for : http://www.redtube.com/ Done (^) Status ok for : http://www.amazon.co.uk/ Done (^) Status ok for : http://360buy.com Done (^) Status ok for : http://www.google.com.au/ Done (^) Status 301 for : https://www.alipay.com/?src=alipay.com Error (x) Status ok for : http://www.sogou.com/ Done (^) Status ok for : http://www.about.com/ Done (^) Status ok for : http://instagram.com Done (^) Status 301 for : http://www.ebay.co.uk Error (x) Status ok for : http://www.nytimes.com/ Done (^) Status ok for : http://www.livedoor.com/ Done (^) Status ok for : http://www.google.pl/ Done (^) Status 301 for : http://www.netflix.com/ Error (x) Status ok for : http://imgur.com Done (^) Status ok for : http://www.uol.com.br/ Done (^) Status 301 for : http://www.dailymotion.com/ Error (x) Status ok for : http://wordpress.org Done (^) Status ok for : http://360.cn Done (^) Status ok for : http://www.cnet.com/ Done 
(^) Status ok for : http://www.godaddy.com/ Done (^) Status ok for : http://www.youporn.com/ Done (^) Status 0 for : http://bp.blogspot.com Error (x) Status ok for : http://ameblo.jp Done (^) Status ok for : http://www.mediafire.com/ Done (^) Status 301 for : https://www.adcash.com/index.php Error (x) Status 301 for : http://www.globo.com/ Error (x) Status ok for : http://chinaz.com Done (^) Status ok for : http://www.weather.com/ Done (^) Status ok for : http://incredibar.com/essentials/homepage Done (^) Status ok for : http://www.neobux.com/ Done (^) Status 301 for : http://www.xnxx.com/ Error (x) Status ok for : http://www.google.nl/ Done (^) Status ok for : http://www.ehow.com/ Done (^) Status 0 for : http://douban.com Error (x) Status ok for : http://www.google.com.sa/ Done (^) Status 301 for : http://www.4shared.com Error (x) Status ok for : http://vimeo.com Done (^) Status ok for : http://www.livejournal.com/ Done (^) Status 302 for : https://www.dropbox.com/ Error (x) Status ok for : http://renren.com Done (^) Status ok for : http://www.google.com/doubleclick/ Done (^) 15.519232988358 <--------------- Total Time Taken
Используемая функция
/**
 * Requests every given domain in parallel and prints one status line per domain.
 *
 * Each domain is fetched over "http://" with redirects followed, a 5 second
 * connect timeout and a 15 second overall timeout. After all transfers have
 * finished, one line per domain is echoed in input order:
 *   - "Status ok for : <url> Done (^)"           when the final response is HTTP 200;
 *   - "Status <code> for : <url> Error (x)"      for any other HTTP status;
 *   - "cURL error <n> (<reason>) for : <url> Error (x)" when the transfer itself failed
 *     (DNS failure, timeout, refused connection, ...).
 *
 * Blank entries in $nodes are ignored.
 *
 * @param array $nodes Domain names without scheme, e.g. array('example.com').
 *
 * @return void
 */
function multiplePost($nodes)
{
    $mh = curl_multi_init();
    $handles = array();

    foreach ($nodes as $i => $domain) {
        $domain = trim($domain);
        if ($domain === '') {
            // Nothing to request; "http://" alone is not a valid target.
            continue;
        }

        $handle = curl_init("http://$domain");
        if ($handle === false) {
            continue;
        }

        curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($handle, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 (.NET CLR 3.5.30729)');
        curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5);
        curl_setopt($handle, CURLOPT_TIMEOUT, 15);
        curl_setopt($handle, CURLOPT_FOLLOWLOCATION, true);

        curl_multi_add_handle($mh, $handle);
        $handles[$i] = $handle;
    }

    echo PHP_EOL, PHP_EOL;

    // Drive all transfers to completion. curl_multi_select() sleeps until a
    // socket is ready instead of polling on a fixed timer.
    $running = 0;
    do {
        do {
            $status = curl_multi_exec($mh, $running);
        } while ($status === CURLM_CALL_MULTI_PERFORM); // only returned by old libcurl versions

        if ($status !== CURLM_OK) {
            break;
        }

        if ($running > 0 && curl_multi_select($mh, 1.0) === -1) {
            // select() can fail immediately on some platforms; back off
            // briefly so the loop does not spin at 100% CPU.
            usleep(10000);
        }
    } while ($running > 0);

    // Per-transfer result codes are only available through
    // curl_multi_info_read(); curl_errno() on the easy handle stays 0 when the
    // handle was driven by the multi interface.
    $results = array();
    while (($message = curl_multi_info_read($mh)) !== false) {
        $index = array_search($message['handle'], $handles, true);
        if ($index !== false) {
            $results[$index] = $message['result'];
        }
    }

    foreach ($handles as $i => $handle) {
        $info = curl_getinfo($handle);
        $info['url'] = trim($info['url']);

        if (isset($results[$i]) && $results[$i] === CURLE_OK) {
            if ($info['http_code'] == 200) {
                echo "Status ok for : {$info['url']} Done (^)", PHP_EOL;
                // The page body is available here via curl_multi_getcontent($handle).
            } else {
                echo "Status {$info['http_code']} for : {$info['url']} Error (x)", PHP_EOL;
            }
        } elseif (isset($results[$i])) {
            $reason = curl_strerror($results[$i]);
            echo "cURL error {$results[$i]} ({$reason}) for : {$info['url']} Error (x)", PHP_EOL;
        } else {
            // No completion message: the multi loop aborted before this transfer finished.
            echo "cURL error -1 (transfer did not complete) for : {$info['url']} Error (x)", PHP_EOL;
        }

        curl_multi_remove_handle($mh, $handle);
        curl_close($handle);

        // Push PHP's own buffer first (only if one is active), then the SAPI buffer.
        if (ob_get_level() > 0) {
            ob_flush();
        }
        flush();
    }

    curl_multi_close($mh);
}