Использование CURL для получения контента с веб-сайта. Получение ответа в объекте. Как преобразовать этот объект в PHP Простой HTML DOM Parser
function get_data($url) { $ch = curl_init(); $timeout = 30; curl_setopt($ch,CURLOPT_URL,$url); curl_setopt($ch,CURLOPT_RETURNTRANSFER,false); curl_setopt($ch,CURLOPT_CONNECTTIMEOUT,$timeout); curl_setopt($ch,CURLOPT_POST,false); curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0"); //curl_exec($ch); $dom = new simple_html_dom(curl_exec($ch)); print_r( $dom ); curl_close($ch); return $data; } $url = 'http://www.example.com'; $data = get_data($url); ?>
результат
simple_html_dom Object ([root] => simple_html_dom_node Object ([nodetype] => 5 [tag] => root [attr] => Array () [children] => Array () [nodes] => Array ([0] = > simple_html_dom_node Object ([nodetype] => 3 [tag] => text [attr] => Array () [children] => Array () [nodes] => Array () [parent] => simple_html_dom_node Object * RECURSION * [_] => Массив ([4] => 1) [tag_start] => 0 [dom: private] => simple_html_dom Object * RECURSION *)) [parent] => [_] => Array ([0] = > -1 [1] => 2) [tag_start] => 0 [dom: private] => simple_html_dom Object * RECURSION *) [nodes] => Array ([0] => simple_html_dom_node Object ([nodetype] => 5 [tag] => root [attr] => Array () [children] => Array () [nodes] => Array ([0] => simple_html_dom_node Object ([nodetype] => 3 [tag] => текст [ attr] => Array () [children] => Array () [nodes] => Array () [parent] => simple_html_dom_node Object * RECURSION * [_] => Array ([4] => 1) [tag_start] => 0 [dom: private] => simple_html_dom Object * RECURSION *)) [parent] => [_] => Массив ([0] => -1 [1] => 2) [tag_start] => 0 [dom: private] => simple_html_dom Object * RECURSION *) [1] => simple_html_dom_node Объект ( [nodetype] => 3 [tag] => text [attr] => Array () [children] => Array () [nodes] => Array () [parent] => simple_html_dom_node Object ([nodetype] => 5 [tag] => root [attr] => Array () [children] => Array () [nodes] => Array ([0] => simple_html_dom_node Object * RECURSION *) [parent] => [_] => Массив ([0] => -1 [1] => 2) [tag_start] => 0 [dom: private] => simple_html_dom Object * RECURSION *) [_] => Массив ([4] => 1) [ tag_start] => 0 [dom: private] => simple_html_dom Object * RECURSION *)) [обратный вызов] => [нижний регистр] => 1 [original_size] => 1 [size] => 1 [pos: protected] => 1 [char: protected] => [cursor: protected] => 2 [parent: protected] => simple_html_dom_node Object ([nodetype] => 5 [tag] => root [attr] => Array () [children] => Array () [nodes] => Array ([0] => simple_html_dom_node Object ([nodetype] => 3 [tag] => текст [attr] => Arra y () [children] => Array () [nodes] => Array () [parent] => simple_html_dom_node Object * RECURSION * [_] => Array ([4] => 1) [tag_start] => 0 [ dom: private] => simple_html_dom Object * RECURSION *)) [parent] => [_] => Array ([0] => -1 [1] => 2) [tag_start] => 0 [dom: private] => simple_html_dom Object * RECURSION *) [token_blank: protected] => [token_equal: protected] => = /> [token_slash: protected] => /> [token_attr: protected] =>> [_charset] => UTF-8 [_target_charset] => UTF-8 [default_br_text: protected] => [default_span_text] => [self_closing_tags: protected] => Array ([img] => 1 [br] => 1 [input] => 1 [meta] => 1 [link] => 1 [hr] => 1 [base] => 1 => 1 [spacer] => 1) [block_tags: protected] => Array ([root] => 1 [ body] => 1 [form] => 1 [div] => 1 [span] => 1 [table] => 1) [optional_closing_tags: protected] => Array ([tr] => Array ([tr] = > 1 [td] => 1 [th] => 1) [th] => Массив ([th] => 1) [td] => Массив ([td] => 1) [li] => Массив ( [li] => 1) [dt] => Array ([dt] => 1 [ dd] => 1) [dd] => Array ([dd] => 1 [dt] => 1) [dl] => Array ([dd] => 1 [dt] => 1) [p] = > Array ([p] => 1) [nobr] => Array ([nobr] => 1) [b] => Array ([b] => 1) [option] => Array ([option] => 1)) [doc: protected] => 1 [noise: protected] => Array ())
Вы не создаете DOM правильно, вы должны сделать это следующим образом:
// Create a DOM object $dom = new simple_html_dom(); // Load HTML from a string $dom->load(curl_exec($ch)) print_r( $dom );
Подробнее см. Руководство.
редактировать
Кажется, что это проблема настроек cURL, пожалуйста, обратитесь к документации, чтобы правильно ее настроить …
Это функция, которую я обычно использую для загрузки некоторых страниц, не стесняйтесь приспосабливать ее к вашим потребностям:
function dlPage($href) { $curl = curl_init(); curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, FALSE); curl_setopt($curl, CURLOPT_HEADER, false); curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true); curl_setopt($curl, CURLOPT_URL, $href); curl_setopt($curl, CURLOPT_REFERER, $href); curl_setopt($curl, CURLOPT_RETURNTRANSFER, TRUE); curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.375.125 Safari/533.4"); $str = curl_exec($curl); curl_close($curl); // Create a DOM object $dom = new simple_html_dom(); // Load HTML from a string $dom->load($str); return $dom; } $url = 'http://www.example.com/'; $data = dlPage($url); print_r($data);
Curl вернет строку, содержащую HTML-код? Просто используйте образец быстрого запуска ?
$html = str_get_html(curl_exec($ch));