Как получить все URL-адреса со страницы (php)

У меня есть страница с URL-адресами и описаниями, перечисленными один под другим (что-то вроде закладок / списка сайтов). Как с помощью PHP получить все URL-адреса с этой страницы и записать их в txt-файл (по одному на строку, только URL-адрес, без описания)?

Страница выглядит так:

Некоторое описание

Другое описание

Еще один

И я хотел бы, чтобы вывод txt на экране выглядел следующим образом:

http://link.com

http://link2.com

http://link3.com

Один способ

// Fetch the page and print the value of every <a href="..."> attribute, one per line.
$url = "http://wwww.somewhere.com";
$data = file_get_contents($url);

// file_get_contents() returns false on failure; there is nothing to parse then.
if ($data !== false) {
    // Drop every tag except <a>, so only anchors and plain text remain.
    $data = strip_tags($data, "<a>");

    // Split on the closing tag so each chunk ends where one anchor ends.
    $d = preg_split("/<\/a>/", $data);

    foreach ($d as $u) {
        if (strpos($u, "<a href=") !== FALSE) {
            // Remove everything up to and including the opening quote of href.
            $u = preg_replace("/.*<a\s+href=\"/sm", "", $u);

            // Remove the closing quote and everything after it. The "s" modifier
            // lets "." match newlines, so link text that spans several lines is
            // removed as well instead of being printed after the URL.
            $u = preg_replace("/\".*/s", "", $u);

            print $u . "\n";
        }
    }
}

Другой способ

 // Parse the page with DOM and dump the href attribute of every <a> element.
 $url = "http://wwww.somewhere.com";
 $html = file_get_contents($url);

 // file_get_contents() returns false on failure and loadHTML() rejects empty
 // input, so only parse when there is actual markup.
 if ($html !== false && $html !== "") {
     // Collect libxml parse errors internally instead of emitting one warning
     // per problem found in malformed markup.
     $previous = libxml_use_internal_errors(true);

     $doc = new DOMDocument();
     $doc->loadHTML($html); //helps if html is well formed and has proper use of html entities!

     // Discard the collected errors and restore the previous error mode.
     libxml_clear_errors();
     libxml_use_internal_errors($previous);

     $xpath = new DOMXpath($doc);
     $nodes = $xpath->query('//a');

     foreach ($nodes as $node) {
         var_dump($node->getAttribute('href'));
     }
 }

Вы можете использовать это, чтобы получить все ссылки на данной веб-странице.

 <?php
 // Print the href of every link found on the page at $url, separated by <br>.
 // NOTE: $url is not set in this snippet; it must be defined before this code runs.
 $var = fread_url($url);

 // preg_match_all() already takes $matches by reference; a call-time "&" is a
 // fatal error since PHP 5.4. The cast turns a failed fetch (false) into "".
 preg_match_all("/a[\s]+[^>]*?href[\s]?=[\s\"\']+" . "(.*?)[\"\']+.*?>" . "([^<]+|.*?)?<\/a>/", (string) $var, $matches);

 // Capture group 1 holds the href values.
 $matches = $matches[1];

 foreach ($matches as $link) {
     print($link . "<br>");
 }

 /**
  * Download the body of a URL.
  *
  * Uses cURL when the extension is available: sends a browser-like user agent,
  * follows redirects, sends $ref as the Referer header and saves cookies to
  * cookie.txt. Otherwise the URL is read through fopen().
  *
  * @param string $url Address of the page to download.
  * @param string $ref Value for the Referer header (cURL branch only).
  *
  * @return string|false The page body; false when curl_exec() fails; an empty
  *                      string when fopen() cannot open the URL.
  */
 function fread_url($url, $ref = "")
 {
     // Initialised up front so the fopen branch can append to it safely.
     $html = "";

     if (function_exists("curl_init")) {
         $user_agent = "Mozilla/4.0 (compatible; MSIE 5.01; " . "Windows NT 5.0)";

         $ch = curl_init();
         curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
         curl_setopt($ch, CURLOPT_HTTPGET, 1);
         curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
         curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
         curl_setopt($ch, CURLOPT_URL, $url);
         curl_setopt($ch, CURLOPT_REFERER, $ref);
         curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie.txt');

         $html = curl_exec($ch);
         curl_close($ch);
     } else {
         $hfile = fopen($url, "r");

         if ($hfile) {
             while (!feof($hfile)) {
                 $html .= fgets($hfile, 1024);
             }

             // Release the stream handle once the whole body has been read.
             fclose($hfile);
         }
     }

     return $html;
 }
 ?>