Ранее я использовал следующий скрипт cURL для получения данных с веб-сайта DVLA, чтобы подставлять их на своём сайте. Приношу извинения за то, что не могу указать автора исходного кода: я взял его с этого сайта, но не записал источник.
/**
 * Points the cURL handle at $url, executes it and returns whatever curl_exec() returned.
 *
 * A URL without a host part is prefixed with the host remembered from the previous
 * call (kept in a static variable that is shared by every handle).
 *
 * @param resource $ch  cURL handle
 * @param string   $url URL to request
 * @return mixed the result of curl_exec()
 * @throws InvalidArgumentException when $ch is not a curl resource or $url is not a string
 * @throws Exception when curl reports an error, or when the response is an empty
 *                   string and the HTTP code is not 203
 */
function hhb_curl_exec($ch, $url) {
    static $hhb_curl_domainCache = "";
    //$hhb_curl_domainCache=&$this->hhb_curl_domainCache;
    //$ch=&$this->curlh;
    if (!is_resource($ch) || get_resource_type($ch) !== 'curl') {
        throw new InvalidArgumentException('$ch must be a curl handle!');
    }
    if (!is_string($url)) {
        throw new InvalidArgumentException('$url must be a string!');
    }
    $tmpvar = "";
    // No host in $url: complete it with the cached host (only the host is cached, not the scheme).
    if (parse_url($url, PHP_URL_HOST) === null) {
        if (substr($url, 0, 1) !== '/') {
            $url = $hhb_curl_domainCache . '/' . $url;
        } else {
            $url = $hhb_curl_domainCache . $url;
        }
    } ;
    curl_setopt($ch, CURLOPT_URL, $url);
    $html = curl_exec($ch);
    if (curl_errno($ch)) {
        throw new Exception('Curl error (curl_errno=' . curl_errno($ch) . ') on url ' . var_export($url, true) . ': ' . curl_error($ch));
        // echo 'Curl error: ' . curl_error($ch);
    }
    // NOTE(review): the original author describes 203 as "success, but no output";
    // the HTTP status for "No Content" is 204 - confirm which one was intended.
    if ($html === '' && 203 != ($tmpvar = curl_getinfo($ch, CURLINFO_HTTP_CODE)) /*203 is "success, but no output"..*/ ) {
        throw new Exception('Curl returned nothing for ' . var_export($url, true) . ' but HTTP_RESPONSE_CODE was ' . var_export($tmpvar, true));
    } ;
    //remember that curl (usually) auto-follows the "Location: " http redirects..
    $hhb_curl_domainCache = parse_url(curl_getinfo($ch, CURLINFO_EFFECTIVE_URL), PHP_URL_HOST);
    return $html;
}

/**
 * Runs hhb_curl_exec() with CURLOPT_HEADER and CURLOPT_VERBOSE switched on and
 * splits the response into header lines, cookies and body.
 *
 * CURLOPT_HEADER and CURLOPT_VERBOSE stay enabled on the handle after the call.
 *
 * @param resource $ch               cURL handle
 * @param string   $url              URL to request
 * @param array    $returnHeaders    (out) lines of the text before the first blank line of the response
 * @param array    $returnCookies    (out) name => value pairs taken from Set-Cookie header lines
 * @param string   $verboseDebugInfo (out) curl's verbose log for this request
 * @return string everything after the first CRLF CRLF sequence of the response
 * @throws InvalidArgumentException when $ch is not a curl resource or $url is not a string
 * @throws Exception propagated from hhb_curl_exec()
 */
function hhb_curl_exec2($ch, $url, &$returnHeaders = array(), &$returnCookies = array(), &$verboseDebugInfo = "") {
    $returnHeaders = array();
    $returnCookies = array();
    $verboseDebugInfo = "";
    if (!is_resource($ch) || get_resource_type($ch) !== 'curl') {
        throw new InvalidArgumentException('$ch must be a curl handle!');
    }
    if (!is_string($url)) {
        throw new InvalidArgumentException('$url must be a string!');
    }
    // curl writes its verbose log to a temporary file, which is read back by path below.
    $verbosefileh = tmpfile();
    $verbosefile = stream_get_meta_data($verbosefileh);
    $verbosefile = $verbosefile['uri'];
    curl_setopt($ch, CURLOPT_VERBOSE, 1);
    curl_setopt($ch, CURLOPT_STDERR, $verbosefileh);
    curl_setopt($ch, CURLOPT_HEADER, 1);
    $html = hhb_curl_exec($ch, $url);
    $verboseDebugInfo = file_get_contents($verbosefile);
    curl_setopt($ch, CURLOPT_STDERR, NULL);
    fclose($verbosefileh);
    unset($verbosefile, $verbosefileh);
    $headers = array();
    $crlf = "\x0d\x0a";
    // Only the first blank line is used as the header/body boundary.
    $thepos = strpos($html, $crlf . $crlf, 0);
    $headersString = substr($html, 0, $thepos);
    $headerArr = explode($crlf, $headersString);
    $returnHeaders = $headerArr;
    unset($headersString, $headerArr);
    $htmlBody = substr($html, $thepos + 4); //should work on utf8/ascii headers... utf32? not so sure..
    unset($html);
    //I REALLY HOPE THERE EXIST A BETTER WAY TO GET COOKIES.. good grief this looks ugly..
    //at least it's tested and seems to work perfectly...
    // Reads the characters of $str up to the first '=' (spaces are skipped) and returns them
    // url-decoded; $len receives the offset of that '=' (or the string length when there is none).
    $grabCookieName = function($str,&$len) {
        $len=0;
        $ret = "";
        $i = 0;
        for ($i = 0; $i < strlen($str); ++$i) {
            ++$len;
            if ($str[$i] === ' ') {
                continue;
            }
            if ($str[$i] === '=') {
                --$len;
                break;
            }
            $ret .= $str[$i];
        }
        return urldecode($ret);
    };
    foreach ($returnHeaders as $header) {
        // Example header lines this loop is meant to handle:
        //   Set-Cookie: crlfcoookielol=crlf+is%0D%0A+and+newline+is+%0D%0A+and+semicolon+is%3B+and+not+sure+what+else
        //   Set-Cookie:ci_spill=a%3A4%3A%7Bs%3A10%3A%22session_id%22...; expires=Tue, 14-Jul-2015 13:50:39 GMT; path=/
        //   Set-Cookie: sessionToken=abc123; Expires=Wed, 09 Jun 2021 10:18:14 GMT;
        // Cookie names cannot contain any of the following '=,; \t\r\n\013\014'
        if (stripos($header, "Set-Cookie:") !== 0) {
            continue; /**/
        }
        $header = trim(substr($header, strlen("Set-Cookie:")));
        $len=0;
        // Every "name=value" segment separated by ';' is stored, so attributes such as
        // expires/path end up in $returnCookies next to the cookie itself.
        while (strlen($header) > 0) {
            $cookiename = $grabCookieName($header,$len);
            $returnCookies[$cookiename] = '';
            $header = substr($header, $len + 1); //also remove the =
            if (strlen($header) < 1) {
                break;
            } ;
            $thepos = strpos($header, ';');
            if ($thepos === false) { //last cookie in this Set-Cookie.
                $returnCookies[$cookiename] = urldecode($header);
                break;
            }
            $returnCookies[$cookiename] = urldecode(substr($header, 0, $thepos));
            $header = trim(substr($header, $thepos + 1)); //also remove the ;
        }
    }
    unset($header, $cookiename, $thepos);
    return $htmlBody;
}
##############################################################
/**
 * Creates a cURL handle preconfigured with a set of default options.
 *
 * Unless the caller supplies CURLOPT_COOKIEFILE, a temporary file is created and used
 * as the cookie file. Entries of $custom_options_array override the defaults key by key.
 *
 * @param array $custom_options_array CURLOPT_* => value pairs; an empty value is treated as array()
 * @return resource the handle returned by curl_init()
 * @throws InvalidArgumentException when $custom_options_array is neither empty nor an array
 */
function hhb_curl_init($custom_options_array = array()) {
    if (empty($custom_options_array)) {
        $custom_options_array = array();
        //i feel kinda bad about this.. argv[1] of curl_init wants a string(url), or NULL
        //at least i want to allow NULL aswell :/
    }
    if (!is_array($custom_options_array)) {
        throw new InvalidArgumentException('$custom_options_array must be an array!');
    } ;
    // NOTE(review): CURLOPT_SSL_VERIFYPEER => false turns off TLS certificate verification.
    $options_array = array(
        CURLOPT_AUTOREFERER => true,
        CURLOPT_BINARYTRANSFER => true,
        CURLOPT_COOKIESESSION => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_FORBID_REUSE => false,
        CURLOPT_HTTPGET => true,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT => 11,
        CURLOPT_ENCODING => ""
        //CURLOPT_REFERER=>'example.org',
        //CURLOPT_USERAGENT=>'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0'
    );
    if (!array_key_exists(CURLOPT_COOKIEFILE, $custom_options_array)) {
        //do this only conditionally because tmpfile() call..
        // The tmpfile() handles are kept in a static array so they stay referenced after this call returns.
        static $curl_cookiefiles_arr = array(); //workaround for https://bugs.php.net/bug.php?id=66014
        $curl_cookiefiles_arr[] = $options_array[CURLOPT_COOKIEFILE] = tmpfile();
        $options_array[CURLOPT_COOKIEFILE] = stream_get_meta_data($options_array[CURLOPT_COOKIEFILE]);
        $options_array[CURLOPT_COOKIEFILE] = $options_array[CURLOPT_COOKIEFILE]['uri'];
    }
    //we can't use array_merge() because of how it handles integer-keys, it would/could cause corruption
    foreach ($custom_options_array as $key => $val) {
        $options_array[$key] = $val;
    }
    unset($key, $val, $custom_options_array);
    $curl = curl_init();
    curl_setopt_array($curl, $options_array);
    return $curl;
}
//------------------------------------------------//
//Registration and VRM have to be valid and make as per the DVLA format for makes otherwise this won't work!
$registration_number = 'PK07LVD'; // must be a valid VRM to get the correct response
$vehicle_maker ='BMW'; //Make must match VRM held with DVLA and entered in same format as DVLA list on site
$ch=hhb_curl_init();
$debugHeaders=array();
$debugCookies=array();
$debugRequest='';
$html=hhb_curl_exec2($ch,'https://www.vehicleenquiry.service.gov.uk/Default.aspx',$debugHeaders,$debugCookies,$debugRequest);
//first do an empty request to get a session id and cookies and the weird VIEWSTATE stuff...
$domd=@DOMDocument::loadHTML($html);
assert(is_object($domd));
// Hidden form fields of the fetched page; they are sent back unchanged with the POST below.
$__VIEWSTATE=$domd->getElementById('__VIEWSTATE')->getAttribute('value');
$__VIEWSTATEGENERATOR=$domd->getElementById('__VIEWSTATEGENERATOR')->getAttribute('value');
$__EVENTVALIDATION=$domd->getElementById('__EVENTVALIDATION')->getAttribute('value');
//var_dump('__VIEWSTATE:',$__VIEWSTATE,'__VIEWSTATEGENERATOR:',$__VIEWSTATEGENERATOR,'__EVENTVALIDATION:',$__EVENTVALIDATION,'headers:',$debugHeaders,'cookies:',$debugCookies,'html:',$html,'request:',$debugRequest,'domd:',$domd);
//now to get the POST stuff
// NOTE(review): '$registration_number' and '$vehicle_maker' are single-quoted, so PHP does not
// interpolate them - the literal text, not the variables' values, is what gets posted.
curl_setopt_array($ch,array(
    CURLOPT_POST=>true,
    CURLOPT_POSTFIELDS=>http_build_query(array(
        '__LASTFOCUS'=>'',
        '__EVENTTARGET'=>'',
        '__VIEWSTATE'=>$__VIEWSTATE,
        '__VIEWSTATEGENERATOR'=>$__VIEWSTATEGENERATOR,
        '__EVENTVALIDATION'=>$__EVENTVALIDATION,
        'ctl00$MainContent$txtSearchVrm'=>'$registration_number',
        'ctl00$MainContent$MakeTextBox'=>'$vehicle_maker',
        'ctl00$MainContent$txtV5CDocumentReferenceNumber'=>'',
        'ctl00$MainContent$butSearch'=>'Search',
    ))
));
$html=hhb_curl_exec2($ch,'https://www.vehicleenquiry.service.gov.uk/Default.aspx',$debugHeaders,$debugCookies,$debugRequest);
//var_dump('headers:',$debugHeaders,'cookies:',$debugCookies,'html:',$html,'request:',$debugRequest);
echo $html;
?>
По какой-то причине за последнюю неделю что-то изменилось, и в $html больше не приходит результат запроса: возвращается только исходная форма для ввода регистрационного номера и марки («Make») транспортного средства.
Переменные, которые отправляет запрос:
Array ( [__EVENTTARGET] => [__EVENTARGUMENT] => [__VIEWSTATE] => /wEPDwUKMTQ2ODczMjQwMA8WAh4NU2VydmljZUhlYWRlcjL8BAABAAAA/////wEAAAAAAAAADAIAAABBRG////LkRtc1NoYXJlZC5EYXRhQ29udHJhY3RzLkNvbW1vbi5TZXJ2aWNlVHlwZUNvZGUBAAAAB3ZhbHVlX18ACAIAAAACAAAABfr///8rRG1zU2hhcmVkLkRhdGFDb250cmFjdHMuQ29tbW9uLkxhbmd1YWdlQ29kZQEAAAAHdmFsdWVfXwAIAgAAAAAAAAAKCxYCZg9kFgICAQ9kFgRmDxYCHgRUZXh0BQ9WZWhpY2xlIGVucXVpcnlkAgIPDxYCHgtOYXZpZ2F0ZVVybAUvaHR0cHM6Ly93aC5zbmFwc3VydmV5cy5jb20vcy5hc3A/az0xNDcwMjMwNjQwNTRkZBgBBSFjdGwwMCRNYWluQ29udGVudCRtdlZlaGljbGVTZWFyY2gPD2RmZAl8wP9HdAZERXThmPjkY7mMhrt6 [__VIEWSTATEGENERATOR] => CA0B0334 [__EVENTVALIDATION] => /wEdAAec98WnAVQeQUdqU6NI4oVRBOwywjxOOgpEYFN2beEgnftoCCZcWJSqSRLD/FKuxxkI0x5r4gPeKgWgSNWptTEWInv2PXI3Jzdn3U6eHDG4Qb7lltCXTdtnDbitYujbDJI0GQSIMiv32DreL6oRbYpQ8QTO8WJr3q5Y80Jf5PzdZW5VzdA= [ctl00$MainContent$txtSearchVrm] => pk07lvd [ctl00$MainContent$MakeTextBox] => bmw [ctl00$MainContent$txtV5CDocumentReferenceNumber] => [ctl00$MainContent$butSearch] => Search )
Когда я размещаю форму на своём сервере и отправляю эти переменные методом POST на https://vehicleenquiry.service.gov.uk/ , я получаю нужный результат, но он, разумеется, отображается в браузере на странице правительственного сайта.
Прошу помочь разобраться, что происходит и что могло сломаться в коде. Я уже рву на себе волосы, проверяя каждый этап: почему при запуске кода с cURL в $html возвращается не результат, а только начальная форма запроса?
Буду благодарен за любую помощь. Я знаю, что есть похожий вопрос, но в нём меньше подробностей о том, что уже было сделано для поиска причины. Поскольку я новичок на сайте, я не мог оставить там ответ.
// ——————- Правка: исходный код добавлен 09.10.16 21:46 —————- //
Этот код является частью другой функции. Для ясности я убрал внешнюю (обёртывающую) функцию.
// ————————— Что касается вывода ———- //
В некотором смысле этот код действительно работает: в $html всегда оказывается ответ от службы проверки транспортных средств.
Проблема в том, что удаленный сайт ( https://vehicleenquiry.service.gov.uk ), похоже, не обрабатывает данные POST и не возвращает результат запроса. Все, что возвращается, это начальная форма запроса, как если бы вы не ввели никаких данных формы.
Я ожидал, что $html будет содержать ответ с информацией о транспортном средстве, налоге и техосмотре (MOT):
Вместо этого запрос cURL возвращает исходную форму запроса.
Первая ошибка:
'ctl00$MainContent$txtSearchVrm'=>'$registration_number', 'ctl00$MainContent$MakeTextBox'=>'$vehicle_maker',
Должно быть:
'ctl00$MainContent$txtSearchVrm'=>$registration_number, 'ctl00$MainContent$MakeTextBox'=>$vehicle_maker,
Вторая ошибка
$html=hhb_curl_exec2($ch,'https://www.vehicleenquiry.service.gov.uk/Default.aspx',$debugHeaders,$debugCookies,$debugRequest);
Должно быть:
$html=hhb_curl_exec2($ch,'https://vehicleenquiry.service.gov.uk/Default.aspx',$debugHeaders,$debugCookies,$debugRequest);
Весь код:
<?php
/**
 * Tells whether $ch is a usable cURL handle.
 *
 * curl_init() returns a resource before PHP 8.0 and a CurlHandle object from
 * PHP 8.0 on, so both forms are accepted.
 *
 * @param mixed $ch value to check
 * @return bool
 */
function hhb_curl_is_handle($ch)
{
    if ($ch instanceof CurlHandle) {
        return true;
    }
    return is_resource($ch) && get_resource_type($ch) === 'curl';
}

/**
 * Points the cURL handle at $url, executes it and returns the result of curl_exec().
 *
 * A URL without a host part is completed with the origin (scheme://host[:port])
 * of the previous request's effective URL, so relative links keep their scheme.
 *
 * @param resource|CurlHandle $ch  cURL handle
 * @param string              $url absolute URL, or a path relative to the previous request's origin
 * @return mixed the result of curl_exec()
 * @throws InvalidArgumentException when $ch is not a curl handle or $url is not a string
 * @throws Exception when curl reports an error, or when the response is empty
 *                   although the HTTP status does not announce an empty body
 */
function hhb_curl_exec($ch, $url)
{
    static $hhb_curl_domainCache = "";
    if (!hhb_curl_is_handle($ch)) {
        throw new InvalidArgumentException('$ch must be a curl handle!');
    }
    if (!is_string($url)) {
        throw new InvalidArgumentException('$url must be a string!');
    }
    if (parse_url($url, PHP_URL_HOST) === null) {
        $url = $hhb_curl_domainCache . (substr($url, 0, 1) !== '/' ? '/' : '') . $url;
    }
    curl_setopt($ch, CURLOPT_URL, $url);
    $html = curl_exec($ch);
    if (curl_errno($ch)) {
        throw new Exception('Curl error (curl_errno=' . curl_errno($ch) . ') on url ' . var_export($url, true) . ': ' . curl_error($ch));
    }
    if ($html === '') {
        $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        // 204 is "No Content"; 203 is still tolerated because earlier versions accepted it.
        if ($httpCode !== 204 && $httpCode !== 203) {
            throw new Exception('Curl returned nothing for ' . var_export($url, true) . ' but HTTP_RESPONSE_CODE was ' . var_export($httpCode, true));
        }
    }
    // curl (usually) auto-follows "Location:" redirects, so remember where we ended up.
    $effective = parse_url((string) curl_getinfo($ch, CURLINFO_EFFECTIVE_URL));
    if (is_array($effective) && isset($effective['host'])) {
        $hhb_curl_domainCache = (isset($effective['scheme']) ? $effective['scheme'] . '://' : '')
            . $effective['host']
            . (isset($effective['port']) ? ':' . $effective['port'] : '');
    }
    return $html;
}

/**
 * Runs hhb_curl_exec() with response headers and verbose logging enabled and
 * splits the response into header lines, cookies and body.
 *
 * CURLOPT_HEADER and CURLOPT_VERBOSE stay enabled on the handle after the call.
 *
 * @param resource|CurlHandle $ch               cURL handle
 * @param string              $url              URL to request
 * @param array               $returnHeaders    (out) non-empty header lines of every response in the redirect chain
 * @param array               $returnCookies    (out) cookie name => value, taken from the Set-Cookie headers
 * @param string              $verboseDebugInfo (out) curl's verbose log for this request
 * @return string the response body without any header block
 * @throws InvalidArgumentException when $ch is not a curl handle or $url is not a string
 * @throws Exception propagated from hhb_curl_exec()
 */
function hhb_curl_exec2($ch, $url, &$returnHeaders = array(), &$returnCookies = array(), &$verboseDebugInfo = "")
{
    $returnHeaders = array();
    $returnCookies = array();
    $verboseDebugInfo = "";
    if (!hhb_curl_is_handle($ch)) {
        throw new InvalidArgumentException('$ch must be a curl handle!');
    }
    if (!is_string($url)) {
        throw new InvalidArgumentException('$url must be a string!');
    }
    // curl writes its verbose log to a temporary file, which is read back by path.
    $verbosefileh = tmpfile();
    $verbosefile = stream_get_meta_data($verbosefileh);
    $verbosefile = $verbosefile['uri'];
    curl_setopt($ch, CURLOPT_VERBOSE, 1);
    curl_setopt($ch, CURLOPT_STDERR, $verbosefileh);
    curl_setopt($ch, CURLOPT_HEADER, 1);
    try {
        $html = hhb_curl_exec($ch, $url);
    } catch (Exception $e) {
        // Detach and close the log file even when the request fails, then re-throw.
        $verboseDebugInfo = (string) file_get_contents($verbosefile);
        curl_setopt($ch, CURLOPT_STDERR, NULL);
        fclose($verbosefileh);
        throw $e;
    }
    $verboseDebugInfo = (string) file_get_contents($verbosefile);
    curl_setopt($ch, CURLOPT_STDERR, NULL);
    fclose($verbosefileh);

    // With CURLOPT_FOLLOWLOCATION every hop contributes its own header block, so the
    // first blank line is not a reliable boundary; curl reports the total header size.
    $headerSize = (int) curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    $headersString = (string) substr($html, 0, $headerSize);
    $htmlBody = (string) substr($html, $headerSize);
    $returnHeaders = array_values(array_filter(explode("\x0d\x0a", $headersString), 'strlen'));

    foreach ($returnHeaders as $header) {
        if (stripos($header, "Set-Cookie:") !== 0) {
            continue;
        }
        // A Set-Cookie header carries exactly one "name=value" pair; everything after
        // the first ';' is attributes (expires, path, HttpOnly, ...) and not a cookie.
        $cookiePair = trim((string) substr($header, strlen("Set-Cookie:")));
        $semicolon = strpos($cookiePair, ';');
        if ($semicolon !== false) {
            $cookiePair = substr($cookiePair, 0, $semicolon);
        }
        $equals = strpos($cookiePair, '=');
        if ($equals === false) {
            continue;
        }
        $cookieName = trim(substr($cookiePair, 0, $equals));
        if ($cookieName === '') {
            continue;
        }
        $returnCookies[urldecode($cookieName)] = urldecode(trim((string) substr($cookiePair, $equals + 1)));
    }
    return $htmlBody;
}

/**
 * Creates a cURL handle preconfigured with a set of default options.
 *
 * Unless the caller supplies CURLOPT_COOKIEFILE, a temporary file is created and used
 * as the cookie file. Entries of $custom_options_array override the defaults key by key.
 *
 * @param array $custom_options_array CURLOPT_* => value pairs; an empty value is treated as array()
 * @return resource|CurlHandle the handle returned by curl_init()
 * @throws InvalidArgumentException when $custom_options_array is neither empty nor an array
 */
function hhb_curl_init($custom_options_array = array())
{
    if (empty($custom_options_array)) {
        $custom_options_array = array(); // NULL and friends are accepted as "no options"
    }
    if (!is_array($custom_options_array)) {
        throw new InvalidArgumentException('$custom_options_array must be an array!');
    }
    // CURLOPT_BINARYTRANSFER is no longer listed: it has had no effect for a long time.
    // NOTE(review): CURLOPT_SSL_VERIFYPEER => false turns off TLS certificate verification;
    // pass array(CURLOPT_SSL_VERIFYPEER => true) wherever a CA bundle is available.
    $options_array = array(
        CURLOPT_AUTOREFERER => true,
        CURLOPT_COOKIESESSION => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_FORBID_REUSE => false,
        CURLOPT_HTTPGET => true,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT => 11,
        CURLOPT_ENCODING => "",
    );
    if (!array_key_exists(CURLOPT_COOKIEFILE, $custom_options_array)) {
        // Keep the tmpfile() handle referenced so the cookie file outlives this call
        // (workaround for https://bugs.php.net/bug.php?id=66014).
        static $curl_cookiefiles_arr = array();
        $cookieFileHandle = tmpfile();
        $curl_cookiefiles_arr[] = $cookieFileHandle;
        $cookieFileMeta = stream_get_meta_data($cookieFileHandle);
        $options_array[CURLOPT_COOKIEFILE] = $cookieFileMeta['uri'];
    }
    // array_merge() would renumber the integer CURLOPT_* keys, so copy entry by entry.
    foreach ($custom_options_array as $key => $val) {
        $options_array[$key] = $val;
    }
    $curl = curl_init();
    curl_setopt_array($curl, $options_array);
    return $curl;
}

/**
 * Looks a vehicle up on the DVLA vehicle enquiry site and returns the result page.
 *
 * The form page is fetched first to obtain the session cookies and the ASP.NET
 * hidden fields, which are then posted back together with the search values.
 *
 * @param array $send_array index 0 holds the registration number (VRM), index 2 the
 *                          make; both must match the DVLA records
 * @return string HTML of the response to the search request
 * @throws InvalidArgumentException when the registration number or the make is missing
 * @throws RuntimeException when the form page cannot be parsed or lacks a hidden field
 * @throws Exception propagated from the curl helpers
 */
function dvlascrape($send_array)
{
    $vrm_strip = isset($send_array[0]) ? trim((string) $send_array[0]) : '';
    $make_trimmed = isset($send_array[2]) ? trim((string) $send_array[2]) : '';
    if ($vrm_strip === '' || $make_trimmed === '') {
        throw new InvalidArgumentException('$send_array must hold the registration number at index 0 and the make at index 2!');
    }

    // The same host is used for both requests (no "www." prefix).
    $formUrl = 'https://vehicleenquiry.service.gov.uk/Default.aspx';
    $ch = hhb_curl_init();
    $debugHeaders = array();
    $debugCookies = array();
    $debugRequest = '';

    // First an empty request to get a session id, cookies and the VIEWSTATE fields.
    $html = hhb_curl_exec2($ch, $formUrl, $debugHeaders, $debugCookies, $debugRequest);
    if ($html === '') {
        throw new RuntimeException('The enquiry form page came back empty.');
    }
    $previousLibxmlSetting = libxml_use_internal_errors(true); // real-world HTML is rarely valid
    $domd = new DOMDocument();
    $parsed = $domd->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousLibxmlSetting);
    if (!$parsed) {
        throw new RuntimeException('The enquiry form page could not be parsed.');
    }

    $hiddenFields = array();
    foreach (array('__VIEWSTATE', '__VIEWSTATEGENERATOR', '__EVENTVALIDATION') as $fieldId) {
        $fieldNode = $domd->getElementById($fieldId);
        if ($fieldNode === null) {
            throw new RuntimeException('The enquiry form page has no ' . $fieldId . ' field.');
        }
        $hiddenFields[$fieldId] = $fieldNode->getAttribute('value');
    }

    // Now the actual search.
    curl_setopt_array($ch, array(
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => http_build_query(array(
            '__LASTFOCUS' => '',
            '__EVENTTARGET' => '',
            '__EVENTARGUMENT' => '',
            '__VIEWSTATE' => $hiddenFields['__VIEWSTATE'],
            '__VIEWSTATEGENERATOR' => $hiddenFields['__VIEWSTATEGENERATOR'],
            '__EVENTVALIDATION' => $hiddenFields['__EVENTVALIDATION'],
            'ctl00$MainContent$txtSearchVrm' => $vrm_strip,
            'ctl00$MainContent$MakeTextBox' => $make_trimmed,
            'ctl00$MainContent$txtV5CDocumentReferenceNumber' => '',
            'ctl00$MainContent$butSearch' => 'Search',
        )),
    ));
    return hhb_curl_exec2($ch, $formUrl, $debugHeaders, $debugCookies, $debugRequest);
}

// Registration number and make have to be valid and in the DVLA format, otherwise this won't work!
var_dump(dvlascrape(array('PK07LVD', '', 'BMW')));
?>
<?php
/**
 * Tells whether $ch is a usable cURL handle.
 *
 * curl_init() returns a resource before PHP 8.0 and a CurlHandle object from
 * PHP 8.0 on, so both forms are accepted.
 *
 * @param mixed $ch value to check
 * @return bool
 */
function hhb_curl_is_handle($ch)
{
    if ($ch instanceof CurlHandle) {
        return true;
    }
    return is_resource($ch) && get_resource_type($ch) === 'curl';
}

/**
 * Points the cURL handle at $url, executes it and returns the result of curl_exec().
 *
 * A URL without a host part is completed with the origin (scheme://host[:port])
 * of the previous request's effective URL, so relative links keep their scheme.
 *
 * @param resource|CurlHandle $ch  cURL handle
 * @param string              $url absolute URL, or a path relative to the previous request's origin
 * @return mixed the result of curl_exec()
 * @throws InvalidArgumentException when $ch is not a curl handle or $url is not a string
 * @throws Exception when curl reports an error, or when the response is empty
 *                   although the HTTP status does not announce an empty body
 */
function hhb_curl_exec($ch, $url)
{
    static $hhb_curl_domainCache = "";
    if (!hhb_curl_is_handle($ch)) {
        throw new InvalidArgumentException('$ch must be a curl handle!');
    }
    if (!is_string($url)) {
        throw new InvalidArgumentException('$url must be a string!');
    }
    if (parse_url($url, PHP_URL_HOST) === null) {
        $url = $hhb_curl_domainCache . (substr($url, 0, 1) !== '/' ? '/' : '') . $url;
    }
    curl_setopt($ch, CURLOPT_URL, $url);
    $html = curl_exec($ch);
    if (curl_errno($ch)) {
        throw new Exception('Curl error (curl_errno=' . curl_errno($ch) . ') on url ' . var_export($url, true) . ': ' . curl_error($ch));
    }
    if ($html === '') {
        $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        // 204 is "No Content"; 203 is still tolerated because earlier versions accepted it.
        if ($httpCode !== 204 && $httpCode !== 203) {
            throw new Exception('Curl returned nothing for ' . var_export($url, true) . ' but HTTP_RESPONSE_CODE was ' . var_export($httpCode, true));
        }
    }
    // curl (usually) auto-follows "Location:" redirects, so remember where we ended up.
    $effective = parse_url((string) curl_getinfo($ch, CURLINFO_EFFECTIVE_URL));
    if (is_array($effective) && isset($effective['host'])) {
        $hhb_curl_domainCache = (isset($effective['scheme']) ? $effective['scheme'] . '://' : '')
            . $effective['host']
            . (isset($effective['port']) ? ':' . $effective['port'] : '');
    }
    return $html;
}

/**
 * Runs hhb_curl_exec() with response headers and verbose logging enabled and
 * splits the response into header lines, cookies and body.
 *
 * CURLOPT_HEADER and CURLOPT_VERBOSE stay enabled on the handle after the call.
 *
 * @param resource|CurlHandle $ch               cURL handle
 * @param string              $url              URL to request
 * @param array               $returnHeaders    (out) non-empty header lines of every response in the redirect chain
 * @param array               $returnCookies    (out) cookie name => value, taken from the Set-Cookie headers
 * @param string              $verboseDebugInfo (out) curl's verbose log for this request
 * @return string the response body without any header block
 * @throws InvalidArgumentException when $ch is not a curl handle or $url is not a string
 * @throws Exception propagated from hhb_curl_exec()
 */
function hhb_curl_exec2($ch, $url, &$returnHeaders = array(), &$returnCookies = array(), &$verboseDebugInfo = "")
{
    $returnHeaders = array();
    $returnCookies = array();
    $verboseDebugInfo = "";
    if (!hhb_curl_is_handle($ch)) {
        throw new InvalidArgumentException('$ch must be a curl handle!');
    }
    if (!is_string($url)) {
        throw new InvalidArgumentException('$url must be a string!');
    }
    // curl writes its verbose log to a temporary file, which is read back by path.
    $verbosefileh = tmpfile();
    $verbosefile = stream_get_meta_data($verbosefileh);
    $verbosefile = $verbosefile['uri'];
    curl_setopt($ch, CURLOPT_VERBOSE, 1);
    curl_setopt($ch, CURLOPT_STDERR, $verbosefileh);
    curl_setopt($ch, CURLOPT_HEADER, 1);
    try {
        $html = hhb_curl_exec($ch, $url);
    } catch (Exception $e) {
        // Detach and close the log file even when the request fails, then re-throw.
        $verboseDebugInfo = (string) file_get_contents($verbosefile);
        curl_setopt($ch, CURLOPT_STDERR, NULL);
        fclose($verbosefileh);
        throw $e;
    }
    $verboseDebugInfo = (string) file_get_contents($verbosefile);
    curl_setopt($ch, CURLOPT_STDERR, NULL);
    fclose($verbosefileh);

    // With CURLOPT_FOLLOWLOCATION every hop contributes its own header block, so the
    // first blank line is not a reliable boundary; curl reports the total header size.
    $headerSize = (int) curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    $headersString = (string) substr($html, 0, $headerSize);
    $htmlBody = (string) substr($html, $headerSize);
    $returnHeaders = array_values(array_filter(explode("\x0d\x0a", $headersString), 'strlen'));

    foreach ($returnHeaders as $header) {
        if (stripos($header, "Set-Cookie:") !== 0) {
            continue;
        }
        // A Set-Cookie header carries exactly one "name=value" pair; everything after
        // the first ';' is attributes (expires, path, HttpOnly, ...) and not a cookie.
        $cookiePair = trim((string) substr($header, strlen("Set-Cookie:")));
        $semicolon = strpos($cookiePair, ';');
        if ($semicolon !== false) {
            $cookiePair = substr($cookiePair, 0, $semicolon);
        }
        $equals = strpos($cookiePair, '=');
        if ($equals === false) {
            continue;
        }
        $cookieName = trim(substr($cookiePair, 0, $equals));
        if ($cookieName === '') {
            continue;
        }
        $returnCookies[urldecode($cookieName)] = urldecode(trim((string) substr($cookiePair, $equals + 1)));
    }
    return $htmlBody;
}

/**
 * Creates a cURL handle preconfigured with a set of default options.
 *
 * Unless the caller supplies CURLOPT_COOKIEFILE, a temporary file is created and used
 * as the cookie file. Entries of $custom_options_array override the defaults key by key.
 *
 * @param array $custom_options_array CURLOPT_* => value pairs; an empty value is treated as array()
 * @return resource|CurlHandle the handle returned by curl_init()
 * @throws InvalidArgumentException when $custom_options_array is neither empty nor an array
 */
function hhb_curl_init($custom_options_array = array())
{
    if (empty($custom_options_array)) {
        $custom_options_array = array(); // NULL and friends are accepted as "no options"
    }
    if (!is_array($custom_options_array)) {
        throw new InvalidArgumentException('$custom_options_array must be an array!');
    }
    // CURLOPT_BINARYTRANSFER is no longer listed: it has had no effect for a long time.
    // NOTE(review): CURLOPT_SSL_VERIFYPEER => false turns off TLS certificate verification;
    // pass array(CURLOPT_SSL_VERIFYPEER => true) wherever a CA bundle is available.
    $options_array = array(
        CURLOPT_AUTOREFERER => true,
        CURLOPT_COOKIESESSION => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_FORBID_REUSE => false,
        CURLOPT_HTTPGET => true,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT => 11,
        CURLOPT_ENCODING => "",
    );
    if (!array_key_exists(CURLOPT_COOKIEFILE, $custom_options_array)) {
        // Keep the tmpfile() handle referenced so the cookie file outlives this call
        // (workaround for https://bugs.php.net/bug.php?id=66014).
        static $curl_cookiefiles_arr = array();
        $cookieFileHandle = tmpfile();
        $curl_cookiefiles_arr[] = $cookieFileHandle;
        $cookieFileMeta = stream_get_meta_data($cookieFileHandle);
        $options_array[CURLOPT_COOKIEFILE] = $cookieFileMeta['uri'];
    }
    // array_merge() would renumber the integer CURLOPT_* keys, so copy entry by entry.
    foreach ($custom_options_array as $key => $val) {
        $options_array[$key] = $val;
    }
    $curl = curl_init();
    curl_setopt_array($curl, $options_array);
    return $curl;
}

/**
 * Looks a vehicle up on the DVLA vehicle enquiry site and returns the result page.
 *
 * The form page is fetched first to obtain the session cookies and the ASP.NET
 * hidden fields, which are then posted back together with the search values.
 *
 * @param array $send_array index 0 holds the registration number (VRM), index 2 the
 *                          make; both must match the DVLA records
 * @return string HTML of the response to the search request
 * @throws InvalidArgumentException when the registration number or the make is missing
 * @throws RuntimeException when the form page cannot be parsed or lacks a hidden field
 * @throws Exception propagated from the curl helpers
 */
function dvlascrape($send_array)
{
    $vrm_strip = isset($send_array[0]) ? trim((string) $send_array[0]) : '';
    $make_trimmed = isset($send_array[2]) ? trim((string) $send_array[2]) : '';
    if ($vrm_strip === '' || $make_trimmed === '') {
        throw new InvalidArgumentException('$send_array must hold the registration number at index 0 and the make at index 2!');
    }

    // The same host is used for both requests (no "www." prefix).
    $formUrl = 'https://vehicleenquiry.service.gov.uk/Default.aspx';
    $ch = hhb_curl_init();
    $debugHeaders = array();
    $debugCookies = array();
    $debugRequest = '';

    // First an empty request to get a session id, cookies and the VIEWSTATE fields.
    $html = hhb_curl_exec2($ch, $formUrl, $debugHeaders, $debugCookies, $debugRequest);
    if ($html === '') {
        throw new RuntimeException('The enquiry form page came back empty.');
    }
    $previousLibxmlSetting = libxml_use_internal_errors(true); // real-world HTML is rarely valid
    $domd = new DOMDocument();
    $parsed = $domd->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousLibxmlSetting);
    if (!$parsed) {
        throw new RuntimeException('The enquiry form page could not be parsed.');
    }

    $hiddenFields = array();
    foreach (array('__VIEWSTATE', '__VIEWSTATEGENERATOR', '__EVENTVALIDATION') as $fieldId) {
        $fieldNode = $domd->getElementById($fieldId);
        if ($fieldNode === null) {
            throw new RuntimeException('The enquiry form page has no ' . $fieldId . ' field.');
        }
        $hiddenFields[$fieldId] = $fieldNode->getAttribute('value');
    }

    // Now the actual search.
    curl_setopt_array($ch, array(
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => http_build_query(array(
            '__LASTFOCUS' => '',
            '__EVENTTARGET' => '',
            '__EVENTARGUMENT' => '',
            '__VIEWSTATE' => $hiddenFields['__VIEWSTATE'],
            '__VIEWSTATEGENERATOR' => $hiddenFields['__VIEWSTATEGENERATOR'],
            '__EVENTVALIDATION' => $hiddenFields['__EVENTVALIDATION'],
            'ctl00$MainContent$txtSearchVrm' => $vrm_strip,
            'ctl00$MainContent$MakeTextBox' => $make_trimmed,
            'ctl00$MainContent$txtV5CDocumentReferenceNumber' => '',
            'ctl00$MainContent$butSearch' => 'Search',
        )),
    ));
    return hhb_curl_exec2($ch, $formUrl, $debugHeaders, $debugCookies, $debugRequest);
}

// Registration number and make have to be valid and in the DVLA format, otherwise this won't work!
var_dump(dvlascrape(array('PK07LVD', '', 'BMW')));
?>