dig555
Постоялец
- Регистрация
- 22 Июн 2007
- Сообщения
- 365
- Реакции
- 160
- Автор темы
- #1
Ищу серверный парсер сниппета гугла. Поделитесь, пожалуйста, у кого есть :bc:
Нашёл сам. Топик можно снести.
Нашёл сам. Топик можно снести.
PHP:
<?php
// Search phrase and the number of results requested per page.
$kwd = "buy car";
$num = 100;

//****** STEP 1. FETCH THE GOOGLE RESULTS PAGE ******
// urlencode() turns spaces into '+' exactly as the previous str_replace() did,
// and additionally escapes characters such as '&', '#' or '+' that would
// otherwise corrupt the query string.
$url = "http://www.google.ru/search?q=".urlencode($kwd)."&hl=en&num=$num&lr=lang_en";

// The same file is used both to load cookies before the request and to
// store the cookies received in the response.
$cookieFile = 'C:/apache/localhost/www/Doorgen/cookie.txt';

$ch = curl_init();
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieFile);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookieFile);
curl_setopt($ch, CURLOPT_URL, $url);
// Response headers are kept in front of the body inside $result.
curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch, CURLOPT_NOBODY, 0);
// Return the response as a string instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// NOTE(review): certificate verification is switched off; this only matters
// if a redirect leads to an https URL. Consider enabling it.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
curl_setopt($ch, CURLOPT_TIMEOUT, 180);
curl_setopt($ch, CURLOPT_REFERER, 'http://google.com/');
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)");

$result = curl_exec($ch);
if ($result === false) {
    // curl_exec() returns false on a transport failure. Report it and fall
    // back to an empty page so the code that follows simply finds no results
    // instead of operating on a boolean.
    echo "<b>cURL error</b>: ".htmlspecialchars(curl_error($ch))."<br>";
    $result = '';
}
curl_close($ch);
//****** STEP 2. EXTRACT TITLES, SNIPPETS AND LINKS FROM THE PAGE ******
// Rewrite the "j hc" attribute value to "j" so that the snippet pattern
// below, which looks for class="j", matches those cells as well.
$result = str_replace("\"j hc\"","\"j\"",(string) $result);

// Three independent patterns are used, so the three match lists are not
// guaranteed to have the same length. preg_match_all() returns false on a
// PCRE error, hence the int casts.
$resnum = (int) preg_match_all("~'\)\">(.*)</a></h2>~Uis",$result,$_titles);
$resnum1 = (int) preg_match_all("~<td class=\"j\">(.*)<br>~Uis",$result,$_snippets);
$resnum2 = (int) preg_match_all("~<h2 class=r><a href=\"(.*)\" target=_blank class=l~Uis",$result,$_links);
echo "<br><font class=tval><b>Количество результатов</b></font> = $resnum <br>";

// Collected results, one entry per title found:
// array('title' => ..., 'snippet' => ..., 'link' => ..., 'snippettext' => ...).
$RESULTS = array();

for ($i = 0; $i < $resnum; $i++) {
    // The loop is driven by the number of titles; a snippet or link with the
    // same index may be missing, in which case an empty string is used
    // instead of reading an undefined offset.
    $title = isset($_titles[1][$i]) ? strip_tags(trim($_titles[1][$i])) : '';
    $snippet = isset($_snippets[1][$i]) ? strip_tags(trim($_snippets[1][$i])) : '';
    $link = isset($_links[1][$i]) ? strip_tags(trim($_links[1][$i])) : '';

    // Turn ellipses into sentence boundaries. The trailing space is appended
    // because the sentence pattern below requires a space after the
    // terminating punctuation mark.
    $snippet = str_replace(", ...",". ",$snippet);
    $snippet = str_replace(" ...",". ",$snippet);
    $snippet = str_replace("..",".",$snippet);
    $snippet = $snippet." ";

    // Pick out sentence-like fragments: a word starting with a letter, then
    // letters/digits/basic punctuation, ending in '.', '?' or '!' and a space.
    $txtnum = (int) preg_match_all("~[A-Z]([a-z])+ ([A-Za-z ,;'&0-9$-])*(\.|\?|!) ~Uis",$snippet,$_text);

    $snippettext = "";
    for ($j = 0; $j < $txtnum; $j++) {
        // The pattern is case-insensitive, so a fragment may start with a
        // lower-case letter; capitalise it.
        $sentence = ucfirst($_text[0][$j]);
        // Only fragments longer than 50 bytes (measured before trimming) are kept.
        if (strlen($sentence) > 50) {
            $snippettext = $snippettext.trim($sentence)."\r\n";
        }
    }

    // Decode or drop the HTML entities that survive strip_tags(), then tidy
    // up punctuation. Replacements are applied in the listed order, which
    // mirrors the original sequence of calls.
    // NOTE(review): the entity names were restored from a listing in which
    // they had been rendered as plain characters; confirm against the
    // original source if it is available.
    $snippettext = str_replace(
        array("&#39;", "&amp;", "&middot;", "&quot;", "&gt;", "...", "..", ".,"),
        array("'", "&", "", "'", "", ".", ".", "."),
        $snippettext
    );

    $RESULTS[$i]['title'] = $title;
    $RESULTS[$i]['snippet'] = $snippet;
    $RESULTS[$i]['link'] = $link;
    $RESULTS[$i]['snippettext'] = $snippettext;

    echo "<b>title</b>: ".$title."<br>";
    echo "<b>snippet</b>: ".$snippet."<br>";
    echo "<b>link</b>: ".$link."<br>";
    echo "<b>snippettext</b>: ".$snippettext."<br><br>";
}
?>