Codice PHP:
<?php
// Word-frequency report for a web page.
//
// Reads a URL from the POSTed form field "url", downloads the page, strips
// <script>/<style> sections and all remaining tags, splits the text into
// words and leaves in $valresult the words that are longer than 4 bytes and
// occur more than 4 times, ordered by descending count. The table markup
// further down in this file renders $valresult.

ini_set('max_execution_time', 3000);

// The field may be absent (direct GET) or an array ("url[]=..."): treat both as empty.
$url = (isset($_POST['url']) && is_string($_POST['url'])) ? trim($_POST['url']) : '';

// Accept scheme-less input ("example.com") by defaulting to http, but refuse
// every scheme other than http/https so the form cannot be pointed at
// file:// or other non-web resources.
if ($url !== '' && !preg_match('#^[a-z][a-z0-9+.\-]*://#i', $url)) {
    $url = 'http://' . $url;
}

$result = '';
if (preg_match('#^https?://#i', $url)) {
    $result = (string) getUrltext($url);
}

// Drop script and style sections entirely, then replace every remaining tag
// with a space so that words separated only by markup do not run together.
$result = stripDelimitedSections($result, '<script', '</script>', '');
$result = stripDelimitedSections($result, '<style', '</style>', '');
$result = stripDelimitedSections($result, '<', '>', ' ');

// Line breaks and tabs separate words, so turn them into spaces instead of
// deleting them (deleting would glue the last word of one line to the first
// word of the next).
$result = str_replace(array("\r", "\n", "\t"), ' ', $result);

// Punctuation and symbols become spaces as well.
$result = preg_replace("/[ #\&\+\-%@=\/\\\:;,\.'\"\^`~\_|\!\?\*$#<>()\[\]\{\}]/i", ' ', $result);

// array_filter() without a callback discards the empty strings produced by
// consecutive spaces.
$words = array_filter(explode(' ', (string) $result));

$valresult = array();
$volume = array();

// array_count_values() counts exact strings in a single pass and keeps the
// order of first occurrence.
foreach (array_count_values($words) as $word => $ncount) {
    // Integer-like words come back as int keys; restore the string form.
    $word = (string) $word;

    // NOTE(review): strlen() counts bytes, so for UTF-8 text "longer than 4"
    // means bytes, not characters - confirm whether mb_strlen() is intended.
    if ($ncount > 4 && strlen($word) > 4) {
        $valresult[] = array('name' => $word, 'count' => $ncount);
        $volume[] = $ncount;
    }
}

// Highest count first; array_multisort() orders equal counts by comparing
// the $valresult rows themselves. Skipped when nothing qualified.
if ($valresult) {
    array_multisort($volume, SORT_DESC, $valresult);
}

/**
 * Remove (or replace) every section of $text that starts with $open and ends
 * with the next $close, delimiters included.
 *
 * Delimiters are matched case-insensitively (ASCII). A section that is opened
 * but never closed is left in place together with everything after it.
 *
 * @param string $text        Text to clean.
 * @param string $open        Opening delimiter, e.g. "<script".
 * @param string $close       Closing delimiter, e.g. "</script>".
 * @param string $replacement Text inserted in place of each removed section.
 *
 * @return string The cleaned text.
 */
function stripDelimitedSections($text, $open, $close, $replacement)
{
    // Search a lower-cased copy (same byte length as the original) so the
    // match is case-insensitive while the text is scanned only once.
    $haystack = strtolower($text);
    $open = strtolower($open);
    $close = strtolower($close);
    $openLen = strlen($open);
    $closeLen = strlen($close);

    $out = '';
    $offset = 0;
    while (($start = strpos($haystack, $open, $offset)) !== false) {
        $end = strpos($haystack, $close, $start + $openLen);
        if ($end === false) {
            break; // unterminated section: keep the remainder untouched
        }
        $out .= substr($text, $offset, $start - $offset) . $replacement;
        $offset = $end + $closeLen;
    }

    return $out . substr($text, $offset);
}
/**
 * Download the body of a URL with cURL, following redirects.
 *
 * The request carries a desktop Chrome User-Agent header. Response headers
 * are not included in the returned text.
 *
 * @param string $url      Address to fetch.
 * @param bool   $encoding When true, the body is returned as UTF-8: a body
 *                         that is already valid UTF-8 is returned unchanged,
 *                         anything else is converted from windows-1251.
 *                         When false, the raw bytes are returned.
 *
 * @return string|false The body. If the transfer fails, '' when $encoding is
 *                      true, or false (the curl_exec() result) when it is false.
 */
function getUrltext($url, $encoding = true)
{
    $ch = curl_init();
    $headers = array('User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36');

    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

    $result = curl_exec($ch);
    curl_close($ch);

    if ($result === false) {
        // Same values the previous implementation produced on failure.
        return $encoding ? '' : false;
    }

    // Converting a page that is already UTF-8 "from windows-1251" would turn
    // every non-ASCII character into mojibake, so only convert bodies that
    // are not valid UTF-8.
    if ($encoding && !mb_check_encoding($result, 'UTF-8')) {
        $result = mb_convert_encoding($result, "utf-8", "windows-1251");
    }

    return $result;
}
/**
 * Return the text found between the first occurrence of $startText and the
 * next occurrence of $endText in $strContent.
 *
 * Both delimiters are matched case-insensitively and are not included in the
 * result.
 *
 * @param string $startText  Opening delimiter.
 * @param string $endText    Closing delimiter, searched for after $startText.
 * @param string $strContent Text to search.
 *
 * @return string The enclosed text, or '' when either delimiter is missing
 *                or nothing lies between them.
 */
function getStrContent($startText, $endText, $strContent)
{
    $strContent = (string) $strContent;
    $startText = (string) $startText;
    $endText = (string) $endText;

    $start = stripos($strContent, $startText);
    if ($start === false) {
        return '';
    }

    // Look for the closing delimiter only after the opening one.
    $from = $start + strlen($startText);
    $stop = stripos($strContent, $endText, $from);
    if ($stop === false) {
        return '';
    }

    return substr($strContent, $from, $stop - $from);
}
?>
<table> <?php
// One row per entry of $valresult: the word and how many times it occurred.
// The words originate from a downloaded page, so they are HTML-escaped here
// at the point of output instead of relying on filtering done earlier.
foreach ($valresult as $item) {
    echo '<tr><td>'
        . htmlspecialchars((string) $item['name'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
        . '</td><td>'
        . (int) $item['count']
        . '</td></tr>';
}
?></table>