Ho realizzato uno script che funziona: scarica una pagina, ne estrae tutte le parole e conta quante volte ricorre ciascuna.
però ho 2 problemi:
1) vorrei considerare solo il contenuto di un tag td specifico
2) vorrei poter individuare in quali punti della pagina si ripetono due parole di una stessa frase, oppure addirittura intere frasi uguali, e non soltanto le singole parole...

Vi posto lo script, che comunque funziona e può servire a qualcuno:
Codice PHP:
<?php
// Fetching and parsing a large page can be slow; raise the execution limit
// (value is in seconds).
ini_set('max_execution_time', 3000);

/**
 * Removes every "$startText ... $endText" span from $text.
 *
 * Each pass asks getStrContent() for the content of the first span and then
 * replaces all occurrences of that exact span with $replacement.
 *
 * @param string $text        Text to clean.
 * @param string $startText   Opening delimiter, e.g. "<script".
 * @param string $endText     Closing delimiter, e.g. "</script>".
 * @param string $replacement Text substituted for every removed span.
 * @return string The cleaned text.
 */
function removeDelimitedBlocks($text, $startText, $endText, $replacement)
{
    while (true) {
        $inner = (string) getStrContent($startText, $endText, $text);
        if ($inner === '') {
            // No (further) span with content was found.
            break;
        }

        // getStrContent() matches case-insensitively, so the replacement must
        // too; otherwise an upper-case tag such as "<SCRIPT" is found on every
        // pass but never removed.
        $cleaned = str_ireplace($startText . $inner . $endText, $replacement, $text);
        if ($cleaned === $text) {
            // Nothing was replaced: stop instead of looping forever.
            break;
        }
        $text = $cleaned;
    }

    return $text;
}

// The URL comes from the client: accept only http(s) so that schemes such as
// file:// are never handed to cURL.
$url = (isset($_POST['url']) && is_string($_POST['url'])) ? trim($_POST['url']) : '';
$scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));

$result = '';
if ($scheme === 'http' || $scheme === 'https') {
    $result = (string) getUrltext($url);
}

// Drop script and style blocks entirely, then turn every remaining tag into a
// space so that adjacent words do not merge.
$result = removeDelimitedBlocks($result, "<script", "</script>", '');
$result = removeDelimitedBlocks($result, "<style", "</style>", '');
$result = removeDelimitedBlocks($result, "<", ">", ' ');

// Line breaks and tabs separate words, so they become spaces (removing them
// outright would glue the last word of a line to the first word of the next).
$result = str_replace(array("\r", "\n", "\t"), ' ', $result);

// Punctuation and symbols are treated as word separators.
$result = (string) preg_replace("/[ #\&\+\-%@=\/\\\:;,\.'\"\^`~\_|\!\?\*$#<>()\[\]\{\}]/i", ' ', $result);

$words = preg_split('/\s+/', $result, -1, PREG_SPLIT_NO_EMPTY);
if ($words === false) {
    $words = array();
}

// $valresult is the list rendered by the table further down:
// one array('name' => word, 'count' => occurrences) entry per distinct word.
$valresult = array();
foreach (array_count_values($words) as $word => $count) {
    // array_count_values() turns purely numeric words into integer keys.
    $word = (string) $word;

    // Keep only words longer than four characters; mb_strlen counts
    // characters rather than bytes, so accented letters count once.
    if (mb_strlen($word, 'UTF-8') > 4) {
        $valresult[] = array('name' => $word, 'count' => $count);
    }
}

// Most frequent words first; ties are ordered alphabetically.
usort($valresult, function ($a, $b) {
    if ($a['count'] !== $b['count']) {
        return ($a['count'] > $b['count']) ? -1 : 1;
    }
    return strcmp($a['name'], $b['name']);
});

/**
 * Downloads the body of $url with cURL, following redirects.
 *
 * @param string $url      Address to fetch.
 * @param bool   $encoding When true (default) the body is converted from
 *                         Windows-1251 to UTF-8. Pass false for pages that are
 *                         already UTF-8, otherwise the text gets garbled.
 * @return string The response body, or '' when the request fails.
 *
 * NOTE(review): no scheme restriction is applied here; callers that pass
 * user-supplied URLs should validate them before calling.
 */
function getUrltext($url, $encoding = true)
{
    $ch = curl_init();
    if ($ch === false) {
        return '';
    }

    // Send a browser-like User-Agent header with the request.
    $headers = array('User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36');
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, 0);              // body only, no response headers
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);   // return the body instead of printing it
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

    $result = curl_exec($ch);
    curl_close($ch);

    // curl_exec() returns false on failure; report that as an empty page.
    if ($result === false) {
        return '';
    }

    if ($encoding) {
        $result = mb_convert_encoding($result, "utf-8", "windows-1251");
    }

    return $result;
}
/**
 * Returns the text between the first occurrence of $startText and the next
 * occurrence of $endText after it. Both delimiters are matched
 * case-insensitively and are not included in the result.
 *
 * @param string $startText  Opening delimiter.
 * @param string $endText    Closing delimiter.
 * @param string $strContent Text to search.
 * @return string The enclosed text, or '' when either delimiter is missing
 *                (or when nothing lies between them).
 */
function getStrContent($startText, $endText, $strContent)
{
    $start = stripos($strContent, $startText);
    if ($start === false) {
        return '';
    }

    // The content begins right after the opening delimiter.
    $from = $start + strlen($startText);

    $stop = stripos($strContent, $endText, $from);
    if ($stop === false) {
        return '';
    }

    return (string) substr($strContent, $from, $stop - $from);
}
?>
<table>
    <?php
        // One row per word: the word itself and how many times it occurred.
        foreach ($valresult as $item) {
            // Escape the word because it originates from a remote page.
            echo "<tr><td>" . htmlspecialchars($item['name'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "</td><td>" . (int) $item['count'] . "</td></tr>";
        }
    ?>
</table>


grazie a chi saprà aiutarmi