codice:Function StorePageData() StrSQL = "SELECT A.PageURL AS P1, " &_ "B.PageURL AS P2 " &_ "FROM tblURL A " &_ "LEFT JOIN tblPages B " &_ "ON B.PageURL = A.PageURL" ObjRS.Open StrSQL, ObjConn, 1, 3 Call GetError() If Not ObjRS.EOF Then Do Until ObjRs.EOF If Right(ObjRS("P1"),5) = ".html" Then Call LocateCodeInPage(SiteURL & ObjRS("P1"), "<title>", Array("</title>"), PageTitle) PageTitle = Replace(PageTitle,"""","") If Left(PageTitle,Len(TitleToRemove)) = TitleToRemove Then PageTitle = Mid(PageTitle,(Len(TitleToRemove) + 1)) End If Call LocateCodeInPage(SiteURL & ObjRS("P1"), "name=""keywords"" content=", Array("/>",">"), PageKeywords) PageKeywords = Replace(PageKeywords,"""","") Call LocateCodeInPage(SiteURL & ObjRS("P1"), "name=""description"" content=", Array("/>",">"), PageDescription) PageDescription = Replace(PageDescription,"""","") Call LocateCodeInPage(SiteURL & ObjRS("P1"), "</head>", Array("</body>"), PageContent) PageContent = StringsToRemove(PageContent) PageContent = Replace(PageContent," ","") PageContent = Replace(PageContent,vbCrLf," ") PageContent = Trim(PageContent) PageContent = RemoveHTML(PageContent) ElseIf Right(ObjRS("P1"),4) = ".doc" Then Call GetWordDocumentContent(SiteURL & ObjRS("P1")) If Left(PageTitle,17) = "Microsoft Word - " Then PageTitle = Replace(PageTitle,"Microsoft Word - ","") End If If Right(PageTitle,4) = ".doc" Then PageTitle = Left(PageTitle,(Len(PageTitle) - 4)) End If Else PageTitle = "" PageKeywords = "" PageDescription = "" PageContent = "" End If If IsNull(ObjRS("P2")) Or Len(ObjRS("P2")) = 0 Then StrSQLInsert = "INSERT INTO tblPages (" &_ "PageURL, " &_ "PageTitle, " &_ "PageKeywords, " &_ "PageDescription, " &_ "PageContent, " &_ "PageRank, " &_ "DateTime) " &_ "VALUES (" &_ "'" & Replace(ObjRS("P1"),"'","''") & "', " &_ "'" & Replace(PageTitle,"'","''") & "', " &_ "'" & Replace(PageKeywords,"'","''") & "', " &_ "'" & Replace(PageDescription,"'","''") & "', " &_ "'" & Replace(PageContent,"'","''") & "', " &_ "0, " &_ "'" & DateTime & "')" Else StrSQLInsert = "UPDATE tblPages SET " &_ "PageTitle = '" & Replace(PageTitle,"'","''") & "', " &_ "PageKeywords = '" & Replace(PageKeywords,"'","''") & "', " &_ "PageDescription = '" & Replace(PageDescription,"'","''") & "', " &_ "PageContent = '" & Replace(PageContent,"'","''") & "', " &_ "PageRank = 0, " &_ "DateTime = '" & DateTime & "' " &_ "WHERE PageURL = '" & Replace(ObjRS("P1"),"'","''") & "'" End If ObjConn.Execute(StrSQLInsert) Call GetError() ObjRS.MoveNext Loop End If ObjRS.Close Call DeleteOldReferences() Call BuildPageRank() End Function Function DeleteOldReferences() ObjConn.Execute("DELETE FROM tblPages WHERE DateTime <> '" & DateTime & "'") ObjConn.Execute("DELETE FROM tblPages WHERE LEFT(PageURL,1) != '/'") ObjConn.Execute("DELETE FROM tblURL WHERE LEFT(PageURL,1) != '/'") Call GetError() End Function Function RemoveHTML(TheContent) With ObjRegExp .Pattern = "<[^>]*>" .IgnoreCase = True .Global = True End With TheContent = Replace(TheContent," "," ") TheContent = Replace(TheContent,"</p>"," ") RemoveHTML = ObjRegExp.Replace(TheContent, "") End Function Function GetTextFromPage(ThePageURL) ObjXMLHttp.Open "GET", SiteURL & ThePageURL, False ObjXMLHttp.Send Call GetError() URLStatus = CInt(ObjXMLHttp.Status) If URLStatus = 200 Then If ObjXMLHttp.ReadyState = 4 Then GetTextFromPage = ObjXMLHttp.ResponseText End If Else Exit Function End If End Function Function CheckDomainURL(TheURL) SkipURL = False If Left(LCase(TheURL),7) = "http://" Then If Left(LCase(TheURL),Len(SiteURL)) = LCase(SiteURL) Then TheURL = Mid(TheURL,(Len(SiteURL) + 1)) ElseIf Not Left(TheURL,1) = "/" Then SkipUrl = True Else SkipURL = True End If End If CheckDomainURL = TheURL End Function Function StringsToRemove(TheText) For k = 0 To UBound(ArrayFileToRemove) TheTextToRemove = GetTextFromPage(ArrayFileToRemove(k)) TheText = Replace(TheText,TheTextToRemove,"") Next For k = 0 To UBound(ArrayStringsToRemove) TheText = Replace(TheText,ArrayStringsToRemove(k),"") Next StringsToRemove = TheText End Function Function InitializeCatalog() ObjConn.Execute("DELETE FROM tblURL") Call GetError() End Function Function LoopFunctions() While Not StopSpider Call IndexPage(ValidDocExtToIndex) Wend If StopSpider Then Call StorePageData() End If End Function iYear = DatePart("yyyy",Date()) iMonth = DatePart("m",Date()) If iMonth < 10 Then iMonth = "0" & iMonth iDay = DatePart("d",Date()) If iDay < 10 Then iDay = "0" & iDay iHour = DatePart("h",Time()) If iHour < 10 Then iHour = "0" & iHour iMinutes = DatePart("n",Time()) If iMinutes < 10 Then iMinutes = "0" & iMinutes iSeconds = DatePart("s",Time()) If iSeconds < 10 Then iSeconds = "0" & iSeconds DateTime = iYear & iMonth & iDay & iHour & iMinutes & iSeconds Set ObjConn = CreateObject("ADODB.Connection") ObjConn.Open "Driver={SQL Server};Server=localhost;Database=Search;Uid=utente;Pwd=password;" Call GetError() Set ObjRS = CreateObject("ADODB.Recordset") Set ObjXMLHttp = CreateObject("Msxml2.XMLHTTP.4.0") Call GetError() Set ObjRegExp = new RegExp SiteURL = "http://www.nomedelsito.it" TitleToRemove = "Blocco statico titolo pagina - " DocExt = Array(".html",".doc") ValidDocExtToIndex = Array(".html") ArrayStringsToRemove = Array("Frase comune in tutte le pagine 1","Frase comune in tutte le pagine 2") ReDim ArrayFileToRemove(2) ArrayFileToRemove(0) = "/include/menu.html" ArrayFileToRemove(1) = "/include/testata.html" ArrayFileToRemove(2) = "/include/footer.html" Smtp = "smtp.nomesito.it" Port = "25" Authenticate = 0 ' 1 = con autenticazione; 0 = senza autenticazione MailUser = "" MailPass = "" MailSender = "mittente@nomesito.it" MailRecipient = "destinatario@nomesito.it" Call InitializeCatalog() Call getLinksInPage(SiteURL, "href=", Array(" ",">"), DocExt) Call LoopFunctions() Set ObjRegExp = Nothing Set ObjXMLHttp = Nothing Set ObjRS = Nothing ObjConn.Close Set ObjConn = Nothing MsgBox "End Of Script"