codice:
Public Function StripHTMLTags(ByVal strContent As String) As String
' ---
'
' Receives strContent, expecting it to c
' ontain
' html tags, and returns it
' with tags removed (as best as it can)
'
' ---
On Error Resume Next
'
Dim mString As String
Dim mStartPos As Long
Dim mEndPos As Long
Dim i As Long
Dim j As Long
' --------------------------------------
' -------------
' Start process
'
' FIRST, REMOVE ALL LINE BREAKS
'
strContent = Replace$(strContent, vbCr, "")
strContent = Replace$(strContent, vbLf, "")
'
' now, replace
and <br /&g
' t; tags
'
strContent = Replace$(strContent, "
", vbCrLf)
strContent = Replace$(strContent, "
", vbCrLf)
strContent = Replace$(strContent, "
", vbCrLf)
'
' DIVS and SPANS and PARAGRAPHS -- conve
' rt to vbCrLf x2
strContent = Replace$(strContent, "<div", vbCrLf & vbCrLf & "<div")
strContent = Replace$(strContent, "<span", vbCrLf & vbCrLf & "<span")
strContent = Replace$(strContent, "<p", vbCrLf & vbCrLf & "<p")
'
' IF THERE IS A <head> section, ge
' t rid of it
' since the lines beyond this will not p
' roperly
' pick up javascript
'
If InStr(1, strContent, "<head", vbTextCompare) > 0 Then
Dim lastChar As Long
lastChar = InStr(2, strContent, "</head>", vbTextCompare) + 6
strContent = Right$(strContent, Len(strContent) - lastChar)
End If
'
' NOW LETS TRY TO REMOVE ALL TAGS
'
mStartPos = InStr(strContent, "<")
mEndPos = InStr(strContent, ">")
'
Do While mStartPos <> 0 And mEndPos <> 0 And mEndPos > mStartPos
mString = Mid(strContent, mStartPos, mEndPos - mStartPos + 1)
strContent = Replace(strContent, mString, "")
mStartPos = InStr(strContent, "<")
mEndPos = InStr(strContent, ">")
Loop
'
'
' Translate common escape sequence chars
' ------------------
'
strContent = Replace(strContent, "_", " ")
strContent = Replace(strContent, "&", "&")
' strContent = Replace(strContent, """, "'")
strContent = Replace(strContent, "?", "#")'ecc eccc
strContent = LTrim(Trim(strContent))
'
' THERE ARE A BOATLOAD MORE OF THESE, BU
' T DON'T WANT TO
' SLOW DOWN TOO MUCH
' but feel free to add your own to this
' section
' --------------------------------------
' -------------------
'
' Remove leading/trailing space
Do While Left(strContent, 1) = Chr$(13) Or Left(strContent, 1) = Chr$(10)
strContent = Mid(strContent, 2)
Loop
'
'
' SEND BACK THE RESULT
'
StripHTMLTags = strContent
End Function