Function UnescapeHtml(strHtml)
Dim strOut, strChar, strWord
Dim lngLength, lngPosition, lngChar
strOut = strHtml
'Replace decimal and unicode escapes.
lngPosition = Instr(strOut, "")
Do While lngPosition <> 0
'Get the number after the sequence
strChar = FirstNumber(Mid(strOut, lngPosition, 6))
lngLength = Len("" & strChar)
'Is there a trailing semicolon?
If Mid(strOut, lngPosition + lngLength, 1) = ";" Then
lngLength = lngLength + 1
End If
'Convert the number after to an ASCII or Unicode character
lngChar = CLng(strChar)
If lngChar > 255 Then
strChar = ChrW(lngChar)
Else
strChar = Chr(lngChar)
End If
'Replace the escaped sequence with the actual character
strOut = Left(strOut, lngPosition - 1) & strChar & Mid(strOut, lngPosition + lngLength)
lngPosition = Instr(strOut, "")
Loop
'Replace common workaround for TradeMark character
Do While Instr(strOut, "TM") <> 0 : strOut = Replace(strOut, "TM", Chr(153)) : Loop
Do While Instr(strOut, "TM") <> 0 : strOut = Replace(strOut, "TM", Chr(153)) : Loop
'Replace items with duplicate codes
ReplaceHtmlEscape strOut, "−", 45
ReplaceHtmlEscape strOut, "/", 47
ReplaceHtmlEscape strOut, "―", 95
ReplaceHtmlEscape strOut, "&ldots;", 133
ReplaceHtmlEscape strOut, "’", 145
ReplaceHtmlEscape strOut, "”", 147
ReplaceHtmlEscape strOut, "&endash;", 150
ReplaceHtmlEscape strOut, "—", 151
ReplaceHtmlEscape strOut, "&brkbar;", 166
ReplaceHtmlEscape strOut, "¨", 168
ReplaceHtmlEscape strOut, "½", 189
ReplaceHtmlEscape strOut, "€", 128
'Now continue unescaping everything else
lngChar = 32
For Each strWord In Split("sp excl quot num dollar percnt amp apos lpar rpar ast plus comma hyphen period")
ReplaceHtmlEscape strOut, strWord, lngChar
lngChar = lngChar + 1
Next
lngChar = 58
For Each strWord In Split("colon semi lt equals gt quest commat")
ReplaceHtmlEscape strOut, strWord, lngChar
lngChar = lngChar + 1
Next
lngChar = 91
For Each strWord In Split("lsqb bsol rsqb circ lowbar grave")
ReplaceHtmlEscape strOut, strWord, lngChar
lngChar = lngChar + 1
Next
lngChar = 123
For Each strWord In Split("lcub verbar rcub tilde")
ReplaceHtmlEscape strOut, strWord, lngChar
lngChar = lngChar + 1
Next
lngChar = 130
For Each strWord In Split("lsquor fnof ldquor hellip dagger Dagger")
ReplaceHtmlEscape strOut, strWord, lngChar
lngChar = lngChar + 1
Next
lngChar = 137
For Each strWord In Split("permil Scaron lsaquo OElig")
ReplaceHtmlEscape strOut, strWord, lngChar
lngChar = lngChar + 1
Next
lngChar = 145
For Each strWord In Split("lsquo rsquo ldquo rdquo bull ndash emdash tilde trade scaron rsaquo oelig")
ReplaceHtmlEscape strOut, strWord, lngChar
lngChar = lngChar + 1
Next
lngChar = 159
For Each strWord In Split("Yuml nbsp iexcl cent pound curren yen brvbar sect uml")
ReplaceHtmlEscape strOut, strWord, lngChar
lngChar = lngChar + 1
Next
lngChar = 169
For Each strWord In Split("copy ordf laquo not shy reg macr hibar deg plusmn sup2 sup3 acute micro para middot cedil sup1 ordm raquo frac14 frac12")
ReplaceHtmlEscape strOut, strWord, lngChar
lngChar = lngChar + 1
Next
lngChar = 190
For Each strWord In Split("frac34 iquest Agrave Aacute Acirc Atilde Auml Aring AElig Ccedil Egrave Eacute Ecirc Euml Igrave Iacute Icirc Iuml ETH Ntilde Ograve Oacute Ocirc Otilde Ouml times Oslash Ugrave Uacute Ucirc Uuml Yacute THORN szlig agrave aacute acirc atilde auml aring aelig ccedil egrave eacute ecirc euml igrave iacute icirc iuml eth ntilde ograve oacute ocirc otilde ouml divide oslash ugrave uacute ucirc uuml yacute thorn yuml")
ReplaceHtmlEscape strOut, strWord, lngChar
lngChar = lngChar + 1
Next
UnescapeHtml = strOut
End Function
Sub ReplaceHtmlEscape(ByRef strHtmlText, strEscapeString, lngCharCode)
'MODIFIES INPUT DATA strHtmlText
Dim strEscape, strCharCode
'Starting values
strEscape = strEscapeString
strCharCode = Chr(lngCharCode)
'First make the escape string "Microsoft format" (no trailing semicolon).
If Left(strEscape, 1) <> "&" Then strEscape = "&" & strEscape
If Right(strEscape, 1) = ";" Then strEscape = Left(strEscape, Len(strEscape) - 1)
'Replace standard format
Do While Instr(strHtmlText, strEscape & ";") <> 0
strHtmlText = Replace(strHtmlText, strEscape & ";", strCharCode, 1, -1, vbTextCompare)
Loop
'Replace Microsoft format
Do While Instr(strHtmlText, strEscape) <> 0
strHtmlText = Replace(strHtmlText, strEscape, strCharCode, 1, -1, vbTextCompare)
Loop
End Sub