Private Declare Function URLDownloadToFile Lib "urlmon" Alias "URLDownloadToFileA" ( _ ByVal pCaller As Long, _ ByVal szURL As String, _ ByVal szFileName As String, _ ByVal dwReserved As Long, _ ByVal lpfnCB As Long) As Long
Const DEMO_URL = "http://www.google.com"
Public Function GetHTMLTextFromURL(Optional URL As String = "http://www.binaryworld.net", Optional SaveOnDisk As Boolean = False, Optional LocalPath As String = "C:\dow.htm") As String Const TEMP_FILE = "c:\_tmp"
On Error GoTo errHandler
Dim lngRetVal As Long
If SaveOnDisk = True Then lngRetVal = URLDownloadToFile(0, URL, LocalPath, 0, 0) If lngRetVal = 0 Then GetHTMLTextFromURL = LocalPath Else lngRetVal = URLDownloadToFile(0, URL, TEMP_FILE, 0, 0) If lngRetVal <> 0 Then GetHTMLTextFromURL = "" Else GetHTMLTextFromURL = ReadFromFile(TEMP_FILE) End If
On Error Resume Next Kill TEMP_FILE End If
Exit Function
errHandler: On Error Resume Next Kill TEMP_FILE End Function
Public Function ReadFromFile(FilePath As String) As String On Error GoTo errHandler
Dim iFile As Integer
If Dir(FilePath) = "" Then Exit Function
On Error GoTo errHandler:
iFile = FreeFile Open FilePath For Input As #iFile ReadFromFile = Input(LOF(iFile), #iFile) errHandler: If iFile > 0 Then Close #iFile End Function
Function GetSubMatch(inpStr, inpPattern, Optional MatchIndex As Integer = 0) As String On Error Resume Next
Dim regEx, oMatch, oMatches Set regEx = CreateObject("VBScript.RegExp")
regEx.Pattern = inpPattern regEx.IgnoreCase = True regEx.Global = True
Set oMatches = regEx.Execute(inpStr) Set oMatch = oMatches(0) retStr = oMatch.SubMatches(MatchIndex)
GetSubMatch = retStr End Function
Function GetAllMatch(patrn As String, strng As String, Optional SubMatchIndex As Integer = -1) As Collection Dim c As New Collection Dim regEx, Match, Matches, I Set regEx = CreateObject("VBScript.RegExp")
regEx.Pattern = patrn regEx.IgnoreCase = True regEx.Global = True
Set Matches = regEx.Execute(strng) For Each Match In Matches I = I + 1 If SubMatchIndex >= 0 Then c.Add Match.SubMatches(SubMatchIndex) Else c.Add Match.Value End If Next Set GetAllMatch = c End Function
Private Sub Command1_Click() Dim strRegX As String, strRegXHREF As String Dim colmatches As New Collection
Me.Caption = "Downloading the html page..." Text1.Text = GetHTMLTextFromURL(DEMO_URL) Me.Caption = "APIDemo"
MsgBox "This demo will grab all hyperlink Tags (i.e. <A> Anything </A>)" Call DemoGrabHTMLTag
End Sub
Private Sub Command2_Click() Dim strRegX As String, strRegXHREF As String Dim colmatches As New Collection
Me.Caption = "Downloading the html page..." Text1.Text = GetHTMLTextFromURL(DEMO_URL) Me.Caption = "APIDemo"
MsgBox "This demo will grab only URLs from Hyperlink Tags (i.e. only www.mysite.com from <A href=""www.mysite.com""></A> )" Call DemoGrabURLOnly End Sub
Private Sub Form_Load() Command2.Caption = "Grab URL" Command1.Caption = "Grab <A> Tag" End Sub
Sub DemoGrabHTMLTag()
Dim strRegX As String Debug.Print String(80, "=") List1.Clear strRegX = "<\s*A(.|\n)*?\s*>((.|\n)*?)<\s*\/A\s*>" Set colmatches = GetAllMatch(strRegX, Text1) If colmatches.Count > 0 Then For I = 1 To colmatches.Count If colmatches(I) <> "" Then List1.AddItem colmatches(I): Debug.Print colmatches(I) Next End If Me.Caption = "Total " & I & " <A> tag(s) found" End Sub
Sub DemoGrabURLOnly() Dim strRegXHREF As String, strRegX As String List1.Clear
strRegXHREF = "<\s*a(.|\n)*?\s*href=([\\""'])*([?/.\\]*\w*[.?\://]*(\w*[?&#=;$%-/.])*\w*)([""'])*(.|\n^>)*?\s*>((.|\n)*?)<\s*\/a\s*>" Set colmatches = GetAllMatch(strRegXHREF, Text1, 2)
If colmatches.Count > 0 Then For I = 1 To colmatches.Count OnlyURL = colmatches(I) If OnlyURL <> "" Then List1.AddItem OnlyURL Debug.Print OnlyURL End If Next End If
Me.Caption = "Total " & I & " URLs found" End Sub
Private Sub List1_Click() If List1.ListIndex >= 0 Then
MsgBox List1.List(List1.ListIndex) Clipboard.SetText List1.List(List1.ListIndex), ClipBoardConstants.vbCFText End If End Sub |