Private Declare Function URLDownloadToFile Lib "urlmon" Alias "URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long Const DEMO_URL = "http://www.google.com"
Public Function GetHTMLTextFromURL(Optional url As String = "http://www.binaryworld.net", Optional SaveOnDisk As Boolean = False, Optional LocalPath As String = "C:\dow.htm") As String Const TEMP_FILE = "c:\_tmp"
On Error GoTo errHandler
Dim lngRetVal As Long
If SaveOnDisk = True Then lngRetVal = URLDownloadToFile(0, url, LocalPath, 0, 0) If lngRetVal = 0 Then GetHTMLTextFromURL = LocalPath Else lngRetVal = URLDownloadToFile(0, url, TEMP_FILE, 0, 0) If lngRetVal <> 0 Then GetHTMLTextFromURL = "" Else GetHTMLTextFromURL = ReadFromFile(TEMP_FILE) End If
On Error Resume Next Kill TEMP_FILE End If
Exit Function
errHandler: On Error Resume Next Kill TEMP_FILE End Function
Public Function SaveToFile(Content As String, _ FilePath As String, Optional Append As Boolean = False) _ As Boolean Dim iFile As Integer iFile = FreeFile If Append Then Open FilePath For Append As #iFile Else Open FilePath For Output As #iFile End If
Print #iFile, Content SaveToFile = True
ErrorHandler: Close #iFile End Function
Public Function ReadFromFile(FilePath As String) As String On Error GoTo errHandler
Dim iFile As Integer
If Dir(FilePath) = "" Then Exit Function
On Error GoTo errHandler:
iFile = FreeFile Open FilePath For Input As #iFile ReadFromFile = Input(LOF(iFile), #iFile) errHandler: If iFile > 0 Then Close #iFile End Function
Function GetSubMatch(inpStr, inpPattern, Optional MatchIndex As Integer = 0) As String On Error Resume Next
Dim regEx, oMatch, oMatches Set regEx = CreateObject("VBScript.RegExp")
regEx.Pattern = inpPattern regEx.IgnoreCase = True regEx.Global = True
Set oMatches = regEx.Execute(inpStr) Set oMatch = oMatches(0) retStr = oMatch.SubMatches(MatchIndex)
GetSubMatch = retStr End Function
Function GetAllMatch(patrn As String, strng As String, Optional SubMatchIndex As Integer = -1) As Collection Dim c As New Collection Dim regEx, Match, Matches, i Set regEx = CreateObject("VBScript.RegExp")
regEx.Pattern = patrn regEx.IgnoreCase = True regEx.Global = True
Set Matches = regEx.Execute(strng) For Each Match In Matches i = i + 1 If SubMatchIndex >= 0 Then c.Add Match.SubMatches(SubMatchIndex) Else c.Add Match.Value End If Next Set GetAllMatch = c End Function
Private Sub Command1_Click() Dim strRegX As String, strRegXHREF As String Dim colmatches As New Collection
Me.Caption = "Downloading the html page..." Text1.Text = GetHTMLTextFromURL(Text2) Me.Caption = "APIDemo"
Call RegXDemo
End Sub
Private Sub Form_Load() Text2.Text = "http://binaryworld.net" Command1.Caption = "&Go" Combo1.AddItem "Find Only href from <A> tag" Combo1.AddItem "Find all <A> tags" Combo1.AddItem "Find only linked text" Combo1.AddItem "Find Only src from <img> tag" Combo1.AddItem "Find all <img> tags" Combo1.ListIndex = 0 End Sub
Sub RegXDemo() Dim strRegX As String Debug.Print String(80, "=") List1.Clear Select Case Combo1.ListIndex Case 0 strRegX = "<\s*A(.*?)href=['""]*(.*?)['""](.*?)>(.*?)<\s*/A\s*>" Debug.Print strRegX Set colmatches = GetAllMatch(strRegX, Text1, 1) Case 1 strRegX = "<\s*A(.|\n)*?\s*>((.|\n)*?)<\s*\/A\s*>" Set colmatches = GetAllMatch(strRegX, Text1) Case 2 strRegX = "<\s*A(.*?)href=['""]*(.*?)['""](.*?)>(.*?)<\s*/A\s*>" Set colmatches = GetAllMatch(strRegX, Text1, 2) Case 3 strRegX = "<\s*IMG(.*?)src=['""]*(.*?)['""](.*?)>" Set colmatches = GetAllMatch(strRegX, Text1, 1) Case 4 strRegX = "<\s*IMG(.|\n)*?\s*>" Set colmatches = GetAllMatch(strRegX, Text1) End Select
If colmatches.Count > 0 Then For i = 1 To colmatches.Count If colmatches(i) <> "" Then List1.AddItem colmatches(i): Debug.Print colmatches(i) Next End If
Me.Caption = "Total " & i & " match found"
ShowSamplePage End Sub
Private Sub Form_Unload(Cancel As Integer) On Error Resume Next Kill "c:\Out.html" End Sub
Private Sub List1_Click() If List1.ListIndex >= 0 Then
MsgBox List1.List(List1.ListIndex) Clipboard.SetText List1.List(List1.ListIndex), ClipBoardConstants.vbCFText End If End Sub
Sub ShowSamplePage() Dim strHTML As String, strItems As String, i, itm, url For i = 0 To List1.ListCount - 1 Select Case Combo1.ListIndex Case 1, 2, 4 itm = List1.List(i) Case 0, 3 itm = Trim(List1.List(i)) url = Trim(Text2) url = IIf(Right(url, 1) = "/" Or Right(url, 1) = "\", url, url & "/")
If Left(itm, 1) <> "/" And Left(itm, 1) <> "\" Then If Left(itm, 4) <> "http" Then itm = url & itm End If Else itm = Mid(url, 1, Len(url) - 1) & itm End If
If Combo1.ListIndex = 0 Then itm = "<a href='" & itm & "'>" & itm & "</a>" Else itm = "<img src='" & itm & "'> " & itm End If End Select
strItems = strItems & "<TR><TD>" & itm & "</TD></TR>" & vbCrLf Next strHTML = "<HTML><HEAD><TITLE>" & List1.ListCount & " Items found</TITLE></HEAD>" & _ "<BODY><TABLE><TR><TD><H2>Extracted Items (" & List1.ListCount & " Items found)</H2></TD></TR>" & strItems & "</TABLE></BODY></HTML>"
Debug.Print strHTML
SaveToFile strHTML, "C:\Out.html" Shell "explorer C:\Out.html", vbNormalFocus End Sub |