1.使用Inet控件,只能得到部分源代码,有时候会出现奇怪的问题。
Private Function getHtmlStr$(strUrl$) If Inet.StillExecuting = False Then getHtmlStr = Inet.OpenURL(strUrl) Do While Inet.StillExecuting DoEvents Loop End If End Function
2.使用Inet控件,多了OpenURL的第二个参数,得到的和上面的一样,只是部分
Private Function getHtmlStr$(strUrl$) Dim bCode() As Byte Dim sTmp As String Dim i As Integer On Error Resume Next bCode = Inet.OpenURL(strUrl, 1) sTmp = "" For i = 0 To UBound(bCode) - 1 sTmp = sTmp & Chr(bCode(i)) Next i getHtmlStr = sTmp End Function
3.用的WebBrowser控件,最原始的方法,得到的代码是非常完整的
Private Function getHtmlStr$(strUrl$) WebBrowser1.Navigate strUrl Do While WebBrowser1.Busy DoEvents Loop getHtmlStr = WebBrowser1.Document.body.outerhtml End Function
4.使用"Microsoft.XMLHTTP"对象,这个方法用的比较多,因为它比较“绿色”
Private Function getHtmlStr$(strUrl$)
Dim XmlHttp As Object
Set XmlHttp = CreateObject("Microsoft.XMLHTTP")
XmlHttp.Open "GET", strUrl, False'这里可以用POST和GET,不过POST更好些,得到的网页始终最新
XmlHttp.send
getHtmlStr = StrConv(XmlHttp.ResponseBody, vbUnicode)
Set XmlHttp = Nothing
End Function
Dim XmlHttp, Sobj
Set XmlHttp = CreateObject("Microsoft.XMLHTTP")
XmlHttp.Open "Get", "http://www.ttcha.net/", False
XmlHttp.send
Set Sobj = CreateObject("ADODB.Stream")
Sobj.Type = 1
Sobj.Open
Sobj.Write XmlHttp.responseBody
Sobj.SaveToFile m_Path & "\index.html", 2
Sobj.Close
5.使用api函数,这个有个弊端,一开始速度很快,然后就会越来越慢
Private Declare Function URLDownloadToFile Lib "urlmon" Alias "URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long Private Function getHtmlStr$(strUrl$) Const tmpfile As String = "c:\tmp.tmp" URLDownloadToFile 0, strUrl, tmpfile, 0, 0 Do Until FileLen(tmpfile) DoEvents Loop Dim fn%, tmp$ fn = FreeFile Open tmpfile For Binary Access Read As #fn tmp = Space(FileLen(tmpfile)) Get #fn, , tmp Close #fn Kill tmpfile getHtmlStr = tmp End Function
另外附一个使用该对象下载文件的例子
Private Function URLDownloadToFileB(strSourceFile$, Optional strLocalFile$ = "Default") As Boolean
On Error GoTo Err1
Dim xObj, sObj+
Set xObj = CreateObject("Microsoft.XMLHTTP")
xObj.Open "GET", strSourceFile, 0
xObj.Send
Set sObj = CreateObject("ADODB.Stream")
sObj.Mode = 3
sObj.Type = 1
sObj.Open
sObj.Write (xObj.responseBody)
If strLocalFile = "Default" Then
strLocalFile = strFileShortName(strSourceFile)
End If
sObj.SaveToFile strLocalFile, 2
sObj.Close
Set xObj = Nothing
Set sObj = Nothing
URLDownloadToFileB = True
Err1:
Exit Function
URLDownloadToFileB = False
End Function
'返回短文件名,例如:"index.htm"
Public Function strFileShortName$(strFile$)
Dim intPos%
strFile = Trim(strFile)
intPos = InStrRev(strFile, "/")
If Mid(strFile, intPos - 1, 2) = "//" Or _
intPos = Len(strFile) Then '只指定了首页或者某个目录
strFileShortName = "temp"
Else
strFileShortName = Mid(strFile, intPos + 1)
End If
End Function6.使用"WinHttp.WinHttpRequest.5.1"对象,问题是中文字符全部出错!
Private Function getHtmlStr$(strUrl$)
On Error Resume Next
Set objhttp = CreateObject("WinHttp.WinHttpRequest.5.1")
If Err.Number <> 0 Then
Set objhttp = CreateObject("WinHttp.WinHttpRequest.5")
End If
objhttp.Open "GET", strUrl
objhttp.Send
getHtmlStr = objhttp.ResponseText
End Function
7.vb winsock获得网页源代码,是一段一段得到的,不知怎么才能知道它可以完成下载
'添加Winsock1,Command1,Text1
Private Sub Command1_Click() Dim lngPos&, strHost$, strUrl$ Text1.Text = Trim(Text1.Text) If Left(Text1.Text, 7) <> "http://" Then Text1.Text = "http://" & Text1.Text If InStr(8, Text1.Text, "/") = 0 Then Text1.Text = Text1.Text & "/" strUrl = Text1.Text lngPos = InStr(8, strUrl, "/") strHost = Mid(strUrl, 8, lngPos - 8) Me.AutoRedraw = True Me.Cls With Winsock1 .Close .RemoteHost = strHost .RemotePort = 80 .Connect End With End Sub Private Sub Winsock1_Connect() Dim strHttp As String strHttp = "GET " & Text1.Text & " HTTP/1.1" & vbCrLf strHttp = strHttp & "Accept-Language: zh-cn" & vbCrLf strHttp = strHttp & "Host:" & Winsock1.RemoteHost & vbCrLf strHttp = strHttp & "Connection: Keep-Alive" & vbCrLf Winsock1.SendData strHttp & vbCrLf End Sub Private Sub Winsock1_DataArrival(ByVal bytesTotal As Long) Dim o_strContent As String o_strContent = Space(bytesTotal) Winsock1.GetData o_strContent, vbString, bytesTotal Debug.Print o_strContent End Sub
8.使用Msxml2.XMLHTTP
Private Function getHtml(strUrl$) As String
With CreateObject("Msxml2.XMLHTTP")
.Open "GET", strUrl, False
.Send
getHtml = .responseText
End With
End Function
使用范例:
'采集百度id页面
Private Sub Command1_Click()
Dim url As String, p As Long, temp As String, i As Long, n As Long, v() As String
url = "http://ren.baidu.com/uquerys/?type=3&age_from=18&age_to=20&pn="
With CreateObject("Msxml2.XMLHTTP")
Do
p = p + 1
.Open "GET", url & p, False
.Send
temp = StrConv(.responsebody, vbUnicode, &H804)
If InStr(temp, "下一页") = 0 Then Exit Do
temp = Split(Split(temp, "s.src=")(2), """")(1)
.Open "geT", temp, False
.Send
v = Split(StrConv(.responsebody, vbUnicode, &H804), "username: ")
For i = 1 To UBound(v)
n = n + 1
Debug.Print n; Split(v(i), """")(1)
Next
Debug.Print
Loop
End With
End Sub