1.使用Inet控件,只能得到部分源代码,有时候会出现奇怪的问题。
Private Function getHtmlStr$(strUrl$) If Inet.StillExecuting = False Then getHtmlStr = Inet.OpenURL(strUrl) Do While Inet.StillExecuting DoEvents Loop End If End Function
2.使用Inet控件,多了OpenURL的第二个参数,得到的和上面的一样,只是部分
Private Function getHtmlStr$(strUrl$) Dim bCode() As Byte Dim sTmp As String Dim i As Integer On Error Resume Next bCode = Inet.OpenURL(strUrl, 1) sTmp = "" For i = 0 To UBound(bCode) - 1 sTmp = sTmp & Chr(bCode(i)) Next i getHtmlStr = sTmp End Function
3.用的WebBrowser控件,最原始的方法,得到的代码是非常完整的
Private Function getHtmlStr$(strUrl$) WebBrowser1.Navigate strUrl Do While WebBrowser1.Busy DoEvents Loop getHtmlStr = WebBrowser1.Document.body.outerhtml End Function
4.使用"Microsoft.XMLHTTP"对象,这个方法用的比较多,因为它比较“绿色”
Private Function getHtmlStr$(strUrl$) Dim XmlHttp As Object Set XmlHttp = CreateObject("Microsoft.XMLHTTP") XmlHttp.Open "GET", strUrl, False'这里可以用POST和GET,不过POST更好些,得到的网页始终最新 XmlHttp.send getHtmlStr = StrConv(XmlHttp.ResponseBody, vbUnicode) Set XmlHttp = Nothing End Function Dim XmlHttp, Sobj Set XmlHttp = CreateObject("Microsoft.XMLHTTP") XmlHttp.Open "Get", "http://www.ttcha.net/", False XmlHttp.send Set Sobj = CreateObject("ADODB.Stream") Sobj.Type = 1 Sobj.Open Sobj.Write XmlHttp.responseBody Sobj.SaveToFile m_Path & "\index.html", 2 Sobj.Close
5.使用api函数,这个有个弊端,一开始速度很快,然后就会越来越慢
Private Declare Function URLDownloadToFile Lib "urlmon" Alias "URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long Private Function getHtmlStr$(strUrl$) Const tmpfile As String = "c:\tmp.tmp" URLDownloadToFile 0, strUrl, tmpfile, 0, 0 Do Until FileLen(tmpfile) DoEvents Loop Dim fn%, tmp$ fn = FreeFile Open tmpfile For Binary Access Read As #fn tmp = Space(FileLen(tmpfile)) Get #fn, , tmp Close #fn Kill tmpfile getHtmlStr = tmp End Function
另外附一个使用该对象下载文件的例子
Private Function URLDownloadToFileB(strSourceFile$, Optional strLocalFile$ = "Default") As Boolean On Error GoTo Err1 Dim xObj, sObj+ Set xObj = CreateObject("Microsoft.XMLHTTP") xObj.Open "GET", strSourceFile, 0 xObj.Send Set sObj = CreateObject("ADODB.Stream") sObj.Mode = 3 sObj.Type = 1 sObj.Open sObj.Write (xObj.responseBody) If strLocalFile = "Default" Then strLocalFile = strFileShortName(strSourceFile) End If sObj.SaveToFile strLocalFile, 2 sObj.Close Set xObj = Nothing Set sObj = Nothing URLDownloadToFileB = True Err1: Exit Function URLDownloadToFileB = False End Function '返回短文件名,例如:"index.htm" Public Function strFileShortName$(strFile$) Dim intPos% strFile = Trim(strFile) intPos = InStrRev(strFile, "/") If Mid(strFile, intPos - 1, 2) = "//" Or _ intPos = Len(strFile) Then '只指定了首页或者某个目录 strFileShortName = "temp" Else strFileShortName = Mid(strFile, intPos + 1) End If End Function
6.使用"WinHttp.WinHttpRequest.5.1"对象,问题是中文字符全部出错!
Private Function getHtmlStr$(strUrl$) On Error Resume Next Set objhttp = CreateObject("WinHttp.WinHttpRequest.5.1") If Err.Number <> 0 Then Set objhttp = CreateObject("WinHttp.WinHttpRequest.5") End If objhttp.Open "GET", strUrl objhttp.Send getHtmlStr = objhttp.ResponseText End Function
7.vb winsock获得网页源代码,是一段一段得到的,不知怎么才能知道它可以完成下载
'添加Winsock1,Command1,Text1
Private Sub Command1_Click() Dim lngPos&, strHost$, strUrl$ Text1.Text = Trim(Text1.Text) If Left(Text1.Text, 7) <> "http://" Then Text1.Text = "http://" & Text1.Text If InStr(8, Text1.Text, "/") = 0 Then Text1.Text = Text1.Text & "/" strUrl = Text1.Text lngPos = InStr(8, strUrl, "/") strHost = Mid(strUrl, 8, lngPos - 8) Me.AutoRedraw = True Me.Cls With Winsock1 .Close .RemoteHost = strHost .RemotePort = 80 .Connect End With End Sub Private Sub Winsock1_Connect() Dim strHttp As String strHttp = "GET " & Text1.Text & " HTTP/1.1" & vbCrLf strHttp = strHttp & "Accept-Language: zh-cn" & vbCrLf strHttp = strHttp & "Host:" & Winsock1.RemoteHost & vbCrLf strHttp = strHttp & "Connection: Keep-Alive" & vbCrLf Winsock1.SendData strHttp & vbCrLf End Sub Private Sub Winsock1_DataArrival(ByVal bytesTotal As Long) Dim o_strContent As String o_strContent = Space(bytesTotal) Winsock1.GetData o_strContent, vbString, bytesTotal Debug.Print o_strContent End Sub
8.使用Msxml2.XMLHTTP
Private Function getHtml(strUrl$) As String With CreateObject("Msxml2.XMLHTTP") .Open "GET", strUrl, False .Send getHtml = .responseText End With End Function
使用范例:
'采集百度id页面
Private Sub Command1_Click() Dim url As String, p As Long, temp As String, i As Long, n As Long, v() As String url = "http://ren.baidu.com/uquerys/?type=3&age_from=18&age_to=20&pn=" With CreateObject("Msxml2.XMLHTTP") Do p = p + 1 .Open "GET", url & p, False .Send temp = StrConv(.responsebody, vbUnicode, &H804) If InStr(temp, "下一页") = 0 Then Exit Do temp = Split(Split(temp, "s.src=")(2), """")(1) .Open "geT", temp, False .Send v = Split(StrConv(.responsebody, vbUnicode, &H804), "username: ") For i = 1 To UBound(v) n = n + 1 Debug.Print n; Split(v(i), """")(1) Next Debug.Print Loop End With End Sub