在用WebBrowser编程实现网页操作自动化时,常要分析网页Html,例如网页在加载数据时,常会显示“系统处理中,请稍候..”,我们需要在数据加载完成后才能继续下一步操作,如何抓取这个信息的网页html元素变化,从而判断数据加载完毕呢?用IE开发者工具是不可能抓取到的,太快了。(当然,设置足够长的延时,也是可以实现的,只是不够科学及稳妥,毕竟有时因为网络原因,数据加载时间可能超过原来设定时间,其次,设置延时过长也导致程序不够友好)
实现的办法:
1、先用“系统处理中”查找(泛查找),并在找到html中,再细找缩小html元素范围。
bb = FindHtmlElement("系统处理中", ExtendedWebBrowser1.Document, "", "InnerText", false)
2、添加一个Timer控件,设定100毫秒。根据 1中找到的元素,进行不断抓取,并将抓到的结果输出到文本。
3、将2中输出,导入Excel,进行筛选,并从中找到重复次数少的行,便是数据加载、加载完成之间的变化。
Private Sub TimerProgress_Tick(sender As Object, e As EventArgs) Handles TimerProgress.TickIf Gethtmel ThenDim bb As HtmlElementbb = FindHtmlElement("all_jzts", ExtendedWebBrowser1.Document, "div", "id", True)If Not bb Is Nothing Then'WriteRunLog("Style : " + bb.Style)WriteRunLog(bb.OuterHtml)ElseWriteRunLog("all_jzts没找到")End Ifbb = FindHtmlElement("jzts", ExtendedWebBrowser1.Document, "div", "id", True)If Not bb Is Nothing Then'WriteRunLog("Style : " + bb.Style)WriteRunLog(bb.OuterHtml)ElseWriteRunLog("jzts没找到")End If'Gethtmel = FalseEnd If'系统处理中,请稍候...Application.DoEvents()End Sub
Function FindHtmlElement(ByVal FindText As String, ByVal doc As HtmlDocument, ByVal cTagName As String, ByVal cGetAttribute As String, Optional ByVal StrictMatching As Boolean = False) As HtmlElement'cTagName:检索具有指定 html 标记的元素,标记需要输入完整的,缺省时查找所有。'例如:<input class="button" type="submit" value=提交 style="cursor:hand">,不能只输入"i",需要输入"input"'cGetAttribute :比较的属性类型,取值为:Id、InnerText、Name、title、classname、value、'Id、InnerText可以通过GetAttribute获取,也可以通过HtmlElement.Id、HtmlElement.InnerText获取,所以代码简化为用GetAttribute获取。'doc:WebBrowserExt1.Document'GetAttribute("classname") '例如显示class="commonTable"的值commonTable'StrictMatching:True严格匹配FindText'WriteRunLog("FindHtmlElement开始:" + FindText)TryDim i, k As IntegerFindHtmlElement = NothingFindHtmlElementOfDocument = docIf doc Is Nothing Then '2023.11.15在递归调用中,因为有些iFrames还未真正加载,从而导致传入的doc = doc.Window.Frames.Item(k).Document 为 Nothing ,从而引发异常:未将对象引用设置到对象的实例。Exit FunctionEnd IfIf LCase(cGetAttribute) = "innertext" Then 'InnerText必须严格匹配,否则找到的结果是错误的。’StrictMatching = TrueEnd IfIf cTagName <> "" ThenDim EE As HtmlElementCollection = doc.GetElementsByTagName(cTagName)For i = 0 To EE.Count - 1If InStr(EE.Item(i).GetAttribute(cGetAttribute), FindText) > 0 _And (Not StrictMatching Or InStr(FindText, EE.Item(i).GetAttribute(cGetAttribute)) > 0) ThenFindHtmlElement = EE.Item(i)'WriteRunLog("Loop1")'WriteRunLog("FindHtmlElement结束0")Exit Function '找到就退出End IfNextElseFor i = 0 To doc.All.Count - 1If InStr(doc.All.Item(i).GetAttribute(cGetAttribute), FindText) > 0 _And (Not StrictMatching Or InStr(FindText, doc.All.Item(i).GetAttribute(cGetAttribute)) > 0) And (cTagName = "" Or LCase(cTagName) = LCase(doc.All.Item(i).TagName)) ThenFindHtmlElement = doc.All.Item(i)'WriteRunLog("Loop1")'WriteRunLog("FindHtmlElement结束0")Exit Function '找到就退出End IfNextEnd If'上面没找到,进行递归调用,递归会查找所有嵌套的Frame。For k = 0 To doc.Window.Frames.Count - 1'If k = 0 Then' WriteRunLog("递归调用 doc.Window.Frames.Count:" + doc.Window.Frames.Count.ToString) 'For Test'End If'2018.3.14 直接 递归调用'WriteRunLog("递归调用:" + Str(k))' WriteRunLog("doc.Window.Frames.Item(k).Name:" + doc.Window.Frames.Item(k).Name)FindHtmlElementOfDocument = doc.Window.Frames.Item(k).DocumentFindHtmlElement = FindHtmlElement(FindText, doc.Window.Frames.Item(k).Document, cTagName, cGetAttribute, StrictMatching)If Not FindHtmlElement Is Nothing Then '找到就退出循环'WriteRunLog("FindHtmlElement结束1")Exit FunctionEnd IfNextCatch ex As ExceptionFindHtmlElement = NothingWriteRunLog("FindHtmlElement发生异常:" + ex.Message)End TryEnd FunctionSub WriteRunLog(ByVal MyMsg As String)'Using w As StreamWriter = File.AppendText("RunLog.txt")Dim w As StreamWriterIf File.Exists("RunLog.txt") ThenIf My.Computer.FileSystem.GetFileInfo("RunLog.txt").Length > 10485760 Then '2017.5.4 文件大于10M,清0w = File.CreateText("RunLog.txt")w.Write("文件大于10M,置0从头开始!")w.Write(Chr(9))Elsew = File.AppendText("RunLog.txt")End IfElsew = File.CreateText("RunLog.txt")End Ifw.Write(Now)w.Write(Chr(9)) '插入Tab键w.WriteLine(MyMsg)w.Flush()w.Close()'End UsingEnd Sub