VB.NET 网络爬取数据(以爬取上海海事大学录取分数线为例)
本文用 VB.NET 实现一个简单的网页爬虫(网抓)示例;代码写得比较粗糙,望见谅!
Imports System.IO
Imports System.Net
Imports System.Text.RegularExpressions
''' <summary>
''' Form that downloads score pages from admission.shmtu.edu.cn for every combination
''' of the entries in ListBox1 and ListBox2 and lists the extracted table cells.
''' </summary>
Public Class Form1
    ''' <summary>
    ''' Walks every (ListBox1 item, ListBox2 item) pair and loads the rows of the
    ''' matching score page. A failure on one page is logged and does not stop the
    ''' remaining pages (the original relied on "On Error Resume Next" for this).
    ''' </summary>
    Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
        For Each dm As Object In ListBox1.Items
            For Each XL As Object In ListBox2.Items
                Try
                    AddScoreRows(Convert.ToString(dm), Convert.ToString(XL))
                Catch ex As Exception
                    ' Best-effort scraping: report the problem instead of hiding it, then continue.
                    System.Diagnostics.Debug.WriteLine(
                        "Failed to load scores for " & Convert.ToString(XL) & "/" &
                        Convert.ToString(dm) & ": " & ex.Message)
                End Try
            Next
        Next
    End Sub

    ''' <summary>
    ''' Downloads one score page, extracts its cells with NET_WZ.GET_TABLE and adds one
    ''' list row per group of five consecutive cells.
    ''' </summary>
    ''' <param name="dm">Last URL segment; also used as the text of each created row.</param>
    ''' <param name="xl">URL segment placed directly after "score/".</param>
    Private Sub AddScoreRows(dm As String, xl As String)
        Dim html As String = NET_WZ.GET_DATA("http://admission.shmtu.edu.cn/score/" & xl & "/" & dm)
        ' GET_DATA yields Nothing when the download fails; there is nothing to parse then.
        If String.IsNullOrEmpty(html) Then Return

        ' Parse once and keep the result (the original parsed only to read the upper bound
        ' and never stored the cells). The original passed this value to UBound, so it is
        ' expected to be an array.
        ' NOTE(review): assumed to be one-dimensional - confirm against GET_TABLE's return.
        Dim cells As Array = TryCast(NET_WZ.GET_TABLE(html), Array)
        If cells Is Nothing Then Return

        Dim lastIndex As Integer = cells.GetUpperBound(0)
        Dim i As Integer = 0
        ' Stop while i + 4 is still a valid index so a row never reads past the array.
        While i + 4 <= lastIndex
            If Convert.ToString(cells.GetValue(i)) <> "" Then
                Dim item As New ListViewItem With {.Text = dm}
                For offset As Integer = 1 To 4
                    item.SubItems.Add(Convert.ToString(cells.GetValue(i + offset)))
                Next
                ' NOTE(review): the original never attached the item to a control;
                ' "ListView1" is the assumed designer name of the target list - confirm.
                ListView1.Items.Add(item)
                ' One row consumes the cell at i plus the four that follow it.
                i += 5
            Else
                i += 1
            End If
        End While
    End Sub
End Class
Public Class NET_WZ
''' <summary>
''' Requests <paramref name="_URL"/> and returns the whole response body as text.
''' </summary>
''' <param name="_URL">Address to request.</param>
''' <returns>
''' The response body, or Nothing when creating the request, sending it, or reading
''' the response fails.
''' </returns>
Public Shared Function GET_DATA(ByVal _URL As String) As String
    Dim contents As String = Nothing
    Try
        Dim request As WebRequest = WebRequest.Create(_URL)
        ' Using blocks release the connection and the stream even when reading throws;
        ' the original never closed either of them.
        Using response As WebResponse = request.GetResponse()
            Using reader As New StreamReader(response.GetResponseStream())
                contents = reader.ReadToEnd()
            End Using
        End Using
    Catch ex As Exception
        ' Callers treat Nothing as "no data", so keep that contract but leave a trace
        ' of what went wrong instead of swallowing the error silently.
        System.Diagnostics.Debug.WriteLine("GET_DATA failed for " & _URL & ": " & ex.Message)
        contents = Nothing
    End Try
    Return contents
End Function
Public Shared Function GET_TABLE(ByVal STR As String) As Object
Dim strReg, sReg As String
Dim TMP(1000) As Object
Dim I As Integer = 0
strReg = "(?is)(?<=<table class=).+?(?=</table>)"
sReg = "(?<=<td>).+?(?=</td>)"
Dim mc, mth As MatchCollection
mc = Regex.Matches(STR, strReg)
For Each m As Match In mc
Dim temp As String = Replace(m.Value, "<td></td><td>" & vbCr & "</td>", "<td>0</td><td>0</td>")
mth = Regex.Matches(temp, sReg)
For Each h As Match In mth
TMP(I) = h.Value
I += 1
Catch ex As Exception