我正在编写类似于web爬虫的东西,它的引擎遵循以下步骤:
这是一次通话的最佳选择。
但是我有一些关于150个的链接,我应该每2分钟通过线程检查其中的每个,因此通过增加查询的装载,这个过程和server不会响应和应用程序崩溃!!
我尝试了一些技巧,比如增加sql服务器响应超时,但这一点也没有帮助。
对于这个过程,有什么更好的方法或建议吗?
谢谢
发布于 2015-02-07 17:43:51
SELECT id, link FROM posts with (nolock) WHERE link in (@listOfLowerCaseLinks)
Dim myListOfLinks As New List(Of String)
...
TheCommand.Parameters.AddWithValue("@listOfLowerCaseLinks", myListOfLinks)发布于 2015-02-07 20:23:09
我建议您将表值参数传递给此任务的存储过程。这将允许将整个列表插入到单个调用中。下面是一个示例,您可以根据实际的列长度进行调整。重要的是在posts表的链接列上有一个索引。在本例中,我假设链接是唯一的。
创建表类型和proc的type:
CREATE TYPE dbo.linkInfo AS TABLE(
link varchar(255) NOT NULL PRIMARY KEY
,descrip varchar(255)
,title varchar(255)
);
GO
ALTER PROC dbo.usp_InsertRssItems
@site varchar(255)
,@category varchar(255)
,@linkInfo dbo.linkInfo READONLY
AS
SET NOCOUNT ON;
DECLARE @InsertedPosts TABLE(link varchar(255));
INSERT INTO dbo.posts(link)
OUTPUT inserted.link INTO @InsertedPosts
SELECT link
FROM @linkInfo AS li
WHERE NOT EXISTS(
SELECT *
FROM dbo.posts AS p
WHERE p.link = li.link
);
INSERT INTO dbo.queue(link,descrip,site,title,category)
SELECT li.link, li.descrip, @site,li. title, @category
FROM @linkInfo AS li
WHERE EXISTS(
SELECT *
FROM @InsertedPosts AS ip
WHERE ip.link = li.link
);
GO示例VB.NET代码:
Sub MyTickHandler()
Dim NewItems As New List(Of Structures.RssItem)
Dim founded As Boolean = False
NewItems = RssReader.ParseRssFile(RssURL)
Dim dt = getNewRssItemDataTable(NewItems)
Dim connString = Configs.NewsDBConnection
Dim myConnection As SqlConnection = New SqlConnection("Server=localhost;Database=db;Integrated Security=SSPI;;Connection Timeout=45;Max Pool Size= 300")
Dim TheCommand As SqlCommand = New SqlCommand("dbo.usp_InsertRssItems", myConnection)
TheCommand.Parameters.Add(New SqlParameter("@site", SqlDbType.VarChar, 255)).Value = "z"
TheCommand.Parameters.Add(New SqlParameter("@category", SqlDbType.VarChar, 255)).Value = "z"
TheCommand.Parameters.Add(New SqlParameter("@linkInfo", SqlDbType.Structured)).Value = dt
TheCommand.CommandType = CommandType.StoredProcedure
myConnection.Open()
TheCommand.ExecuteNonQuery()
myConnection.Close()
myConnection.Dispose()
End Sub
Private Function getNewRssItemDataTable(NewRssItems As List(Of Structures.RssItem)) As DataTable
Dim dt As New DataTable
dt.Columns.Add("link", GetType(String)).MaxLength = 255
dt.Columns.Add("descrip", GetType(String)).MaxLength = 255
dt.Columns.Add("title", GetType(String)).MaxLength = 255
For Each NewRssItem In NewRssItems
Dim row = dt.NewRow
dt.Rows.Add(row)
row(0) = NewRssItem.link
row(1) = NewRssItem.description
row(2) = NewRssItem.title
Next NewRssItem
Return dt
End Function编辑:
我看到你提到过你想要一个SqlBulkCopy的例子。如果插入是无条件的,则可以使用以下技术:
Sub executeBulkInsert(connectionString As String, site As String, category As String, NewRssItems As List(Of Structures.RssItem))
Dim dt As New DataTable
dt.Columns.Add("link", GetType(String)).MaxLength = 255
dt.Columns.Add("descrip", GetType(String)).MaxLength = 255
dt.Columns.Add("site", GetType(String)).MaxLength = 255
dt.Columns.Add("title", GetType(String)).MaxLength = 255
dt.Columns.Add("category", GetType(String)).MaxLength = 255
For Each NewRssItem In NewRssItems
Dim row = dt.NewRow
dt.Rows.Add(row)
row(0) = site
row(1) = category
row(2) = NewRssItem.link
row(3) = NewRssItem.description
row(4) = NewRssItem.title
Next NewRssItem
Dim bcp = New SqlBulkCopy(connectionString)
bcp.DestinationTableName = "dbo.queue"
bcp.WriteToServer(dt)
End Subhttps://stackoverflow.com/questions/28385122
复制相似问题