/// <summary>
/// Searches Google for the text in <c>textBox2</c>, lists every external result
/// link in <c>listBox1</c>, and appends the paragraph text of each linked page
/// to "website.txt".
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    listBox1.Items.Clear();

    // Escape the user's text so characters such as '&', '#' or '+' cannot
    // corrupt the query string.
    string SearchResults = "http://google.com/search?q=" + Uri.EscapeDataString(textBox2.Text.Trim());

    // Download the result page; the using blocks release the connection and
    // stream even when reading fails. UTF-8 is used (as GetSourceCode does)
    // because ASCII decoding replaces every non-ASCII character with '?'.
    string searchPage;
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(SearchResults);
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
    {
        searchPage = reader.ReadToEnd();
    }

    HtmlAgilityPack.HtmlDocument html = new HtmlAgilityPack.HtmlDocument();
    html.OptionOutputAsXml = true;
    html.LoadHtml(searchPage);

    // SelectNodes yields null (not an empty collection) when nothing matches.
    HtmlNodeCollection links = html.DocumentNode.SelectNodes("//a[@href]");
    if (links == null)
    {
        return;
    }

    using (StreamWriter sw = File.AppendText("website.txt"))
    {
        foreach (HtmlNode link in links)
        {
            string hrefValue = link.GetAttributeValue("href", string.Empty);

            // Keep only Google redirect links ("/url?q=<target>&...") that point
            // at a non-Google http:// target.
            // NOTE(review): https:// targets are rejected by this filter, exactly
            // as in the original condition - confirm whether that is intended.
            bool isExternalResult =
                hrefValue.IndexOf("GOOGLE", StringComparison.OrdinalIgnoreCase) < 0
                && hrefValue.Contains("/url?q=")
                && hrefValue.IndexOf("HTTP://", StringComparison.OrdinalIgnoreCase) >= 0;
            if (!isExternalResult)
            {
                continue;
            }

            // Everything after the first '&' is Google's tracking parameters.
            int index = hrefValue.IndexOf('&');
            if (index <= 0)
            {
                continue;
            }

            string targetUrl = hrefValue.Substring(0, index).Replace("/url?q=", "");
            listBox1.Items.Add(targetUrl);

            // Scrape the linked page itself. The earlier code scanned the search
            // result page again for every anchor, so the paragraph loop never
            // received the content of the listed sites.
            string SourceCode = worker.GetSourceCode(targetUrl);
            MatchCollection data = Regex.Matches(SourceCode, @"<p>\s*(.+?)\s*</p>", RegexOptions.Singleline);
            foreach (Match m in data)
            {
                // NOTE(review): the last three replacements are no-ops as written;
                // presumably they once mapped HTML entities - verify.
                string value = m.Groups[1].Value;
                value = value.Replace("’", "'").Replace("<strong>", "").Replace("</strong>", "").Replace("Ouml;z", "Ö").Replace("ö", "ö").Replace("ü", "ü").Replace("ç", "ç");
                sw.Write(value);
            }
        }
    }
}
public static string GetSourceCode(string url)
{
HttpWebRequest reg = (HttpWebRequest)WebRequest.Create(url);
HttpWebResponse resp = (HttpWebResponse)reg.GetResponse();
StreamReader sr = new
StreamReader(resp.GetResponseStream(),System.Text.UTF8Encoding.UTF8);
string SourceCode = sr.ReadToEnd();
sr.Close();
resp.Close();
return SourceCode
大家好。我正在编写一个用于网页抓取的 Windows 窗体应用程序。我在窗体中输入一些关键词,程序会自动在 Google 中搜索这些关键词。程序应当把找到的链接显示在列表框中,并把每个链接所指向页面的内容(页面中的文字)写入文本文件。显示链接这部分工作正常,但程序没有把链接的内容记录到文本文件中。
我用调试模式试过,结果发现程序没有进入下面这个代码块:
foreach(Match m in data)
{
string value = m.Groups[1].Value;
value = value.Replace("’", "'").Replace("<strong>", "").Replace("</strong>", "").Replace("Ouml;z", "Ö").Replace("ö", "ö").Replace("ü", "ü").Replace("ç", "ç");
values.Add(value);
sw.Write(value);
}
我分别试过"显示链接"的代码块和"记录链接内容"的代码块,它们单独运行时都很正常。但当我把它们组合在一起时,代码就不工作了。程序没有报任何错误,但就是不起作用。请帮帮我。
发布于 2017-09-29 07:04:23
/// <summary>
/// Downloads the page whose address is in <c>textBox1</c> and appends the text of
/// every <c>&lt;p&gt;</c> element found in it to "website.txt".
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Clicked(object sender, EventArgs e)
{
    string url = textBox1.Text;
    string SourceCode = worker.GetSourceCode(url);
    MatchCollection data = Regex.Matches(SourceCode, @"<p>\s*(.+?)\s*</p>", RegexOptions.Singleline);

    // Nothing to write: leave the file untouched, as the per-match open did.
    if (data.Count == 0)
    {
        return;
    }

    // Open the file once for all matches; the using block closes it even if a
    // write throws, instead of reopening and closing it for every paragraph.
    using (StreamWriter sw = File.AppendText("website.txt"))
    {
        foreach (Match m in data)
        {
            // NOTE(review): the last three replacements are no-ops as written;
            // presumably they once mapped HTML entities - verify.
            string value = m.Groups[1].Value;
            value = value.Replace("’", "'").Replace("<strong>", "").Replace("</strong>", "").Replace("Ouml;z", "Ö").Replace("ö", "ö").Replace("ü", "ü").Replace("ç", "ç");
            sw.Write(value);
        }
    }
}
/// <summary>
/// Searches Google for the text in <c>textBox2</c>, adds each external result
/// link to <c>listBox1</c>, and passes it to <c>GetData</c> so the linked page's
/// paragraphs are saved.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    listBox1.Items.Clear();

    // Value the user typed into the text box, escaped so that characters such as
    // '&', '#' or '+' cannot corrupt the query string.
    string SearchResults = "http://google.com/search?q=" + Uri.EscapeDataString(textBox2.Text.Trim());

    // The using blocks release the connection and stream even when reading
    // fails. UTF-8 replaces the former ASCII decoding, which turned every
    // non-ASCII character into '?'.
    string resultPage;
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(SearchResults);
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
    {
        resultPage = reader.ReadToEnd();
    }

    HtmlAgilityPack.HtmlDocument html = new HtmlAgilityPack.HtmlDocument();
    html.OptionOutputAsXml = true;
    html.LoadHtml(resultPage);

    // SelectNodes yields null (not an empty collection) when nothing matches.
    HtmlNodeCollection anchors = html.DocumentNode.SelectNodes("//a[@href]");
    if (anchors == null)
    {
        return;
    }

    foreach (HtmlNode link in anchors)
    {
        string hrefValue = link.GetAttributeValue("href", string.Empty);

        // Keep only Google redirect links ("/url?q=<target>&...") that point at
        // a non-Google http:// target.
        // NOTE(review): https:// targets are rejected by this filter, exactly as
        // in the original condition - confirm whether that is intended.
        bool mentionsGoogle = hrefValue.IndexOf("GOOGLE", StringComparison.OrdinalIgnoreCase) >= 0;
        bool isRedirect = hrefValue.Contains("/url?q=");
        bool hasHttpTarget = hrefValue.IndexOf("HTTP://", StringComparison.OrdinalIgnoreCase) >= 0;
        if (mentionsGoogle || !isRedirect || !hasHttpTarget)
        {
            continue;
        }

        // Everything after the first '&' is Google's tracking parameters.
        int index = hrefValue.IndexOf('&');
        if (index <= 0)
        {
            continue;
        }

        hrefValue = hrefValue.Substring(0, index).Replace("/url?q=", "");
        listBox1.Items.Add(hrefValue);
        GetData(hrefValue);
    }
}
/// <summary>
/// Downloads <paramref name="url"/> and appends the text of every
/// <c>&lt;p&gt;</c> element found in it to "website.txt".
/// </summary>
/// <param name="url">Address of the page to download.</param>
private void GetData(string url)
{
    // Fetch and match first, so a failed download never leaves the file open.
    string SourceCode = worker.GetSourceCode(url);
    MatchCollection data = Regex.Matches(SourceCode, @"<p>\s*(.+?)\s*</p>", RegexOptions.Singleline);

    // The using block closes the writer even if a write throws.
    using (StreamWriter sw = File.AppendText("website.txt"))
    {
        foreach (Match m in data)
        {
            // NOTE(review): the last three replacements are no-ops as written;
            // presumably they once mapped HTML entities - verify.
            string value = m.Groups[1].Value;
            value = value.Replace("’", "'").Replace("<strong>", "").Replace("</strong>", "").Replace("Ouml;z", "Ö").Replace("ö", "ö").Replace("ü", "ü").Replace("ç", "ç");
            sw.Write(value);
        }
    }
}
// Empty handler: the body contains no statements, so nothing happens when it runs.
// Presumably wired to listBox1's SelectedIndexChanged event - verify in the designer file.
private void listBox1_SelectedIndexChanged(object sender, EventArgs e)
{
}
// Empty handler: the body contains no statements, so nothing happens when it runs.
// Presumably wired to label3's Click event - verify in the designer file.
private void label3_Click(object sender, EventArgs e)
{
}
// Empty handler: the body contains no statements, so nothing happens when it runs.
// Presumably wired to label2's Click event - verify in the designer file.
private void label2_Click(object sender, EventArgs e)
{
}
}}
我终于成功运行了。这就是答案。我的答案中还遗留了一些问题,它们都与正则表达式有关:因为各个网站的 HTML 代码没有统一的标准,所以需要通过调整正则表达式来解决。等我完成项目后,我会分享全部代码。
https://stackoverflow.com/questions/46482400
复制相似问题