小弟我通过WebClient获取到了文章网页的源码,要怎么使用正则才能获取到文章的内容呢?
我通过WebClient获取到了文章网页的源码,要怎么使用正则才能获取到文章的内容呢?
这个是我的代码。现在我获取到了标题,就是文章内容不知道怎么获取。我现在获取下来的是文章页的源码,要怎么处理才能拿到
文章内容呢?
------解决方案--------------------
这个是我的代码。现在我获取到了标题,就是文章内容不知道怎么获取。我现在获取下来的是文章页的源码,要怎么处理才能拿到
文章内容呢?
// Downloads the site's front page, extracts the article links listed inside
// the "topcenter" <div>, shows the last link's URL and title in
// richTextBox1/richTextBox2, then downloads that article page into
// richTextBox3 and saves URL, title and page HTML to a text file.
string baseUrl = "http://www.aomenduchang123001.com";
string html = string.Empty;
HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(baseUrl);
// Dispose the response together with its stream and reader so the underlying
// connection is released; the using blocks make explicit Close() calls unnecessary.
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
using (Stream stream = response.GetResponseStream())
using (StreamReader reader = new StreamReader(stream, Encoding.UTF8))
{
    html = reader.ReadToEnd();
}

// Group 1 of the first pattern is the inner markup of the <ul> that holds the article list.
string title = Regex.Match(html, @"<div\s*?class=""topcenter"">\s*?<ul[^>]*?>([\s\S]*?)</ul>").Groups[1].Value.Trim();
MatchCollection matches = Regex.Matches(title, @"<li>[^<]*?<a\s*?href=""([^""]*?)""[^>]*?>([^<]*?)</a>\s*?</li>");

Uri siteRoot = new Uri(baseUrl);
foreach (Match match in matches)
{
    // Groups[1] is the article href, Groups[2] is the article title.
    // (The pattern has no third group, so nothing else is printed.)
    string href = match.Groups[1].Value;
    string articleTitle = match.Groups[2].Value;
    Console.WriteLine(href + "\t" + articleTitle);

    // Resolve the href against the site root instead of concatenating strings,
    // so absolute hrefs and hrefs without a leading '/' still yield a valid URL.
    Uri articleUri;
    if (!Uri.TryCreate(siteRoot, href, out articleUri))
    {
        continue; // malformed href: skip this entry
    }

    // Each iteration overwrites the text boxes, so only the last link remains.
    richTextBox1.Text = articleUri.AbsoluteUri;
    richTextBox2.Text = articleTitle;
}

// Only attempt the download when there is a URL to fetch; DownloadData
// throws on an empty address (e.g. when the list pattern matched nothing).
string articleUrl = richTextBox1.Text;
if (!string.IsNullOrEmpty(articleUrl))
{
    using (WebClient MyWebClient = new WebClient())
    {
        // Credentials used to authenticate the request to the Internet resource.
        MyWebClient.Credentials = CredentialCache.DefaultCredentials;
        Byte[] pageData = MyWebClient.DownloadData(articleUrl);
        // Decodes as UTF-8; use a different Encoding if the site serves another charset.
        string pageHtml = Encoding.UTF8.GetString(pageData);
        richTextBox3.Text = pageHtml;
    }

    using (StreamWriter sw = new StreamWriter(@"C:\Users\admin\Desktop\11.txt"))
    {
        sw.Write(richTextBox1.Text + "\r\n" + richTextBox2.Text + "\r\n" + richTextBox3.Text);
    }
}
------解决方案--------------------
string url = "http://www.aomenduchang123001.com";
string html = string.Empty;
HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url);
HttpWebResponse response = (HttpWebResponse)request.GetResponse();
using (Stream stream = response.GetResponseStream())
{
using (StreamReader reader = new StreamReader(stream, Encoding.UTF8))
{