在編寫網絡爬蟲時，HttpWebRequest 幾乎可以完成絕大多數網站的抓取。為了更好地使用這一技術，我將常用的幾個功能進行了封裝，以方便調用。這個類已經在多個項目中得到使用，主要解決了 Cookies 相關的一些問題；如果有其它方面的問題，歡迎提出，我會進一步完善。
目前HttpHelper包含了以下幾個方面:
代碼如下:
1 using System; 2 using System.Collections.Generic; 3 using System.Collections.Specialized; 4 using System.IO; 5 using System.IO.ComPRession; 6 using System.Linq; 7 using System.Net; 8 using System.Net.Security; 9 using System.Security.Cryptography.X509Certificates; 10 using System.Text; 11 using System.Text.RegularExpressions; 12 using System.Collections; 13 using HtmlAgilityPack; 14 15 namespace TNIdea.Common.Helper 16 { 17 public class HttpHelper 18 { 19 public const string CharsetReg = @"(meta.*?charset=""?(?<Charset>[^/s""'>]+)""?)|(xml.*?encoding=""?(?<Charset>[^/s"">]+)""?)"; 20 21 /// <summary> 22 /// 獲取網頁的內容 23 /// </summary> 24 /// <param name="url">Url</param> 25 /// <param name="postData">Post的信息</param> 26 /// <param name="cookies">Cookies</param> 27 /// <param name="userAgent">瀏覽器標識</param> 28 /// <param name="referer">來源頁</param> 29 /// <param name="cookiesDomain">Cookies的Domian參數,配合cookies使用;為空則取url的Host</param> 30 /// <param name="encode">編碼方式,用于解析html</param> 31 /// <returns></returns> 32 public static string GetHttpContent(string url, string postData = null, CookieContainer cookies = null, string userAgent = "", string referer = "", string cookiesDomain = "", Encoding encode = null) 33 { 34 try 35 { 36 HttpWebResponse httpResponse = null; 37 if (!string.IsNullOrWhiteSpace(postData)) 38 httpResponse = CreatePostHttpResponse(url, postData, cookies: cookies, userAgent: userAgent, referer: referer); 39 else 40 httpResponse = CreateGetHttpResponse(url, cookies: cookies, userAgent: userAgent, referer: referer); 41 42 #region 根據Html頭判斷 43 string Content = null; 44 //緩沖區長度 45 const int N_CacheLength = 10000; 46 //頭部預讀取緩沖區,字節形式 47 var bytes = new List<byte>(); 48 int count = 0; 49 //頭部預讀取緩沖區,字符串 50 String cache = string.Empty; 51 52 //創建流對象并解碼 53 Stream ResponseStream; 54 switch (httpResponse.ContentEncoding.ToUpperInvariant()) 55 { 56 case "GZ
新聞熱點
疑難解答