Get / Post 取得網頁資料流

以下是我之前實作的網頁資料擷取函式,提供給大家參考。

/// <summary>
/// Sends an HTTP GET request to <paramref name="url"/> and returns the response body stream.
/// </summary>
/// <remarks>
/// The request and response objects are stored in the <c>wreqScrape</c> and <c>wresScrape</c>
/// fields, and the instance's <c>cookieContainer</c> is attached to the request.
/// <see cref="WebRequest.Create(string)"/> runs outside the try block, so a malformed URL
/// surfaces as its original exception rather than the wrapped one.
/// </remarks>
/// <param name="url">Address to request.</param>
/// <param name="refererUrl">Value for the Referer header; pass null or an empty string to send none.</param>
/// <returns>The response stream for the requested address.</returns>
/// <exception cref="Exception">
/// The request failed. The original failure is available through <see cref="Exception.InnerException"/>.
/// </exception>
public Stream GetHttpStream(string url,string refererUrl)
{
    // Build the request and attach the shared cookie container.
    this.wreqScrape = (HttpWebRequest)WebRequest.Create(url);
    this.wreqScrape.CookieContainer = this.cookieContainer;

    // Pick a user agent at random from indices 0..6.
    // NOTE(review): assumes userAgentsSet holds at least 7 entries - confirm against its declaration.
    this.wreqScrape.UserAgent = this.userAgentsSet[this.rn.Next(0, 7)];

    this.wreqScrape.Method = "GET";
    this.wreqScrape.Timeout = 50000; // milliseconds

    if (!string.IsNullOrEmpty(refererUrl))
    {
        this.wreqScrape.Referer = refererUrl;
    }

    try
    {
        // Execute the request and hand back the response stream.
        this.wresScrape = (HttpWebResponse)this.wreqScrape.GetResponse();
        return this.wresScrape.GetResponseStream();
    }
    catch (Exception ex)
    {
        // Keep the original exception as InnerException so its type and stack trace are not lost.
        throw new Exception("在提取您所要求的" + url + "網頁時發生錯誤。" +
            "請檢查您所鍵入的 URL 以及 Internet 連線,並再次嘗試。WebException:" + ex.Message, ex);
    }
}

/// <summary>
/// Sends an HTTP POST request with a UTF-8, form-url-encoded body to <paramref name="url"/>
/// and returns the response body stream.
/// </summary>
/// <remarks>
/// The request and response objects are stored in the <c>wreqScrape</c> and <c>wresScrape</c>
/// fields, and the instance's <c>cookieContainer</c> is attached to the request.
/// Only <see cref="WebException"/> is wrapped; other failures propagate unchanged.
/// </remarks>
/// <param name="url">Address to request.</param>
/// <param name="refererUrl">Value for the Referer header; pass null or an empty string to send none.</param>
/// <param name="postData">Request body, already form-url-encoded by the caller.</param>
/// <returns>The response stream for the requested address.</returns>
/// <exception cref="Exception">
/// A <see cref="WebException"/> occurred while sending the request or reading the response.
/// The original failure is available through <see cref="Exception.InnerException"/>.
/// </exception>
public Stream PostHttpStream(string url,string refererUrl,string postData)
{
    // Build the request and attach the shared cookie container.
    this.wreqScrape = (HttpWebRequest)WebRequest.Create(url);
    this.wreqScrape.CookieContainer = this.cookieContainer;

    // Pick a user agent at random from indices 0..6.
    // NOTE(review): assumes userAgentsSet holds at least 7 entries - confirm against its declaration.
    this.wreqScrape.UserAgent = this.userAgentsSet[this.rn.Next(0, 7)];

    this.wreqScrape.Method = "POST";
    this.wreqScrape.Timeout = 50000; // milliseconds
    this.wreqScrape.ContentType = "application/x-www-form-urlencoded";

    if (!string.IsNullOrEmpty(refererUrl))
    {
        this.wreqScrape.Referer = refererUrl;
    }

    try
    {
        // Encode the body as UTF-8.
        byte[] payload = Encoding.UTF8.GetBytes(postData);

        // ContentLength must be set before the request stream is obtained, and it must be the
        // encoded byte count: using postData.Length (character count) caused errors when posting.
        this.wreqScrape.ContentLength = payload.Length;

        // Write the body to the request stream; disposing the stream completes the upload.
        using (Stream requestStream = this.wreqScrape.GetRequestStream())
        {
            requestStream.Write(payload, 0, payload.Length);
        }

        // Execute the request and hand back the response stream.
        this.wresScrape = (HttpWebResponse)this.wreqScrape.GetResponse();
        return this.wresScrape.GetResponseStream();
    }
    catch (WebException ex)
    {
        // Keep the original exception as InnerException so its status and stack trace are not lost.
        throw new Exception("在提取您所要求的" + url + "網頁時發生錯誤。" +
            "請檢查您所鍵入的 URL 以及 Internet 連線,並再次嘗試。WebException:" + ex.Message, ex);
    }
}

#region stream convert to string
/// <summary>
/// Reads a response stream line by line and recombines it into one string, with tab
/// characters removed and each line trimmed. Lines are joined without any separator.
/// </summary>
/// <remarks>
/// Reading stops at the first line containing the closing <c>&lt;/html&gt;</c> tag (that line is
/// replaced by a bare closing tag) or at the end of the stream, whichever comes first.
/// Full-width digits in the result are converted via
/// <c>StringTool.BigNumberConvertToSmaillNumber</c>.
/// When <paramref name="stmSource"/> is not null, the shared <c>sbReturnHTML</c> buffer is
/// cleared and the current <c>wresScrape</c> response is closed before returning.
/// </remarks>
/// <param name="stmSource">Response stream to read; when null an empty string is returned.</param>
/// <param name="encodeType">Text encoding used to decode the stream.</param>
/// <returns>
/// The recombined text. If reading is interrupted by an I/O or network error, the text
/// gathered so far is returned as-is.
/// </returns>
/// <exception cref="Exception">
/// Any other failure while converting the stream. The original failure is available through
/// <see cref="Exception.InnerException"/>.
/// </exception>
public string StreamConvertToRecombineString(Stream stmSource,Encoding encodeType)
{
    if (stmSource == null)
    {
        return "";
    }

    try
    {
        using (StreamReader reader = new StreamReader(stmSource, encodeType))
        {
            try
            {
                // NOTE(review): the published listing showed " " as the terminator literal, which
                // contradicts its own "read until end of page" comment and stops on the first line
                // containing a space. It appears to be a stripped "</html>" - confirm.
                bool reachedEndTag = false;
                string line;

                // ReadLine returns null at end of stream; test for it instead of relying on a
                // swallowed NullReferenceException to leave the loop.
                while ((line = reader.ReadLine()) != null)
                {
                    if (line.IndexOf("</html>", StringComparison.OrdinalIgnoreCase) >= 0)
                    {
                        reachedEndTag = true;
                        break;
                    }

                    // Removing tabs is a no-op when none are present, so one path covers both cases.
                    this.sbReturnHTML.Append(line.Replace("\t", "").Trim());
                }

                // The line holding the closing tag is not appended above; re-add the tag itself.
                if (reachedEndTag)
                {
                    this.sbReturnHTML.Append("</html>");
                }
            }
            catch (IOException)
            {
                // Best effort: the connection dropped mid-read, return what was gathered.
                return this.sbReturnHTML.ToString();
            }
            catch (WebException)
            {
                // Best effort: network failure or timeout mid-read, return what was gathered.
                return this.sbReturnHTML.ToString();
            }

            // Convert full-width digits to half-width ones (optional normalization).
            StringTool.BigNumberConvertToSmaillNumber(ref sbReturnHTML);

            return this.sbReturnHTML.ToString();
        }
    }
    catch (Exception ex)
    {
        // Preserve the cause instead of throwing an empty, message-less exception.
        throw new Exception("將回應資料流轉換為字串時發生錯誤:" + ex.Message, ex);
    }
    finally
    {
        // Reset the shared buffer for the next call; the return value was already captured.
        this.sbReturnHTML.Length = 0;

        // wresScrape is null when the stream did not come from GetHttpStream/PostHttpStream;
        // guard so cleanup cannot mask the real error with a NullReferenceException.
        if (this.wresScrape != null)
        {
            this.wresScrape.Close();
        }
    }
}



arrow
arrow
    全站熱搜

    tkjh9001024 發表在 痞客邦 留言(0) 人氣()