Web scraping using C#:
Program 1: [including HTML tags]
[code]
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Navigation;
using System.Windows.Shapes;
using System.Net;
using System.IO;
namespace chennaiBusCSharp
{
    /// <summary>
    /// Interaction logic for MainWindow.xaml.
    /// When the button is clicked, downloads a fixed web page over HTTP and
    /// shows the raw response text (HTML tags included) in the <c>txt</c> element.
    /// </summary>
    /// <remarks>
    /// <c>txt</c> is not declared in this file; it is presumably a text control
    /// defined in MainWindow.xaml.
    /// </remarks>
    public partial class MainWindow : Window
    {
        /// <summary>Address of the page that is downloaded on every click.</summary>
        private const string PageUrl = "http://agnchennaiapp.appspot.com/chennaimtcbus";

        /// <summary>Value assigned to <see cref="HttpWebRequest.Timeout"/>, in milliseconds.</summary>
        private const int RequestTimeoutMilliseconds = 5000;

        /// <summary>
        /// Creates the window and loads the components declared in XAML.
        /// </summary>
        public MainWindow()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Click handler: downloads <see cref="PageUrl"/> and displays the response text.
        /// </summary>
        /// <param name="sender">The control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        /// <remarks>
        /// The download is synchronous, so this handler does not return until the
        /// response has been read or the request has failed.
        /// </remarks>
        private void Button_Click(object sender, RoutedEventArgs e)
        {
            try
            {
                txt.Text = DownloadPage(PageUrl, RequestTimeoutMilliseconds);
            }
            catch (WebException ex)
            {
                // Timeouts, connection failures and HTTP error statuses all surface
                // here; report them instead of letting the exception escape the
                // event handler.
                MessageBox.Show("Could not download the page: " + ex.Message);
            }
            catch (IOException ex)
            {
                // The connection can still fail while the body is being read.
                MessageBox.Show("Could not read the response: " + ex.Message);
            }
        }

        /// <summary>
        /// Issues a GET request for <paramref name="url"/> and returns the whole
        /// response body as text.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="timeoutMilliseconds">Value assigned to the request's <c>Timeout</c>.</param>
        /// <returns>The response body, or an empty string when the response has no stream.</returns>
        /// <exception cref="WebException">The request failed or timed out.</exception>
        /// <exception cref="IOException">Reading the response stream failed.</exception>
        private static string DownloadPage(string url, int timeoutMilliseconds)
        {
            var request = (HttpWebRequest)WebRequest.Create(url);
            request.Timeout = timeoutMilliseconds;
            request.Method = "GET";
            // Kept from the original request setup; a GET carries no body, so this
            // only adds a Content-Type header.
            request.ContentType = "text/plain";

            // Dispose the response, the stream and the reader even when reading throws.
            using (var response = (HttpWebResponse)request.GetResponse())
            using (Stream body = response.GetResponseStream())
            {
                if (body == null)
                {
                    return string.Empty;
                }

                using (var reader = new StreamReader(body))
                {
                    return reader.ReadToEnd();
                }
            }
        }
    }
}
[/code]
Program 2: [without HTML tags (only the text inside the body)]
[code]
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Navigation;
using System.Windows.Shapes;
using System.Net;
using System.IO;
using HtmlAgilityPack;
using System.Xml.XPath;
namespace chennaiBusCSharp
{
    /// <summary>
    /// Interaction logic for MainWindow.xaml.
    /// When the button is clicked, downloads a fixed web page, parses it with
    /// HtmlAgilityPack and shows only the text inside the body element (markup
    /// stripped) in the <c>txt</c> element.
    /// </summary>
    /// <remarks>
    /// <c>txt</c> is not declared in this file; it is presumably a text control
    /// defined in MainWindow.xaml.
    /// </remarks>
    public partial class MainWindow : Window
    {
        /// <summary>Address of the page that is downloaded on every click.</summary>
        private const string PageUrl = "http://agnfruit.appspot.com/fruit?txtweb-message=apple";

        /// <summary>Value assigned to <see cref="HttpWebRequest.Timeout"/>, in milliseconds.</summary>
        private const int RequestTimeoutMilliseconds = 10 * 1000;

        /// <summary>
        /// Creates the window and loads the components declared in XAML.
        /// </summary>
        public MainWindow()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Click handler: downloads <see cref="PageUrl"/> and displays the text of
        /// its body element.
        /// </summary>
        /// <param name="sender">The control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        /// <remarks>
        /// The download is synchronous, so this handler does not return until the
        /// response has been read or the request has failed.
        /// </remarks>
        private void Button_Click(object sender, RoutedEventArgs e)
        {
            string html;
            try
            {
                html = DownloadPage(PageUrl, RequestTimeoutMilliseconds);
            }
            catch (WebException ex)
            {
                // Timeouts, connection failures and HTTP error statuses all surface
                // here; report them instead of letting the exception escape the
                // event handler.
                MessageBox.Show("Could not download the page: " + ex.Message);
                return;
            }
            catch (IOException ex)
            {
                // The connection can still fail while the body is being read.
                MessageBox.Show("Could not read the response: " + ex.Message);
                return;
            }

            txt.Text = ExtractBodyText(html);
        }

        /// <summary>
        /// Requests <paramref name="url"/> and returns the whole response body,
        /// decoded as UTF-8.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="timeoutMilliseconds">Value assigned to the request's <c>Timeout</c>.</param>
        /// <returns>The response body, or an empty string when the response has no stream.</returns>
        /// <exception cref="WebException">The request failed or timed out.</exception>
        /// <exception cref="IOException">Reading the response stream failed.</exception>
        private static string DownloadPage(string url, int timeoutMilliseconds)
        {
            var request = (HttpWebRequest)WebRequest.Create(url);
            request.Timeout = timeoutMilliseconds;
            request.KeepAlive = false;
            // Kept from the original request setup; the request carries no body, so
            // this only adds a Content-Type header.
            request.ContentType = "text/html";

            // The response holds the underlying connection; dispose it, the stream
            // and the reader on every path so the connection is released.
            using (var response = (HttpWebResponse)request.GetResponse())
            using (Stream body = response.GetResponseStream())
            {
                if (body == null)
                {
                    return string.Empty;
                }

                using (var reader = new StreamReader(body, Encoding.UTF8))
                {
                    return reader.ReadToEnd();
                }
            }
        }

        /// <summary>
        /// Parses <paramref name="html"/> and returns the inner text of its body element.
        /// </summary>
        /// <param name="html">HTML markup to parse.</param>
        /// <returns>
        /// The body's inner text; when the markup has no body element, the inner
        /// text of the whole document.
        /// </returns>
        private static string ExtractBodyText(string html)
        {
            var document = new HtmlDocument();
            document.LoadHtml(html);

            // SelectSingleNode returns null when nothing matches, so a page without
            // a body element must not be dereferenced blindly.
            HtmlNode body = document.DocumentNode.SelectSingleNode("//body | //BODY");
            if (body == null)
            {
                return document.DocumentNode.InnerText;
            }

            return body.InnerText;
        }
    }
}
[/code]
Recommended Books: