https://csharp.hotexamples.com/examples/HtmlAgilityPack/HtmlWeb/Load/php-htmlweb-load-method-examples.html
| HtmlAgilityPack.HtmlDocument |
/// <summary>
/// Collects the image URL of every page of a chapter.
/// The first page is loaded synchronously; the remaining pages (taken from the
/// page-selector options of the first page) are loaded concurrently.
/// </summary>
/// <param name="source">Not used by this implementation.</param>
/// <param name="link">URL of the first page of the chapter.</param>
/// <returns>A <c>ChapterData</c> whose <c>Images</c> holds one image URL per page, in page order.</returns>
public static ChapterData getChapters(Source source, string link)
{
    ChapterData chapter = new ChapterData();
    var web = new HtmlAgilityPack.HtmlWeb();
    web.AutoDetectEncoding = true;
    var firstPage = web.Load(link);

    var pages = new List<IObservable<HtmlDocument>>();
    pages.Add(Observable.Return(firstPage));

    var linksToPages = firstPage.DocumentNode.SelectNodes(@"/html/body/section[@class='readpage_top']/div[@class='go_page clearfix']/span[@class='right']/select[@class='wid60']/option");
    // SelectNodes yields null (not an empty collection) when nothing matches.
    int pageCount = linksToPages == null ? 0 : linksToPages.Count;

    // Index 0 is the page that has already been loaded above.
    for (int i = 1; i < pageCount; i++)
    {
        var linkToPage = linksToPages[i].GetAttributeValue("value", "");
        pages.Add(Observable.Start<HtmlDocument>(
            () =>
            {
                // Each background load gets its own HtmlWeb and returns its own
                // document; nothing is shared with the other loads.
                var pageWeb = new HtmlAgilityPack.HtmlWeb();
                pageWeb.AutoDetectEncoding = true;
                return pageWeb.Load(linkToPage);
            }));
    }

    // Waiting in list order keeps the images in page order.
    foreach (IObservable<HtmlDocument> item in pages)
    {
        HtmlDocument pagehtml = item.Wait();
        chapter.Images.Add(pagehtml.DocumentNode.SelectSingleNode(@"/html/body/section[@id='viewer']/a/img[@id='image']/@src").GetAttributeValue("src", ""));
    }
    return chapter;
}
/// <summary>
/// Scrapes the bird list from vogelwarte.ch, downloads every gallery image into
/// <c>IMAGES_FOLDER</c> and writes the collected data as JavaScript variables to "data.js".
/// </summary>
static void Download()
{
    List<Vogel> voegel = new List<Vogel>();
    Directory.CreateDirectory(IMAGES_FOLDER);
    Uri baseUri = new Uri("http://www.vogelwarte.ch");
    HtmlWeb web = new HtmlWeb();
    Uri queryUri = new Uri(baseUri, "voegel-der-schweiz.html?keyword=&mode=name,nameL&showPage=0&length=0&lang=de&exampleSearch=0");
    Console.WriteLine(queryUri);
    var docQuery = web.Load(queryUri.ToString());

    var entryLinks = docQuery.DocumentNode.SelectNodes("//table[@class=\"list\"]/tr[@class=\"listEntry\"]/td/h3/a");
    if (entryLinks == null)
    {
        // SelectNodes yields null when nothing matches; stop before overwriting
        // data.js with empty data.
        Console.WriteLine("No bird entries found.");
        return;
    }

    // One client is enough for all downloads.
    using (var client = new WebClient())
    {
        foreach (var elEntry in entryLinks)
        {
            Uri uriEntry = new Uri(baseUri, Decode(elEntry.Attributes["href"].Value));
            Console.WriteLine(uriEntry);
            var docEntry = web.Load(uriEntry.ToString());
            var nodeDetail = docEntry.DocumentNode.SelectSingleNode("//div[@id=\"birdDetail\"]");

            // An entry without gallery images gets an empty array instead of crashing.
            var galleryImages = nodeDetail.SelectNodes("//div[@id=\"gallery\"]/div/img");
            Bild[] bilder = galleryImages == null
                ? new Bild[0]
                : galleryImages.Select(nodeImg => new Bild
                {
                    Titel = Decode(nodeImg.Attributes["title"].Value),
                    Source = new Uri(baseUri, Decode(nodeImg.Attributes["src"].Value)).ToString()
                }).ToArray();

            Vogel vogel = new Vogel
            {
                Name = Decode(elEntry.InnerText),
                Gruppe = Decode(nodeDetail.SelectSingleNode("//td[strong/text()=\"Vogelgruppe:\"]").LastChild.InnerText),
                Lebensraum = Decode(nodeDetail.SelectSingleNode("//td[strong/text()=\"Lebensraum:\"]").LastChild.InnerText),
                Laenge = Decode(nodeDetail.SelectSingleNode("//td[strong/text()=\"Länge (cm):\"]").LastChild.InnerText),
                Bilder = bilder
            };
            voegel.Add(vogel);

            foreach (var bild in bilder)
            {
                Console.WriteLine(bild.Source);
                string strFile = Path.GetFileName(bild.Source);
                client.DownloadFile(bild.Source, IMAGES_FOLDER + "/" + strFile);
                // After the download only the local file name is kept.
                bild.Source = strFile;
            }
        }
    }

    JavaScriptSerializer serializer = new JavaScriptSerializer();
    using (StreamWriter writer = File.CreateText("data.js"))
    {
        writer.Write("var Voegel = ");
        writer.Write(serializer.Serialize(voegel.OrderBy(v => v.Name)));
        writer.Write(";\r\nvar Gruppen = ");
        writer.Write(serializer.Serialize(voegel.Select(v => v.Gruppe).Distinct().OrderBy(g => g)));
        writer.Write(";\r\nvar Lebensraeume = ");
        writer.Write(serializer.Serialize(voegel.SelectMany(v => v.Lebensraum.Split(',').Select(l => l.Trim())).Distinct().OrderBy(l => l)));
        writer.Write(";");
    }
}
/// <summary>
/// Loads the document at <c>Url</c>, asks <c>GetMaxPage</c> for the number of pages and
/// fills <c>Pages</c> with one <c>WattpadPage</c> per page number, starting at 1.
/// A page without a <c>div#storyText</c> element gets empty content.
/// </summary>
public void GeneratePages()
{
    var loader = new HtmlWeb();
    _htmlDoc = loader.Load(Url);
    int lastPage = GetMaxPage(_htmlDoc);
    Pages = new List<WattpadPage>();

    int pageNumber = 1;
    while (pageNumber <= lastPage)
    {
        _htmlDoc = loader.Load(Url + "/page/" + pageNumber);
        var storyDivs = _htmlDoc.DocumentNode.SelectNodes("//div[@id='storyText']");

        // Only the first matching div contributes its inner HTML.
        string html = storyDivs == null ? "" : storyDivs.First().InnerHtml;

        var page = new WattpadPage();
        page.PageNumber = pageNumber;
        page.Content = html;
        Pages.Add(page);

        pageNumber++;
    }
}
/// <summary>
/// Loads the brand page of <c>mCarBrand</c>, fills in its logo URL, official site,
/// country and list URL, then loads the list page, adds one <c>CarFactory</c> per
/// factory node and finally calls <c>runFactoryTasks</c>.
/// </summary>
public void Run()
{
    var loader = new HtmlWeb();
    HtmlDocument brandPage = loader.Load(WebConstants.BASE_URL + mCarBrand.Url);
    HtmlNode brandRoot = brandPage.DocumentNode;

    // Each section is re-parsed from its outer HTML so that the following
    // XPath queries run against a stand-alone node.
    string logoHtml = brandRoot.SelectSingleNode(WebConstants.BRAND_LOGO).OuterHtml;
    HtmlNode logo = HtmlNode.CreateNode(logoHtml);
    mCarBrand.LogoUrl = logo.SelectSingleNode(WebConstants.IMAGE_SRC).Attributes[WebConstants.SRC].Value;

    string siteHtml = brandRoot.SelectSingleNode(WebConstants.BRAND_OFFICIAL_SITE).OuterHtml;
    HtmlNode site = HtmlNode.CreateNode(siteHtml);
    mCarBrand.OfficialSite = site.SelectSingleNode(WebConstants.LINK_HREF).Attributes[WebConstants.HREF].Value;

    string countryHtml = brandRoot.SelectSingleNode(WebConstants.BRAND_COUNTRY).OuterHtml;
    HtmlNode country = HtmlNode.CreateNode(countryHtml);
    string countryText = country.InnerText;
    // The country name is what follows the text of the leading EM element.
    int labelLength = country.SelectSingleNode(WebConstants.EM).InnerText.Length;
    mCarBrand.Country = new Country(countryText.Substring(labelLength));
    mCarBrand.Country.LogoUrl = country.SelectSingleNode(WebConstants.IMAGE_SRC).Attributes[WebConstants.SRC].Value;

    string listHtml = brandRoot.SelectSingleNode(WebConstants.BRAND_LIST).OuterHtml;
    HtmlNode list = HtmlNode.CreateNode(listHtml);
    mCarBrand.ListUrl = list.SelectSingleNode(WebConstants.SCRIPT_SRC).Attributes[WebConstants.SRC].Value;

    HtmlDocument listPage = loader.Load(WebConstants.BASE_URL + mCarBrand.ListUrl);
    HtmlNodeCollection factories = listPage.DocumentNode.SelectNodes(WebConstants.FACTORY_NODE);
    if (factories != null)
    {
        for (int i = 0; i < factories.Count; i++)
        {
            HtmlNode factory = HtmlNode.CreateNode(factories[i].OuterHtml);
            CarFactory entry = new CarFactory(mCarBrand);
            entry.Url = factory.SelectSingleNode(WebConstants.LINK_HREF).Attributes[WebConstants.HREF].Value;
            entry.Name = factory.InnerText.Replace("/", "");
            mCarBrand.CarFactoryList.Add(entry);
        }
    }

    runFactoryTasks();
}
/// <summary>
/// Collects the image URL of every page of a mangareader.net chapter.
/// The first page is loaded synchronously; the remaining pages (taken from the
/// <c>#pageMenu</c> options of the first page) are loaded concurrently.
/// </summary>
/// <param name="source">Not used by this implementation.</param>
/// <param name="link">URL of the first page of the chapter.</param>
/// <returns>A <c>ChapterData</c> whose <c>Images</c> holds one image URL per page, in page order.</returns>
public static ChapterData getChapters(Source source, string link)
{
    ChapterData chapter = new ChapterData();
    var web = new HtmlAgilityPack.HtmlWeb();
    web.AutoDetectEncoding = true;
    var firstPage = web.Load(link);

    var pages = new List<IObservable<HtmlDocument>>();
    pages.Add(Observable.Return(firstPage));

    var linksToPages = firstPage.DocumentNode.SelectNodes(@"//*[@id='pageMenu']/option");
    // SelectNodes yields null (not an empty collection) when nothing matches.
    int pageCount = linksToPages == null ? 0 : linksToPages.Count;

    // Index 0 is the page that has already been loaded above.
    for (int i = 1; i < pageCount; i++)
    {
        var linkToPage = "http://www.mangareader.net" + linksToPages[i].GetAttributeValue("value", "");
        pages.Add(Observable.Start<HtmlDocument>(
            () =>
            {
                // Each background load gets its own HtmlWeb and returns its own
                // document; nothing is shared with the other loads.
                var pageWeb = new HtmlAgilityPack.HtmlWeb();
                pageWeb.AutoDetectEncoding = true;
                return pageWeb.Load(linkToPage);
            }));
    }

    // Waiting in list order keeps the images in page order.
    foreach (IObservable<HtmlDocument> item in pages)
    {
        HtmlDocument pagehtml = item.Wait();
        chapter.Images.Add(pagehtml.DocumentNode.SelectSingleNode(@"//*[@id='img']").GetAttributeValue("src", ""));
    }
    return chapter;
}
/// <summary>
/// Builds a <c>Novel</c> from three pages (main, staff, characters) addressed by
/// <paramref name="index"/> and saves it through <c>NovelManager.SaveNovel</c>.
/// </summary>
/// <param name="index">Value substituted into the main, staff and character URL patterns.</param>
public void ProcessIndex(int index)
{
    var novel = new Novel();
    var loader = new HtmlWeb();

    // Main page: first "mainbox" div, optional releases and screenshots.
    // NOTE(review): the first mainbox is taken without a length check.
    HtmlDocument page = loader.Load(String.Format(MainUrlPattern, index));
    var boxes = page.DocumentNode.Descendants("div").Where(d => d.HasClass("mainbox")).ToArray();
    ParseMainContent(boxes[0], novel);

    var releases = page.DocumentNode.Descendants("div").FirstOrDefault(d => d.HasClass("releases"));
    if (releases != null)
        ParseReleasesContent(releases, novel);

    var screenshots = page.DocumentNode.Descendants("div").FirstOrDefault(d => d.HasId("screenshots"));
    if (screenshots != null)
        ParseImagesContent(screenshots, novel);

    // Staff page: the "staff" div that is not also a "cast" div.
    page = loader.Load(String.Format(StaffPattern, index));
    var staff = page.DocumentNode.Descendants("div").FirstOrDefault(d => d.HasClass("staff") && d.NotContainsClass("cast"));
    if (staff != null)
        ParseStaffContent(staff, novel);

    // Character page: every mainbox after the first one.
    page = loader.Load(String.Format(CharacterPattern, index));
    boxes = page.DocumentNode.Descendants("div").Where(d => d.HasClass("mainbox")).ToArray();
    foreach (var characterBox in boxes.Skip(1))
    {
        ParseCharactersContent(characterBox, novel);
    }

    using (var ctx = new VNContext("VNConnectionString"))
    {
        NovelManager.SaveNovel(novel, ctx);
        Logs.Debug($@"Novel {index} finished");
    }
    Console.WriteLine(index + @" finished");
}
/// <summary>
/// Extracts "name - e-mail" pairs for every author entry on the page at
/// <paramref name="url"/>, appends them to "C:\Springer\Springer Emails.txt" and
/// returns them.
/// </summary>
/// <param name="url">Address of the page to scan for <c>li[itemprop='author']</c> elements.</param>
/// <returns>The lines written to the file, in document order; empty when no author has an e-mail link.</returns>
public static List<string> getNameOfEmail(string url)
{
    List<string> entries = new List<string>();
    HtmlWeb website = new HtmlWeb();
    HtmlAgilityPack.HtmlDocument doc = website.Load(url);
    HtmlNodeCollection authors = doc.DocumentNode.SelectNodes(".//li[@itemprop='author']");

    // CreateDirectory does nothing when the directory already exists.
    Directory.CreateDirectory(@"C:\Springer\");

    using (StreamWriter outputFile = new StreamWriter(@"C:\Springer\Springer Emails.txt", true))
    {
        // SelectNodes yields null when no author element matches.
        if (authors != null)
        {
            foreach (HtmlNode author in authors)
            {
                HtmlNode Name = author.SelectSingleNode(".//a[@class='person']");
                HtmlNode EMail = author.SelectSingleNode(".//a[@class='envelope']");
                if (EMail == null)
                {
                    continue;
                }

                // An e-mail link without a name link is still recorded.
                string displayName = Name != null ? Name.InnerText : "";
                string entry = displayName + " - " + EMail.GetAttributeValue("title", "");
                outputFile.WriteLine(entry);
                entries.Add(entry);
            }
        }
    }
    return entries;
}
/// <summary>
/// Loads the page at <paramref name="url"/>, records it in <c>VisitedPages</c> and
/// enqueues every link on it that is on the same host as <c>baseUrl</c> and has not
/// been queued or visited yet. Failures while loading the page are ignored.
/// </summary>
/// <param name="url">Address of the page to crawl.</param>
public static void getHrefs(string url)
{
    try
    {
        HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlWeb();
        HtmlAgilityPack.HtmlDocument doc = htmlWeb.Load(url);

        // Remember the page so it is not crawled again.
        VisitedPages.Add(url);

        // SelectNodes yields null when the page has no <a href> at all.
        var anchors = doc.DocumentNode.SelectNodes("//a[@href]");
        if (anchors == null)
        {
            return;
        }

        foreach (HtmlNode link in anchors)
        {
            // Resolve the href against the base URL; a malformed href skips only
            // this link instead of aborting the rest of the page.
            Uri l;
            if (!Uri.TryCreate(baseUrl, link.Attributes["href"].Value, out l))
            {
                continue;
            }

            string absolute = l.ToString();
            bool alreadyKnown = LinkQueue.Contains(absolute) || VisitedPages.Contains(absolute);
            if (!alreadyKnown && l.Host == baseUrl.Host)
            {
                LinkQueue.Enqueue(absolute);
            }
        }
    }
    catch
    {
        // Best-effort crawl: a page that cannot be loaded or parsed is skipped.
        return;
    }
}
/// <summary>
/// Returns the title of a remote page: the Open Graph title when present,
/// otherwise the text of the first HTML <c>title</c> element. Both are HTML-decoded.
/// </summary>
/// <param name="remoteUri">Address of the page.</param>
/// <returns>The decoded title, or <c>null</c> when none is found or any error occurs.</returns>
public static string GetTitleFromUri(string @remoteUri)
{
    try
    {
        // Try Open Graph first.
        var graph = OpenGraph.ParseUrl(@remoteUri, "Voat.co OpenGraph Parser");
        if (!string.IsNullOrEmpty(graph.Title))
        {
            return HttpUtility.HtmlDecode(graph.Title);
        }

        // No Open Graph title: fall back to the HTML <title> element.
        HtmlWeb htmlWeb = new HtmlWeb();
        HtmlDocument htmlDocument = htmlWeb.Load(@remoteUri);
        if (htmlDocument != null)
        {
            // A page may contain more than one <title> (for example inside inline SVG);
            // take the first one rather than failing on duplicates.
            var titleNode = htmlDocument.DocumentNode.Descendants("title").FirstOrDefault();
            if (titleNode != null)
            {
                // InnerText still contains entities; decode it like the Open Graph path.
                return HttpUtility.HtmlDecode(titleNode.InnerText);
            }
        }
        return null;
    }
    catch (Exception)
    {
        // Callers treat null as "no title available".
        return null;
    }
}
/// <summary>
/// Loads the listing page for one city and, for every link whose text is "Phone",
/// prints phone, name and address to the console and writes one CSV line to <c>file</c>.
/// </summary>
/// <param name="state">State path segment of the listing URL.</param>
/// <param name="city">City path segment; "-plumbers" is stripped from it in the CSV line.</param>
private void ExtractCity(string state, string city)
{
    var loader = new HtmlWeb();
    string address_of_city = RootUrl + @"/" + state + @"/" + city + @"?" + @"page=1&ipp=All";
    HtmlDocument listing = loader.Load(address_of_city);

    foreach (HtmlNode candidate in listing.DocumentNode.Descendants())
    {
        // Only <a href> elements whose text is exactly "Phone" mark an entry.
        if (candidate.Name != "a")
            continue;
        if (candidate.Attributes["href"] == null)
            continue;
        if (candidate.InnerText != "Phone")
            continue;

        string phone = candidate.ParentNode.NextSibling.InnerText;
        Console.WriteLine();
        Console.WriteLine("phone: " + phone);

        // Name and address are the 2nd and 3rd lines of the text of the second
        // child of the link's great-great-grandparent.
        string[] lines = candidate.ParentNode.ParentNode.ParentNode.ParentNode.FirstChild.NextSibling.InnerText.Split('\n');
        string name = lines[1].Trim();
        string address = lines[2].Trim();
        Console.WriteLine("name: " + name);
        Console.WriteLine("address: " + address);

        file.WriteLine(name + "," + address + "," + city.Replace("-plumbers", "") + "," + phone.Replace(" ", ""));
    }
}
/// <summary>
/// Looks up a licence plate on nummerplade.net and reads make, model, variant,
/// chassis number, model year and plate from the result page.
/// </summary>
/// <param name="nummerplade">Licence plate appended to the search URL.</param>
/// <returns>A <c>Bilinformation</c> filled from the result page.</returns>
/// <exception cref="IngenBilinformationException">Wraps any exception raised during the lookup.</exception>
public static Bilinformation HentBilinformation(string nummerplade)
{
    try
    {
        Bilinformation result = new Bilinformation();
        string address = "http://www.nummerplade.net/soeg/?regnr=" + nummerplade;
        HtmlDocument page = new HtmlWeb().Load(address);
        HtmlNode root = page.DocumentNode;
        if (root == null)
        {
            return result;
        }

        // Each value sits in a table cell identified by its id.
        result.Maerke = root.SelectSingleNode("//td[@id='maerke']").InnerText;
        result.Model = root.SelectSingleNode("//td[@id='model']").InnerText;
        result.Variant = root.SelectSingleNode("//td[@id='variant']").InnerText;
        result.Stelnummer = root.SelectSingleNode("//td[@id='stelnr']").InnerText;
        result.Aargang = root.SelectSingleNode("//td[@id='model_aar']").InnerText;
        result.Nummerplade = root.SelectSingleNode("//td[@id='regnr']").InnerText;
        return result;
    }
    catch (Exception ex)
    {
        throw new IngenBilinformationException("Der blev ikke fundet nogen bilinformation på nummerpladen.", ex);
    }
}
/// <summary>
/// Fetches song lyrics from letras.mus.br for the given artist and title.
/// When nothing is found and either value contains '&amp;', the lookup is retried
/// once with '&amp;' replaced by 'e'.
/// </summary>
/// <param name="artist">Artist name; <c>null</c> is treated as empty.</param>
/// <param name="title">Song title; <c>null</c> is treated as empty.</param>
/// <returns>The lyrics with line breaks as "\r\n", or an empty string when none are found.</returns>
public static string GetFromTerra(string artist, string title)
{
    string rep = string.Empty;
    artist = (artist + "").ToLowerInvariant();
    title = (title + "").ToLowerInvariant();

    // Fetch the lyrics page.
    HtmlWeb web = new HtmlWeb();
    HtmlDocument doc = web.Load(string.Format("http://letras.mus.br/winamp.php?t={0}-{1}", HttpUtility.UrlEncode(artist, ISOEncoding), HttpUtility.UrlEncode(title, ISOEncoding)));
    HtmlNode node = doc.DocumentNode.SelectSingleNode("//div[@id='letra']/p");

    if (node == null)
    {
        if (artist.Contains("&") || title.Contains("&"))
        {
            // Retry with '&' written as 'e'; the retry cannot recurse again
            // because no '&' is left afterwards.
            return GetFromTerra(artist.Replace('&', 'e'), title.Replace('&', 'e'));
        }

        // No lyrics block on the page: report "nothing found" instead of
        // dereferencing the missing node.
        return rep;
    }

    // Lyrics found: turn <br> into line breaks before taking the text.
    node.InnerHtml = node.InnerHtml.Replace("<br>", "\r\n");
    rep = WebUtility.HtmlDecode(node.InnerText);
    return rep;
}
/// <summary>
/// Loads a fixed Yandex search URL as UTF-8 and prints the inner HTML of every
/// <c>div</c> to the console, or "found nothing" when the page has no <c>div</c>.
/// </summary>
public static void GetText2()
{
    string address = "https://yandex.by/search/?numdoc=10&p=0&rdrnd=601861&text=kinogo.co Один дома 1990 &lr=157";

    var loader = new HtmlWeb();
    loader.AutoDetectEncoding = false;
    loader.OverrideEncoding = Encoding.UTF8; // alternative: GetEncoding("windows-1251")

    HtmlDocument page = loader.Load(address);
    HtmlNodeCollection divs = page.DocumentNode.SelectNodes("//div");

    // TODO: finish this
    if (divs == null)
    {
        Console.WriteLine("found nothing");
        return;
    }

    for (int i = 0; i < divs.Count; i++)
    {
        Console.WriteLine(divs[i].InnerHtml);
    }
}
/// <summary>
/// Loads <c>BASE + URL</c> (HTML-decoded) and returns the links in the fourth
/// column of the table with id "myTable".
/// </summary>
/// <param name="URL">Path appended to <c>BASE</c>.</param>
/// <returns>The result of the XPath query on the loaded document.</returns>
static HtmlNodeCollection GetSuburb(string URL)
{
    string address = System.Net.WebUtility.HtmlDecode(BASE + URL);
    HtmlDocument page = new HtmlWeb().Load(address);
    HtmlNode root = page.DocumentNode;
    return root.SelectNodes("//table[@id='myTable']/tbody/tr/td[4]/a");
}
/// <summary>
/// Loads a page and lists its <c>meta</c> tags that carry both a <c>name</c> and a
/// <c>content</c> attribute, as alternating "Name=..." and "Content=..." strings.
/// </summary>
/// <param name="url">Page address; the bare string "http://" is replaced by "http://www.microsoft.com".</param>
/// <returns>The collected strings; empty when the page has no matching meta tag.</returns>
public List<string> GetData(string url)
{
    string address = url == "http://" ? "http://www.microsoft.com" : url;

    // Fetch the page from the remote server.
    HtmlDocument page = new HtmlWeb().Load(address);
    HtmlNodeCollection metas = page.DocumentNode.SelectNodes("//meta");

    var lines = new List<string>();
    if (metas == null)
    {
        return lines;
    }

    foreach (HtmlNode meta in metas)
    {
        if (meta.Attributes["name"] == null || meta.Attributes["content"] == null)
        {
            continue;
        }
        lines.Add("Name=" + meta.Attributes["name"].Value);
        lines.Add("Content=" + meta.Attributes["content"].Value);
    }
    return lines;
}
/// <summary>
/// Loads a page and returns the <c>href</c> value of every <c>a</c> element that has
/// an <c>href</c> attribute and non-blank text.
/// </summary>
/// <param name="url">Address of the page to load.</param>
/// <returns>The href values in document order.</returns>
public List<string> GetHrefLinks(string url)
{
    // Fetch the page from the remote server.
    HtmlDocument page = new HtmlWeb().Load(url);

    var hrefs = new List<string>();
    foreach (HtmlNode node in page.DocumentNode.Descendants())
    {
        bool isLink = node.Name == "a" && node.Attributes["href"] != null;
        if (!isLink || node.InnerText.Trim().Length == 0)
        {
            continue;
        }

        // Only the URL is collected for now; the link text is ignored.
        hrefs.Add(node.Attributes["href"].Value);
    }
    return hrefs;
}
/// <summary>
/// Loads one page of the news listing and returns an entry for every
/// <c>div.recentNews</c> block on it.
/// </summary>
/// <param name="page">Page number; values above 1 are passed as the "wpage" query parameter.</param>
/// <returns>One <c>NewsObject</c> (link and text) per news block.</returns>
public override List<NewsObject> NewestNews(int page)
{
    string address = page > 1 ? RootUrl + "?wpage=" + page : RootUrl;

    var loader = new HtmlWeb();
    loader.AutoDetectEncoding = false;
    loader.OverrideEncoding = Encoding.UTF8; // force UTF-8 so Vietnamese text displays correctly

    HtmlDocument listing = loader.Load(address);
    var blocks = listing.DocumentNode.QuerySelectorAll("div.recentNews").ToList();

    var collected = new List<NewsObject>();
    for (int i = 0; i < blocks.Count; i++)
    {
        var heading = blocks[i].QuerySelector("h2.subHeading");
        string href = heading.QuerySelector("a").Attributes["href"].Value;

        NewsObject entry = new NewsObject();
        entry.Link = RootUrl + href;
        entry.Text = TrimHtml(heading.InnerText);
        collected.Add(entry);
    }
    return collected;
}
/// <summary>
/// Loads the page of a series and returns the <c>src</c> of its cover image.
/// </summary>
/// <param name="serie">Series whose <c>URL</c> is loaded.</param>
/// <returns>The <c>src</c> attribute of the cover image, or "" when the attribute is missing.</returns>
protected override string _GetSerieMiniatureUrl(Serie serie)
{
    HtmlDocument page = new HtmlWeb().Load(serie.URL);
    HtmlNode cover = page.DocumentNode.SelectSingleNode("//div[@id='series_info']/div[@class='cover']/img");
    string source = cover.GetAttributeValue("src", "");
    return source;
}
/// <summary>
/// Loads a page and returns the <c>value</c> of every <c>option</c> directly inside
/// the first <c>select</c> whose class is exactly "selectBox".
/// </summary>
/// <param name="url">Address of the page to load.</param>
/// <returns>
/// The option values ("NO_URL" for options without a value), with the first "#"
/// entry removed; <c>null</c> when the page has no such <c>select</c>.
/// </returns>
public static List<string> GetChapterUrls(string url)
{
    HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlWeb();
    HtmlAgilityPack.HtmlDocument htdoc = htmlWeb.Load(url);

    // GetAttributeValue tolerates <select> elements without a class attribute,
    // and the query is evaluated only once.
    HtmlAgilityPack.HtmlNode selectElement = htdoc.DocumentNode.Descendants("select")
        .FirstOrDefault(x => x.GetAttributeValue("class", "") == "selectBox");
    if (selectElement == null)
    {
        return null;
    }

    List<string> ret = new List<string>();
    foreach (var cNode in selectElement.ChildNodes)
    {
        if (cNode.Name == "option")
        {
            ret.Add(cNode.GetAttributeValue("value", "NO_URL"));
        }
    }

    // Clean-up: drop the "#" placeholder entry.
    ret.Remove("#");
    return ret;
}
/// <summary>
/// Returns the text of the BibTeX paragraph of a "viewdoc" page, with a line break
/// inserted after every comma. Any other URL yields an empty string.
/// </summary>
/// <param name="url">Page address; only URLs containing "viewdoc" are loaded.</param>
/// <returns>The reformatted BibTeX text, or "" when it is not found or an error occurs.</returns>
public string getBibTex(string url)
{
string res = "", temp = "";
HtmlWeb web;
HtmlDocument doc;
HtmlNode n;
if (url.Contains("viewdoc"))//e.g. http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.31.3487
{
web = new HtmlWeb();
doc = web.Load(url);
// Only reports the load result on the console; execution continues either way.
if (doc != null)
Console.WriteLine("Document Loaded!");
else
Console.WriteLine("Load Error!");
try
{
// The BibTeX text is the paragraph inside the element with id "bibtex".
if ((n = doc.DocumentNode.SelectSingleNode("//*[@id=\"bibtex\"]/p")) != null)
{
temp = n.InnerText;
// NOTE(review): the second Replace looks like a no-op as displayed; its first
// argument may originally have been a non-breaking space or "&nbsp;" - confirm.
temp = temp.Replace(",", ",\n").Replace(" ", " ");
}
}
// Any exception is swallowed here; temp then keeps whatever it held.
catch (Exception e) { }
res = temp;
return res;
}
else//e.g. http://citeseer.ist.psu.edu/showciting?cid=2131272
return res;
}
/// <summary>
/// Loads one page of the tok.fm podcast listing, round-trips it through an XML
/// serialisation with syntax checking and nested-tag fixing enabled, and returns
/// one <c>Podcast</c> per <c>a.tokfm_play</c> link.
/// </summary>
/// <param name="pageNumber">Value of the "str" query parameter.</param>
/// <returns>The podcasts found on the page; empty when the page has no matching link.</returns>
public IEnumerable<Podcast> GetLatestPodcasts(int pageNumber)
{
    var hw = new HtmlWeb();
    hw.OverrideEncoding = Encoding.GetEncoding("ISO-8859-2");
    var doc = hw.Load("http://www.tok.fm/TOKFM/0,94037.html?str=" + pageNumber.ToString(CultureInfo.InvariantCulture));
    doc.OptionOutputAsXml = true;
    doc.OptionCheckSyntax = true;
    doc.OptionFixNestedTags = true;

    // Save the document as XML and load that text back into the same document.
    var sb = new StringBuilder();
    using (var stringWriter = new StringWriter(sb))
    {
        doc.Save(stringWriter);
    }
    using (var stringReader = new StringReader(sb.ToString()))
    {
        doc.Load(stringReader);
    }

    var result = new List<Podcast>();

    // SelectNodes yields null when no link matches.
    var links = doc.DocumentNode.SelectNodes("//a[@class='tokfm_play']");
    if (links == null)
    {
        return result;
    }

    foreach (HtmlNode link in links)
    {
        // The image is optional; links without one get an empty ImageURL.
        var imgNode = link.SelectSingleNode("img");
        var imageURL = String.Empty;
        if (imgNode != null)
        {
            imageURL = imgNode.Attributes["src"].Value;
        }
        result.Add(new Podcast { Href = link.Attributes["href"].Value, Title = link.Attributes["title"].Value, ImageURL = imageURL });
    }
    return result;
}
/// <summary>
/// Downloads and parses the timetable page of a study year, optionally of one half-year.
/// </summary>
/// <param name="year">Study year; its enum name is used in the page file name.</param>
/// <param name="halfYear">Half-year; its enum name is appended to the year, except for <c>None</c>, which adds nothing.</param>
/// <returns>The parsed timetable, or <c>null</c> when a <c>WebException</c> or <c>NotSupportedException</c> occurs.</returns>
public List<TimetableItem> GetTimetableForYear(StudyYear year, HalfYear halfYear = HalfYear.None)
{
    List<TimetableItem> timetable;
    string tempYear = Enum.GetName(typeof(StudyYear), year);
    string tempHalfYear = Enum.GetName(typeof(HalfYear), halfYear);
    if (tempHalfYear == "None")
    {
        tempHalfYear = String.Empty;
    }

    try
    {
        HtmlWeb hw = new HtmlWeb();
        // Use the adjusted name, so that HalfYear.None contributes nothing to the
        // file name instead of the literal "None".
        HtmlDocument doc = hw.Load(String.Format("http://thor.info.uaic.ro/~orar/participanti/orar_{0}{1}.html", tempYear, tempHalfYear));
        // Line breaks are stripped from the markup before the table is parsed.
        doc.DocumentNode.InnerHtml = doc.DocumentNode.InnerHtml.Replace("\r\n", "");
        timetable = ParseTable(doc, TimetableType.Year);
    }
    catch (WebException ex)
    {
        Logger.ExceptionLogger.Log(ex);
        timetable = null;
    }
    catch (NotSupportedException ex)
    {
        Logger.ExceptionLogger.Log(ex);
        timetable = null;
    }
    return timetable;
}
/// <summary>
/// Loads a fixed LinkedIn profile page, prints name, summary and skills, then calls
/// <c>Info</c> for each profile section name and waits for a key press.
/// </summary>
/// <param name="args">Not used.</param>
static void Main(string[] args)
{
    var profile = new HtmlWeb().Load("https://ua.linkedin.com/in/kirillmiroshnichenko");
    var root = profile.DocumentNode;

    Print(root.SelectNodes("//span[@class='full-name']"));
    Print(root.SelectNodes("//p[@class='description']"));
    Print(root.SelectNodes("//span[@class='skill-pill']"));
    Console.WriteLine("-------------");

    // Section names handed to Info one by one.
    string[] sections =
    {
        "experience", "courses", "projects", "certifications", "languages", "education", "interests",
        "patents", "publications", "honors", "test-scores", "organizations", "volunteering"
    };
    for (int i = 0; i < sections.Length; i++)
    {
        Info(profile, sections[i]);
    }

    Console.ReadLine();
}
/// <summary>
/// Loads a GitHub Jobs result page and appends one <c>JobListing</c> to
/// <paramref name="jobListings"/> for every title cell that has exactly five child nodes.
/// Any exception raised while reading the table is swallowed.
/// </summary>
/// <param name="url">Address of the result page.</param>
/// <param name="jobListings">List that receives the listings.</param>
private void AddGithubJobs(string url, List<JobListing> jobListings)
{
    var loader = new HtmlWeb();
    var resultPage = loader.Load(url);
    string site = "https://jobs.github.com";
    try
    {
        HtmlNode table = resultPage.DocumentNode.SelectSingleNode("//table[@class='positionlist']");
        HtmlNodeCollection titleCells = table.SelectNodes(".//td[@class='title']");

        for (int i = 0; i < titleCells.Count; i++)
        {
            HtmlNode cell = titleCells[i];
            if (cell.ChildNodes.Count != 5)
            {
                continue;
            }

            // Child 1 holds the title link, child 3 the company.
            JobListing listing = new JobListing();
            listing.SearchEngine = SearchEngines.GitHub;
            listing.Title = cell.ChildNodes[1].InnerText;
            listing.Company = cell.ChildNodes[3].ChildNodes[1].InnerText;
            listing.URL = site + cell.ChildNodes[1].ChildNodes[0].Attributes[0].Value;
            jobListings.Add(listing);
        }
    }
    catch { }
}
/// <summary>
/// Sets up the main layout, appends the tag-stripped content of every table cell of
/// the route page to the status text view, and wires the button to open <c>Page2</c>.
/// </summary>
/// <param name="savedInstanceState">Passed through to the base implementation.</param>
protected override void OnCreate (Bundle savedInstanceState)
{
    base.OnCreate (savedInstanceState);
    SetContentView (Resource.Layout.Main);
    TextView textView = FindViewById<TextView> (Resource.Id.TEXT_STATUS_ID);

    // NOTE(review): this page load runs synchronously inside OnCreate - confirm
    // that blocking here is acceptable.
    HtmlWeb web = new HtmlWeb();
    HtmlDocument doc = web.Load("https://www.ltd.org/system-map/route_79x/");
    HtmlNodeCollection tags = doc.DocumentNode.SelectNodes("//td");

    // Build the text once instead of re-assigning the view's Text per cell.
    var content = new System.Text.StringBuilder(textView.Text);
    // SelectNodes yields null when the page has no <td>; the view then keeps its text.
    if (tags != null)
    {
        foreach (HtmlNode item in tags)
        {
            content.Append(item.InnerHtml).Append("\n");
        }
    }
    // Strip any remaining markup from the collected cell contents.
    textView.Text = Regex.Replace(content.ToString(), @"<[^>]*>", String.Empty);

    Button button = FindViewById<Button> (Resource.Id.myButton);
    button.Click += delegate {
        StartActivity(typeof(Page2));
    };
}
//Public Methods
/// <summary>
/// Loads a page, prints the <c>href</c> of every link and adds each one that can be
/// turned into a <c>Uri</c> to <c>_results</c>. All exceptions are swallowed.
/// </summary>
/// <param name="url">Address of the page to scrape.</param>
public void Scrape(string url)
{
    try
    {
        HtmlDocument page = new HtmlWeb().Load(url);
        HtmlNodeCollection anchors = page.DocumentNode.SelectNodes("//a[@href]");
        foreach (HtmlNode anchor in anchors)
        {
            try
            {
                string target = anchor.Attributes["href"].Value;
                Console.WriteLine(target);
                Uri parsed = new Uri(target);
                this._results.Add(parsed);
            }
            catch
            {
                // A link that cannot be processed is skipped.
            }
        }
    }
    catch
    {
        // Failures for the page as a whole are ignored for now.
    }
}
/// <summary>
/// Queries gametracker.com for Dota 2 servers running a map and appends the text of
/// the result-table cells to <paramref name="list"/>.
/// </summary>
/// <param name="list">List that receives the trimmed cell texts; also the return value.</param>
/// <param name="map">Map name; trimmed and placed in the query string.</param>
/// <returns>The same <paramref name="list"/> instance.</returns>
public static List<string> GetServersFromMap(List<string> list, string map)
{
    HtmlWeb htmlWeb = new HtmlWeb();
    HtmlAgilityPack.HtmlDocument document = htmlWeb.Load("http://www.gametracker.com/search/dota2/?search_by=map&query=" + map.Trim() + "&searchipp=50");

    // SelectNodes yields null when nothing matches, so every level is checked
    // before it is enumerated.
    var tables = document.DocumentNode.SelectNodes("//table");
    if (tables == null)
    {
        return list;
    }

    // Cells are scanned in document order. A cell containing "Rank&darr" toggles the
    // stopped flag and resets started; a cell containing "Server Map " sets started
    // for the cells after it. Cells are collected while started and not stopped.
    bool started = false;
    bool stopped = true;
    foreach (HtmlNode table in tables)
    {
        var rows = table.SelectNodes("tr");
        if (rows == null)
        {
            continue;
        }
        foreach (HtmlNode row in rows)
        {
            var cells = row.SelectNodes("td");
            if (cells == null)
            {
                continue;
            }
            foreach (HtmlNode cell in cells)
            {
                string text = cell.InnerText;
                if (text.Contains("Rank&darr"))
                {
                    stopped = !stopped;
                    started = false;
                }
                if (started && !stopped)
                {
                    list.Add(text.Trim());
                }
                if (text.Contains("Server Map "))
                {
                    started = true;
                }
            }
        }
    }
    return list;
}
/// <summary>
/// Loads one Zhaopin search-result page for the current search parameters and adds a
/// <c>JobInfo</c> to <c>_jobList</c> for every result table after the first one.
/// Errors are logged through <c>LogSave.ErrLogSave</c>.
/// </summary>
public void GetJobListFromWeb()
{
    try
    {
        var htmlWeb = new HtmlWeb { OverrideEncoding = Encoding.GetEncoding("UTF-8") };
        HtmlDocument htmlDoc =
            htmlWeb.Load(string.Format("http://sou.zhaopin.com/jobs/searchresult.ashx?jl={0}&kw={1}&p={2}",
                DataClass.GetDic_zhilian(_pars.Addr), _pars.Key, _pars.Page));

        // Keep the tables in document order: the loop below skips index 0, which is
        // only the first table if the order is preserved.
        var nodeList = htmlDoc.DocumentNode.SelectNodes("//*[@id='newlist_list_content_table']/table[@class='newlist']");
        if (nodeList == null)
        {
            // SelectNodes yields null when nothing matches; report it like any other parse failure.
            LogSave.ErrLogSave("错误【解析】", new InvalidOperationException("No result tables found on the page."));
            return;
        }

        for (int i = 1; i < nodeList.Count; i++)
        {
            var node = nodeList[i];
            var titleLink = node.SelectSingleNode(".//tr/td[@class='zwmc']/div/a");

            var job = new JobInfo();
            job.TitleName = titleLink.InnerText;
            job.InfoUrl = titleLink.Attributes["href"].Value;
            job.Company = node.SelectSingleNode(".//tr/td[@class='gsmc']/a").InnerText;
            job.Salary = node.SelectSingleNode(".//tr/td[@class='zwyx']").InnerText;
            job.City = node.SelectSingleNode(".//tr/td[@class='gzdd']").InnerText;
            job.Date = node.SelectSingleNode(".//tr/td[@class='gxsj']/span").InnerText;
            job.Source = "智联招聘";
            job.Method = "月薪";
            _jobList.Add(job);
        }
    }
    catch (Exception ex)
    {
        LogSave.ErrLogSave("错误【解析】", ex);
    }
}
/// <summary>
/// Loads the BBC football results fragment and reads id, score, teams and date of
/// every result row. The values are only held in locals; nothing is stored.
/// </summary>
public datascraper()
{
    string url = @"http://www.bbc.co.uk/sport/football/results/partial/competition-118996114";
    HtmlWeb htmlWeb = new HtmlWeb();
    HtmlDocument doc = htmlWeb.Load(url);

    // SelectNodes yields null when the page has no row with an id.
    HtmlNodeCollection mtchrslts = doc.DocumentNode.SelectNodes("//tr[@id]");
    if (mtchrslts == null)
    {
        return;
    }

    foreach (HtmlNode matchresult in mtchrslts)
    {
        // Queries are relative (".//") so they read from THIS row; a leading "//"
        // would search the whole document and always return its first match.
        HtmlNode scoreNode = matchresult.SelectSingleNode(".//abbr[@title='Score']");
        HtmlNode homeNode = matchresult.SelectSingleNode(".//p[(@class='team-home teams')]");
        HtmlNode awayNode = matchresult.SelectSingleNode(".//p[(@class='team-away teams')]");
        HtmlNode dateNode = matchresult.SelectSingleNode(".//td[(@class='match-date')]");
        if (scoreNode == null || homeNode == null || awayNode == null || dateNode == null)
        {
            // Rows with an id that are not complete match rows are skipped.
            continue;
        }

        string idmess = matchresult.Id;
        string idnum = idmess.Replace("match-row-", "");
        string score = scoreNode.InnerText;
        string[] teamscores = score.Split('-');
        if (teamscores.Length < 2)
        {
            continue;
        }
        string teamscoreh = teamscores[0];
        string teamscorea = teamscores[1];
        string hteam = homeNode.InnerText;
        string ateam = awayNode.InnerText;
        string date = dateNode.InnerText;
    }
    return;
}
/// <summary>
/// Downloads the page at <paramref name="url"/> with a fresh <c>HtmlWeb</c>.
/// </summary>
/// <param name="url">Address of the page to load.</param>
/// <returns>The loaded document.</returns>
public static HtmlDocument Crawl(string url)
{
    return new HtmlWeb().Load(url);
}
