https://csharp.hotexamples.com/examples/HtmlAgilityPack/HtmlWeb/Load/php-htmlweb-load-method-examples.html
HtmlAgilityPack.HtmlDocument
/// <summary>
/// Loads the chapter page at <paramref name="link"/>, then every further page listed in
/// that page's page selector, and collects the <c>src</c> of the image on each page.
/// </summary>
/// <param name="source">Not used by this method.</param>
/// <param name="link">URL of the first page of the chapter.</param>
/// <returns>
/// A <see cref="ChapterData"/> whose <c>Images</c> contains one entry per page on which
/// the image node was found, in page order.
/// </returns>
public static ChapterData getChapters(Source source, string link)
{
    ChapterData chapter = new ChapterData();

    var web = new HtmlAgilityPack.HtmlWeb();
    web.AutoDetectEncoding = true;
    var firstPage = web.Load(link);

    var pages = new List<IObservable<HtmlDocument>>();
    pages.Add(Observable.Return(firstPage));

    // SelectNodes yields null (not an empty collection) when nothing matches,
    // e.g. for a chapter that has no page selector.
    var linksToPages = firstPage.DocumentNode.SelectNodes(@"/html/body/section[@class='readpage_top']/div[@class ='go_page clearfix']/span[@class='right']/select[@class ='wid60']/option");
    int pageCount = linksToPages != null ? linksToPages.Count : 0;

    // Starts at 1: presumably option 0 is the page that was already loaded above.
    for (int i = 1; i < pageCount; i++)
    {
        var linkToPage = linksToPages[i].GetAttributeValue("value", "");
        pages.Add(Observable.Start<HtmlDocument>(() =>
        {
            // Every background load gets its own HtmlWeb so no instance is shared
            // between threads, and no captured local is written from the lambda.
            var pageWeb = new HtmlAgilityPack.HtmlWeb();
            pageWeb.AutoDetectEncoding = true;
            return pageWeb.Load(linkToPage);
        }));
    }

    // Waiting in list order keeps the images in page order.
    foreach (IObservable<HtmlDocument> item in pages)
    {
        HtmlDocument pagehtml = item.Wait();
        var imageNode = pagehtml.DocumentNode.SelectSingleNode(@"/html/body/section[@id='viewer']/a/img[@id='image']/@src ");
        if (imageNode != null)
        {
            chapter.Images.Add(imageNode.GetAttributeValue("src", ""));
        }
    }

    return chapter;
}
/// <summary>
/// Scrapes the bird list, loads each bird's detail page, downloads every gallery image
/// into <c>IMAGES_FOLDER</c> and writes the collected data as JavaScript variables to
/// <c>data.js</c>.
/// </summary>
static void Download()
{
    List<Vogel> voegel = new List<Vogel>();
    Directory.CreateDirectory(IMAGES_FOLDER);

    Uri baseUri = new Uri("http://www.vogelwarte.ch ");
    HtmlWeb web = new HtmlWeb();
    Uri queryUri = new Uri(baseUri, "voegel-der-schweiz.html?keyword=&mode=name,nameL&showPage=0&length=0&lang=de&exampleSearch=0");
    Console.WriteLine(queryUri);
    var docQuery = web.Load(queryUri.ToString());

    // SelectNodes yields null when nothing matches; treat that as "no entries".
    var entryLinks = docQuery.DocumentNode.SelectNodes("//table[@class=\"list\"]/tr[@class =\"listEntry\"]/td/h3/a");

    // One WebClient serves all image downloads instead of one per image.
    using (var client = new WebClient())
    {
        if (entryLinks != null)
        {
            foreach (var elEntry in entryLinks)
            {
                Uri uriEntry = new Uri(baseUri, Decode(elEntry.Attributes["href"].Value));
                Console.WriteLine(uriEntry);
                var docEntry = web.Load(uriEntry.ToString());
                var nodeDetail = docEntry.DocumentNode.SelectSingleNode("//div[@id=\"birdDetail\"]");

                // NOTE(review): the XPath expressions below start with "//", so they are
                // evaluated against the whole document rather than only nodeDetail.
                var galleryImages = nodeDetail.SelectNodes("//div[@id=\"gallery\"]/div/img");

                Vogel vogel = new Vogel
                {
                    Name = Decode(elEntry.InnerText),
                    Gruppe = Decode(nodeDetail.SelectSingleNode("//td[strong/text()=\"Vogelgruppe:\"]").LastChild.InnerText),
                    Lebensraum = Decode(nodeDetail.SelectSingleNode("//td[strong/text()=\"Lebensraum:\"]").LastChild.InnerText),
                    Laenge = Decode(nodeDetail.SelectSingleNode("//td[strong/text()=\"Länge (cm):\"]").LastChild.InnerText),
                    // A bird without gallery images gets an empty array.
                    Bilder = galleryImages == null
                        ? new Bild[0]
                        : galleryImages.Select(nodeImg => new Bild
                        {
                            Titel = Decode(nodeImg.Attributes["title"].Value),
                            Source = new Uri(baseUri, Decode(nodeImg.Attributes["src"].Value)).ToString()
                        }).ToArray()
                };
                voegel.Add(vogel);

                foreach (var bild in vogel.Bilder)
                {
                    Console.WriteLine(bild.Source);
                    string strFile = Path.GetFileName(bild.Source);
                    client.DownloadFile(bild.Source, Path.Combine(IMAGES_FOLDER, strFile));
                    // After the download the entry refers to the local file name.
                    bild.Source = strFile;
                }
            }
        }
    }

    JavaScriptSerializer serializer = new JavaScriptSerializer();
    using (StreamWriter sr = File.CreateText("data.js"))
    {
        sr.Write("var Voegel = ");
        sr.Write(serializer.Serialize(voegel.OrderBy(v => v.Name)));
        sr.Write(";\r\nvar Gruppen = ");
        sr.Write(serializer.Serialize(voegel.Select(v => v.Gruppe).Distinct().OrderBy(g => g)));
        sr.Write(";\r\nvar Lebensraeume = ");
        sr.Write(serializer.Serialize(voegel.SelectMany(v => v.Lebensraum.Split(',').Select(l => l.Trim())).Distinct().OrderBy(l => l)));
        sr.Write(";");
    }
}
/// <summary>
/// Rebuilds <c>Pages</c>: loads <c>Url</c>, asks <c>GetMaxPage</c> for the page count and
/// then loads "Url/page/N" for N = 1..count, storing the inner HTML of the first
/// <c>div#storyText</c> of each page (or an empty string when there is none).
/// </summary>
public void GeneratePages()
{
    var loader = new HtmlWeb();
    _htmlDoc = loader.Load(this.Url);
    int lastPage = this.GetMaxPage(_htmlDoc);

    this.Pages = new List<WattpadPage>();

    int pageNo = 1;
    while (pageNo <= lastPage)
    {
        _htmlDoc = loader.Load(this.Url + "/page/" + pageNo);

        var storyNodes = _htmlDoc.DocumentNode.SelectNodes("//div[@id='storyText']");
        string html = storyNodes != null ? storyNodes.First().InnerHtml : "";

        var entry = new WattpadPage();
        entry.PageNumber = pageNo;
        entry.Content = html;
        this.Pages.Add(entry);

        pageNo++;
    }
}
/// <summary>
/// Loads the brand page, fills the logo URL, official site, country and list URL of
/// <c>mCarBrand</c>, then loads the list page, adds one <c>CarFactory</c> per factory node
/// and finally calls <c>runFactoryTasks</c>.
/// </summary>
public void Run()
{
    var loader = new HtmlWeb();
    HtmlDocument page = loader.Load(WebConstants.BASE_URL + mCarBrand.Url);

    // Each section is re-parsed from its outer HTML into a stand-alone node before it is queried.
    HtmlNode logo = HtmlNode.CreateNode(page.DocumentNode.SelectSingleNode(WebConstants.BRAND_LOGO).OuterHtml);
    mCarBrand.LogoUrl = logo.SelectSingleNode(WebConstants.IMAGE_SRC).Attributes[WebConstants.SRC].Value;

    HtmlNode site = HtmlNode.CreateNode(page.DocumentNode.SelectSingleNode(WebConstants.BRAND_OFFICIAL_SITE).OuterHtml);
    mCarBrand.OfficialSite = site.SelectSingleNode(WebConstants.LINK_HREF).Attributes[WebConstants.HREF].Value;

    HtmlNode country = HtmlNode.CreateNode(page.DocumentNode.SelectSingleNode(WebConstants.BRAND_COUNTRY).OuterHtml);
    string countryText = country.InnerText;
    // The country name is what follows the text of the EM element.
    int labelLength = country.SelectSingleNode(WebConstants.EM).InnerText.Length;
    mCarBrand.Country = new Country(countryText.Substring(labelLength));
    mCarBrand.Country.LogoUrl = country.SelectSingleNode(WebConstants.IMAGE_SRC).Attributes[WebConstants.SRC].Value;

    HtmlNode brandList = HtmlNode.CreateNode(page.DocumentNode.SelectSingleNode(WebConstants.BRAND_LIST).OuterHtml);
    mCarBrand.ListUrl = brandList.SelectSingleNode(WebConstants.SCRIPT_SRC).Attributes[WebConstants.SRC].Value;

    page = loader.Load(WebConstants.BASE_URL + mCarBrand.ListUrl);
    HtmlNodeCollection factories = page.DocumentNode.SelectNodes(WebConstants.FACTORY_NODE);

    // A null collection means no factory nodes were found; the loop is then skipped.
    for (int k = 0; factories != null && k < factories.Count; k++)
    {
        HtmlNode factory = HtmlNode.CreateNode(factories[k].OuterHtml);
        CarFactory carFactory = new CarFactory(mCarBrand)
        {
            Url = factory.SelectSingleNode(WebConstants.LINK_HREF).Attributes[WebConstants.HREF].Value,
            Name = factory.InnerText.Replace("/", "")
        };
        mCarBrand.CarFactoryList.Add(carFactory);
    }

    runFactoryTasks();
}
/// <summary>
/// Loads the chapter page at <paramref name="link"/>, then every further page listed in
/// the <c>pageMenu</c> selector, and collects the <c>src</c> of the element with id
/// <c>img</c> on each page.
/// </summary>
/// <param name="source">Not used by this method.</param>
/// <param name="link">URL of the first page of the chapter.</param>
/// <returns>
/// A <see cref="ChapterData"/> whose <c>Images</c> contains one entry per page on which
/// the image node was found, in page order.
/// </returns>
public static ChapterData getChapters(Source source, string link)
{
    // No trailing space: the option value is appended directly to this prefix.
    const string siteRoot = "http://www.mangareader.net";

    ChapterData chapter = new ChapterData();

    var web = new HtmlAgilityPack.HtmlWeb();
    web.AutoDetectEncoding = true;
    var firstPage = web.Load(link);

    var pages = new List<IObservable<HtmlDocument>>();
    pages.Add(Observable.Return(firstPage));

    // SelectNodes yields null (not an empty collection) when nothing matches.
    var linksToPages = firstPage.DocumentNode.SelectNodes(@"//*[@id='pageMenu']/option");
    int pageCount = linksToPages != null ? linksToPages.Count : 0;

    // Starts at 1: presumably option 0 is the page that was already loaded above.
    for (int i = 1; i < pageCount; i++)
    {
        var linkToPage = siteRoot + linksToPages[i].GetAttributeValue("value", "");
        pages.Add(Observable.Start<HtmlDocument>(() =>
        {
            // Every background load gets its own HtmlWeb so no instance is shared
            // between threads, and no captured local is written from the lambda.
            var pageWeb = new HtmlAgilityPack.HtmlWeb();
            pageWeb.AutoDetectEncoding = true;
            return pageWeb.Load(linkToPage);
        }));
    }

    // Waiting in list order keeps the images in page order.
    foreach (IObservable<HtmlDocument> item in pages)
    {
        HtmlDocument pagehtml = item.Wait();
        var imageNode = pagehtml.DocumentNode.SelectSingleNode(@"//*[@id='img']");
        if (imageNode != null)
        {
            chapter.Images.Add(imageNode.GetAttributeValue("src", ""));
        }
    }

    return chapter;
}
/// <summary>
/// Builds a <c>Novel</c> for <paramref name="index"/> from three pages (main, staff and
/// characters), saves it through <c>NovelManager.SaveNovel</c> and reports completion.
/// </summary>
/// <param name="index">Value substituted into the main, staff and character URL patterns.</param>
public void ProcessIndex(int index)
{
    var novel = new Novel();
    var loader = new HtmlWeb();

    // Main page: the first "mainbox" div, plus optional releases and screenshots.
    HtmlDocument page = loader.Load(String.Format(MainUrlPattern, index));
    var boxes = page.DocumentNode.Descendants("div").Where(d => d.HasClass("mainbox")).ToArray();
    ParseMainContent(boxes[0], novel);

    var releases = page.DocumentNode.Descendants("div").FirstOrDefault(d => d.HasClass("releases"));
    if (releases != null)
    {
        ParseReleasesContent(releases, novel);
    }

    var screenshots = page.DocumentNode.Descendants("div").FirstOrDefault(d => d.HasId("screenshots"));
    if (screenshots != null)
    {
        ParseImagesContent(screenshots, novel);
    }

    // Staff page: first "staff" div that is not also a "cast" div.
    page = loader.Load(String.Format(StaffPattern, index));
    var staff = page.DocumentNode.Descendants("div").FirstOrDefault(d => d.HasClass("staff") && d.NotContainsClass("cast"));
    if (staff != null)
    {
        ParseStaffContent(staff, novel);
    }

    // Character page: every "mainbox" div after the first one.
    page = loader.Load(String.Format(CharacterPattern, index));
    boxes = page.DocumentNode.Descendants("div").Where(d => d.HasClass("mainbox")).ToArray();
    foreach (var characterBox in boxes.Skip(1))
    {
        ParseCharactersContent(characterBox, novel);
    }

    using (var ctx = new VNContext("VNConnectionString"))
    {
        NovelManager.SaveNovel(novel, ctx);
        Logs.Debug($@"Novel {index} finished");
    }

    Console.WriteLine(index + @" finished");
}
/// <summary>
/// Loads <paramref name="url"/>, finds every <c>li[itemprop='author']</c> that has an
/// "envelope" link with a <c>title</c> attribute, and appends "name - email" lines to
/// <c>C:\Springer\Springer Emails.txt</c>.
/// </summary>
/// <param name="url">Address of the page to scan.</param>
/// <returns>The lines appended to the file by this call; empty when none were found.</returns>
public static List<string> getNameOfEmail(string url)
{
    const string outputDirectory = @"C:\Springer\";
    const string outputPath = @"C:\Springer\Springer Emails.txt";

    List<string> a = new List<string>();
    HtmlWeb website = new HtmlWeb();
    HtmlAgilityPack.HtmlDocument doc = website.Load(url);
    HtmlNodeCollection authors = doc.DocumentNode.SelectNodes(".//li[@itemprop ='author']");

    // CreateDirectory does nothing when the directory already exists, so no Exists check is needed.
    Directory.CreateDirectory(outputDirectory);

    using (StreamWriter outputFile = new StreamWriter(outputPath, true))
    {
        // SelectNodes yields null when the page has no author entries.
        if (authors != null)
        {
            foreach (HtmlNode author in authors)
            {
                HtmlNode nameNode = author.SelectSingleNode(".//a[@class ='person']");
                HtmlNode emailNode = author.SelectSingleNode(".//a[@class ='envelope']");
                if (emailNode == null)
                {
                    continue;
                }

                HtmlAttribute address = emailNode.Attributes["title"];
                if (address == null)
                {
                    continue;
                }

                // Keep the address even when the name link is missing.
                string name = nameNode != null ? nameNode.InnerText : "";
                string entry = name + " - " + address.Value;
                outputFile.WriteLine(entry);
                a.Add(entry);
            }
        }
    }

    return a;
}
/// <summary>
/// Loads <paramref name="url"/>, records it in <c>VisitedPages</c> and enqueues every link
/// on the page that resolves to the same host as <c>baseUrl</c> and is neither queued nor
/// visited yet. Failures are swallowed: the method returns without enqueuing further links.
/// </summary>
/// <param name="url">Address of the page to scan for links.</param>
public static void getHrefs(string url)
{
    try
    {
        HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlWeb();
        HtmlAgilityPack.HtmlDocument doc = htmlWeb.Load(url);
        VisitedPages.Add(url);

        // SelectNodes yields null when the page contains no anchors with an href.
        var anchors = doc.DocumentNode.SelectNodes("//a[@href ]");
        if (anchors == null)
        {
            return;
        }

        foreach (HtmlNode link in anchors)
        {
            // TryCreate skips a malformed href instead of throwing, so one bad link
            // no longer discards every link that follows it.
            Uri l;
            if (!Uri.TryCreate(baseUrl, link.Attributes["href"].Value, out l))
            {
                continue;
            }

            string target = l.ToString();
            if (l.Host == baseUrl.Host && !LinkQueue.Contains(target) && !VisitedPages.Contains(target))
            {
                LinkQueue.Enqueue(target);
            }
        }
    }
    catch (Exception)
    {
        // Best effort: a page that cannot be loaded or parsed is skipped.
        return;
    }
}
/// <summary>
/// Returns the HTML-decoded title of the page at <paramref name="remoteUri"/>: the
/// OpenGraph title when present, otherwise the text of the first <c>title</c> element.
/// </summary>
/// <param name="remoteUri">Address of the page.</param>
/// <returns>The title, or <c>null</c> when none is found or any error occurs.</returns>
public static string GetTitleFromUri(string @remoteUri)
{
    try
    {
        var graph = OpenGraph.ParseUrl(@remoteUri, "Voat.co OpenGraph Parser");
        if (!string.IsNullOrEmpty(graph.Title))
        {
            return HttpUtility.HtmlDecode(graph.Title);
        }

        HtmlWeb htmlWeb = new HtmlWeb();
        HtmlDocument htmlDocument = htmlWeb.Load(@remoteUri);
        if (htmlDocument == null)
        {
            return null;
        }

        // FirstOrDefault rather than SingleOrDefault: a page may contain more than one
        // "title" element, and SingleOrDefault throws in that case.
        var titleNode = htmlDocument.DocumentNode.Descendants("title").FirstOrDefault();
        if (titleNode == null)
        {
            return null;
        }

        // Decode entities here too so both paths return the same kind of text.
        return HttpUtility.HtmlDecode(titleNode.InnerText);
    }
    catch (Exception)
    {
        // Any failure is reported to the caller as "no title".
        return null;
    }
}
/// <summary>
/// Loads the listing page for <paramref name="city"/> in <paramref name="state"/> and, for
/// every non-empty link whose text is exactly "Phone", prints phone, name and address to the
/// console and writes a comma-separated line to <c>file</c>.
/// </summary>
/// <param name="state">State segment of the URL.</param>
/// <param name="city">City segment of the URL; "-plumbers" is removed from it in the output line.</param>
private void ExtractCity(string state, string city)
{
    string cityUrl = RootUrl + @"/" + state + @"/" + city + @"?" + @"page=1&ipp=All";
    HtmlDocument doc = new HtmlWeb().Load(cityUrl);

    var anchors = doc.DocumentNode.Descendants()
        .Where(n => n.Name == "a"
                 && n.Attributes["href"] != null
                 && n.InnerText.Trim().Length > 0);

    foreach (var anchor in anchors)
    {
        if (anchor.InnerText != "Phone")
        {
            continue;
        }

        // The phone number is the text of the node following the link's parent.
        string phone = anchor.ParentNode.NextSibling.InnerText;
        Console.WriteLine();
        Console.WriteLine("phone: " + phone);

        // Name and address are lines 1 and 2 of the text of the second child
        // of the link's fourth ancestor.
        string[] lines = anchor.ParentNode.ParentNode.ParentNode.ParentNode
            .FirstChild.NextSibling.InnerText.Split('\n');
        string name = lines[1].Trim();
        string address = lines[2].Trim();

        Console.WriteLine("name: " + name);
        Console.WriteLine("address: " + address);
        file.WriteLine(name + "," + address + "," + city.Replace("-plumbers", "") + "," + phone.Replace(" ", ""));
    }
}
/// <summary>
/// Looks up the licence plate <paramref name="nummerplade"/> on nummerplade.net and reads
/// make, model, variant, chassis number, model year and plate from the result page.
/// </summary>
/// <param name="nummerplade">Licence plate to search for.</param>
/// <returns>The populated <see cref="Bilinformation"/>.</returns>
/// <exception cref="IngenBilinformationException">
/// Thrown, wrapping the original exception, when the lookup or the parsing fails.
/// </exception>
public static Bilinformation HentBilinformation(string nummerplade)
{
    try
    {
        Bilinformation bilinformation = new Bilinformation();

        // The plate is escaped so that spaces or '&' cannot alter the query string.
        string adresse = "http://www.nummerplade.net/soeg/?regnr=" + Uri.EscapeDataString(nummerplade);

        HtmlWeb web = new HtmlWeb();
        HtmlDocument page = web.Load(adresse);
        var root = page.DocumentNode;
        if (root != null)
        {
            // A missing cell makes SelectSingleNode return null; the resulting exception
            // is wrapped by the catch block below.
            bilinformation.Maerke = root.SelectSingleNode("//td[@id='maerke']").InnerText;
            bilinformation.Model = root.SelectSingleNode("//td[@id='model']").InnerText;
            bilinformation.Variant = root.SelectSingleNode("//td[@id='variant']").InnerText;
            bilinformation.Stelnummer = root.SelectSingleNode("//td[@id='stelnr']").InnerText;
            bilinformation.Aargang = root.SelectSingleNode("//td[@id='model_aar']").InnerText;
            bilinformation.Nummerplade = root.SelectSingleNode("//td[@id='regnr']").InnerText;
        }

        return bilinformation;
    }
    catch (Exception ex)
    {
        throw new IngenBilinformationException("Der blev ikke fundet nogen bilinformation på nummerpladen.", ex);
    }
}
/// <summary>
/// Fetches lyrics for <paramref name="artist"/> / <paramref name="title"/> from
/// letras.mus.br. When nothing is found and either value contains '&amp;', the lookup is
/// retried once with every '&amp;' replaced by 'e'.
/// </summary>
/// <param name="artist">Artist name; <c>null</c> is treated as empty.</param>
/// <param name="title">Song title; <c>null</c> is treated as empty.</param>
/// <returns>The HTML-decoded lyrics text, or an empty string when no lyrics node is found.</returns>
public static string GetFromTerra(string artist, string title)
{
    artist = (artist ?? "").ToLowerInvariant();
    title = (title ?? "").ToLowerInvariant();

    HtmlWeb web = new HtmlWeb();
    HtmlDocument doc = web.Load(string.Format(
        "http://letras.mus.br/winamp.php?t={0}-{1}",
        HttpUtility.UrlEncode(artist, ISOEncoding),
        HttpUtility.UrlEncode(title, ISOEncoding)));

    HtmlNode node = doc.DocumentNode.SelectSingleNode("//div[@id='letra']/p");
    if (node == null)
    {
        if (artist.Contains("&") || title.Contains("&"))
        {
            // The retry cannot recurse again for this reason: no '&' is left afterwards.
            return GetFromTerra(artist.Replace('&', 'e'), title.Replace('&', 'e'));
        }

        // Nothing found and nothing left to retry with.
        return string.Empty;
    }

    // Turn line-break tags into real line breaks before the markup is dropped by InnerText.
    node.InnerHtml = node.InnerHtml.Replace("<br>", "\r\n");
    return WebUtility.HtmlDecode(node.InnerText);
}
/// <summary>
/// Loads a fixed Yandex search URL as UTF-8 and prints the inner HTML of every
/// <c>div</c> to the console, or "found nothing" when the page has none.
/// </summary>
public static void GetText2()
{
    const string searchUrl = "https://yandex.by/search/?numdoc=10&p=0&rdrnd=601861&text=kinogo.co Один дома 1990 &lr=157";

    var web = new HtmlWeb();
    web.AutoDetectEncoding = false;
    web.OverrideEncoding = Encoding.UTF8;

    HtmlNodeCollection divs = web.Load(searchUrl).DocumentNode.SelectNodes("//div");
    if (divs == null)
    {
        Console.WriteLine("found nothing");
        return;
    }

    foreach (HtmlNode div in divs)
    {
        Console.WriteLine(div.InnerHtml);
    }
}
/// <summary>
/// Loads the page at <c>BASE</c> + <paramref name="URL"/> (HTML-decoded) and returns the
/// links in the fourth column of the table with id <c>myTable</c>.
/// </summary>
/// <param name="URL">Address part appended to <c>BASE</c>.</param>
/// <returns>The nodes selected by the XPath query, as returned by <c>SelectNodes</c>.</returns>
static HtmlNodeCollection GetSuburb(string URL)
{
    string address = System.Net.WebUtility.HtmlDecode(BASE + URL);
    return new HtmlWeb()
        .Load(address)
        .DocumentNode
        .SelectNodes("//table[@id='myTable']/tbody/tr/td[4]/a");
}
/// <summary>
/// Loads <paramref name="url"/> and lists the <c>name</c> and <c>content</c> of every
/// <c>meta</c> tag that has both attributes.
/// </summary>
/// <param name="url">Page address; the bare value "http://" is replaced by a default address.</param>
/// <returns>
/// Alternating "Name=..." and "Content=..." strings, two per qualifying meta tag;
/// empty when the page has none.
/// </returns>
public List<string> GetData(string url)
{
    string target = url == "http://" ? "http://www.microsoft.com " : url;

    var metaTags = new HtmlWeb().Load(target).DocumentNode.SelectNodes("//meta");
    List<string> output = new List<string>();
    if (metaTags == null)
    {
        return output;
    }

    foreach (var tag in metaTags)
    {
        var name = tag.Attributes["name"];
        var content = tag.Attributes["content"];
        if (name == null || content == null)
        {
            continue;
        }

        output.Add("Name=" + name.Value);
        output.Add("Content=" + content.Value);
    }

    return output;
}
/// <summary>
/// Loads <paramref name="url"/> and returns the <c>href</c> value of every <c>a</c> element
/// that has an <c>href</c> attribute and non-blank text.
/// </summary>
/// <param name="url">Address of the page to scan.</param>
/// <returns>The href values in document order.</returns>
public List<string> GetHrefLinks(string url)
{
    var page = new HtmlWeb().Load(url);

    return page.DocumentNode.Descendants()
        .Where(node => node.Name == "a")
        .Where(node => node.Attributes["href"] != null)
        .Where(node => node.InnerText.Trim().Length > 0)
        .Select(node => node.Attributes["href"].Value)
        .ToList();
}
/// <summary>
/// Loads the news listing (with "?wpage=N" appended for pages after the first) as UTF-8 and
/// builds one <c>NewsObject</c> per <c>div.recentNews</c> from its <c>h2.subHeading</c> link.
/// </summary>
/// <param name="page">Page number; values of 1 or less load <c>RootUrl</c> unchanged.</param>
/// <returns>The news items in document order.</returns>
public override List<NewsObject> NewestNews(int page)
{
    string htmlUrl = page > 1 ? RootUrl + "?wpage=" + page : RootUrl;

    HtmlWeb htmlWeb = new HtmlWeb();
    htmlWeb.AutoDetectEncoding = false;
    htmlWeb.OverrideEncoding = Encoding.UTF8;
    HtmlDocument document = htmlWeb.Load(htmlUrl);

    var results = new List<NewsObject>();
    foreach (var block in document.DocumentNode.QuerySelectorAll("div.recentNews").ToList())
    {
        var heading = block.QuerySelector("h2.subHeading");
        var href = heading.QuerySelector("a").Attributes["href"].Value;

        results.Add(new NewsObject
        {
            Link = RootUrl + href,
            Text = TrimHtml(heading.InnerText)
        });
    }

    return results;
}
/// <summary>
/// Loads the page at <c>serie.URL</c> and returns the <c>src</c> of the cover image inside
/// <c>div#series_info</c>.
/// </summary>
/// <param name="serie">Series whose page is loaded.</param>
/// <returns>
/// The image address, or an empty string when the image or its <c>src</c> attribute is missing.
/// </returns>
protected override string _GetSerieMiniatureUrl(Serie serie)
{
    var web = new HtmlWeb();
    var doc = web.Load(serie.URL);
    var img = doc.DocumentNode.SelectSingleNode("//div[@id='series_info']/div[@class ='cover']/img");

    // A missing image gives the same "" that a missing src attribute already gives.
    return img == null ? "" : img.GetAttributeValue("src", "");
}
/// <summary>
/// Loads <paramref name="url"/> and returns the <c>value</c> of every <c>option</c> directly
/// under the page's <c>select</c> whose class is exactly "selectBox".
/// </summary>
/// <param name="url">Address of the page to scan.</param>
/// <returns>
/// The option values ("NO_URL" for options without a value) with the first "#" entry removed,
/// or <c>null</c> when the page has no such <c>select</c>.
/// </returns>
/// <exception cref="System.InvalidOperationException">
/// Thrown by <c>Single</c> when more than one matching <c>select</c> exists.
/// </exception>
public static List<string> GetChapterUrls(string url)
{
    HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlWeb();
    HtmlAgilityPack.HtmlDocument htdoc = htmlWeb.Load(url);

    // GetAttributeValue tolerates select elements that have no class attribute at all;
    // ToList materialises the query once instead of walking the document twice.
    List<HtmlAgilityPack.HtmlNode> selectList = htdoc.DocumentNode.Descendants("select")
        .Where(x => x.GetAttributeValue("class", "") == "selectBox")
        .ToList();
    if (selectList.Count == 0)
    {
        return null;
    }

    var selectElement = selectList.Single();

    List<string> ret = new List<string>();
    foreach (var cNode in selectElement.ChildNodes)
    {
        if (cNode.Name == "option")
        {
            ret.Add(cNode.GetAttributeValue("value", "NO_URL"));
        }
    }

    // Removes the first "#" entry, if any.
    ret.Remove("#");
    return ret;
}
/// <summary>
/// For URLs containing "viewdoc", loads the page and returns the text of the paragraph under
/// the element with id <c>bibtex</c>, with a line break inserted after every comma.
/// </summary>
/// <param name="url">Address of the document page.</param>
/// <returns>
/// The reformatted text, or an empty string when the URL does not contain "viewdoc", the
/// node is missing, or reading it fails.
/// </returns>
public string getBibTex(string url)
{
    if (!url.Contains("viewdoc"))
    {
        return "";
    }

    HtmlDocument doc = new HtmlWeb().Load(url);
    Console.WriteLine(doc != null ? "Document Loaded!" : "Load Error!");

    string bibtex = "";
    try
    {
        HtmlNode entry = doc.DocumentNode.SelectSingleNode("//*[@id=\"bibtex\"]/p");
        if (entry != null)
        {
            bibtex = entry.InnerText.Replace(",", ",\n").Replace(" ", " ");
        }
    }
    catch (Exception)
    {
        // Errors while reading the node leave the result empty.
    }

    return bibtex;
}
/// <summary>
/// Loads page <paramref name="pageNumber"/> of the tok.fm podcast listing (ISO-8859-2),
/// re-serialises it as XML, and builds one <c>Podcast</c> per <c>a.tokfm_play</c> link.
/// </summary>
/// <param name="pageNumber">Value of the <c>str</c> query parameter.</param>
/// <returns>The podcasts in document order; empty when the page has no matching links.</returns>
public IEnumerable<Podcast> GetLatestPodcasts(int pageNumber)
{
    var hw = new HtmlWeb();
    hw.OverrideEncoding = Encoding.GetEncoding("ISO-8859-2");
    var doc = hw.Load("http://www.tok.fm/TOKFM/0,94037.html?str=" + pageNumber.ToString(CultureInfo.InvariantCulture));

    doc.OptionOutputAsXml = true;
    doc.OptionCheckSyntax = true;
    doc.OptionFixNestedTags = true;

    // Save the document with the options above, then load that output back into it.
    string page;
    using (var stringWriter = new StringWriter())
    {
        doc.Save(stringWriter);
        page = stringWriter.ToString();
    }

    using (var stringReader = new StringReader(page))
    {
        doc.Load(stringReader);
    }

    var result = new List<Podcast>();

    // SelectNodes yields null when nothing matches.
    var links = doc.DocumentNode.SelectNodes("//a[@class ='tokfm_play']");
    if (links == null)
    {
        return result;
    }

    foreach (HtmlNode link in links)
    {
        var imgNode = link.SelectSingleNode("img");
        var imageURL = String.Empty;
        if (imgNode != null)
        {
            imageURL = imgNode.Attributes["src"].Value;
        }

        result.Add(new Podcast
        {
            Href = link.Attributes["href"].Value,
            Title = link.Attributes["title"].Value,
            ImageURL = imageURL
        });
    }

    return result;
}
/// <summary>
/// Downloads and parses the timetable page for a study year and, optionally, a half-year.
/// </summary>
/// <param name="year">Study year; its enum name forms the first part of the page name.</param>
/// <param name="halfYear">
/// Half-year; its enum name is appended to the page name, except for
/// <c>HalfYear.None</c>, which appends nothing.
/// </param>
/// <returns>
/// The parsed timetable, or <c>null</c> when a <see cref="WebException"/> or
/// <see cref="NotSupportedException"/> occurs (the exception is logged).
/// </returns>
public List<TimetableItem> GetTimetableForYear(StudyYear year, HalfYear halfYear = HalfYear.None)
{
    List<TimetableItem> timetable;
    string tempYear = Enum.GetName(typeof(StudyYear), year);
    string tempHalfYear = Enum.GetName(typeof(HalfYear), halfYear);
    if (tempHalfYear == "None")
    {
        tempHalfYear = String.Empty;
    }

    try
    {
        HtmlWeb hw = new HtmlWeb();

        // Uses tempHalfYear (not halfYear) so that "None" never ends up in the file name.
        HtmlDocument doc = hw.Load(String.Format("http://thor.info.uaic.ro/~orar/participanti/orar_{0}{1}.html", tempYear, tempHalfYear));

        // Line breaks are removed from the markup before it is parsed.
        doc.DocumentNode.InnerHtml = doc.DocumentNode.InnerHtml.Replace("\r\n", "");
        timetable = ParseTable(doc, TimetableType.Year);
    }
    catch (WebException ex)
    {
        Logger.ExceptionLogger.Log(ex);
        timetable = null;
    }
    catch (NotSupportedException ex)
    {
        Logger.ExceptionLogger.Log(ex);
        timetable = null;
    }

    return timetable;
}
/// <summary>
/// Loads a LinkedIn profile page, prints the name, summary and skills nodes, then calls
/// <c>Info</c> for each profile section and waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    var doc = new HtmlWeb().Load("https://ua.linkedin.com/in/kirillmiroshnichenko ");

    // Name, summary and skills, in that order.
    string[] headlineQueries =
    {
        "//span[@class ='full-name']",
        "//p[@class ='description']",
        "//span[@class ='skill-pill']"
    };
    foreach (string query in headlineQueries)
    {
        Print(doc.DocumentNode.SelectNodes(query));
    }

    Console.WriteLine("-------------");

    string[] sections =
    {
        "experience", "courses", "projects", "certifications", "languages", "education", "interests",
        "patents", "publications", "honors", "test-scores", "organizations", "volunteering"
    };
    for (int s = 0; s < sections.Length; s++)
    {
        Info(doc, sections[s]);
    }

    Console.ReadLine();
}
/// <summary>
/// Loads <paramref name="url"/> and adds one <c>JobListing</c> per title cell of the
/// "positionlist" table that has exactly five child nodes.
/// </summary>
/// <param name="url">Address of the GitHub Jobs result page.</param>
/// <param name="jobListings">List that receives the listings found.</param>
private void AddGithubJobs(string url, List<JobListing> jobListings)
{
    HtmlWeb page = new HtmlWeb();
    var document = page.Load(url);

    // No trailing space: the link's address is appended directly to this prefix.
    string baseURL = "https://jobs.github.com";

    try
    {
        HtmlNode table = document.DocumentNode.SelectSingleNode("//table[@class ='positionlist']");
        if (table == null)
        {
            return;
        }

        // SelectNodes yields null when the table has no title cells.
        HtmlNodeCollection rows = table.SelectNodes(".//td[@class ='title']");
        if (rows == null)
        {
            return;
        }

        foreach (HtmlNode row in rows)
        {
            if (row.ChildNodes.Count != 5)
            {
                continue;
            }

            jobListings.Add(new JobListing()
            {
                SearchEngine = SearchEngines.GitHub,
                Title = row.ChildNodes[1].InnerText,
                Company = row.ChildNodes[3].ChildNodes[1].InnerText,
                // presumably the first attribute of the title link is its href — TODO confirm
                URL = baseURL + row.ChildNodes[1].ChildNodes[0].Attributes[0].Value
            });
        }
    }
    catch (Exception)
    {
        // Best effort: a row with an unexpected shape ends the scan; listings already added stay.
    }
}
/// <summary>
/// Sets up the main layout, fills the status text view with the tag-stripped contents of
/// every table cell of the route page, and wires the button to open <c>Page2</c>.
/// </summary>
/// <param name="savedInstanceState">Passed through to the base implementation.</param>
protected override void OnCreate(Bundle savedInstanceState)
{
    base.OnCreate(savedInstanceState);
    SetContentView(Resource.Layout.Main);
    TextView textView = FindViewById<TextView>(Resource.Id.TEXT_STATUS_ID);

    // NOTE(review): the page is loaded synchronously inside OnCreate — confirm this is acceptable.
    HtmlWeb web = new HtmlWeb();
    HtmlDocument doc = web.Load("https://www.ltd.org/system-map/route_79x/ ");
    HtmlNodeCollection tags = doc.DocumentNode.SelectNodes("//td");

    // Build the text in memory and assign it to the view once, starting from what it already shows.
    var text = new System.Text.StringBuilder(textView.Text);

    // SelectNodes yields null when the page has no table cells.
    if (tags != null)
    {
        foreach (HtmlNode item in tags)
        {
            text.Append(item.InnerHtml).Append("\n");
        }
    }

    // Strip the remaining markup from the collected cell contents.
    textView.Text = Regex.Replace(text.ToString(), @"<[^>]*>", String.Empty);

    Button button = FindViewById<Button>(Resource.Id.myButton);
    button.Click += delegate
    {
        StartActivity(typeof(Page2));
    };
}
/// <summary>
/// Loads <paramref name="url"/>, prints the <c>href</c> of every link and adds each one that
/// is an absolute URI to <c>_results</c>. Failures are swallowed.
/// </summary>
/// <param name="url">Address of the page to scan.</param>
public void Scrape(string url)
{
    try
    {
        HtmlWeb hw = new HtmlWeb();
        HtmlDocument doc = hw.Load(url);

        // SelectNodes yields null when the page contains no anchors with an href.
        HtmlNodeCollection links = doc.DocumentNode.SelectNodes("//a[@href ]");
        if (links == null)
        {
            return;
        }

        foreach (HtmlNode link in links)
        {
            HtmlAttribute att = link.Attributes["href"];
            Console.WriteLine(att.Value);

            // Relative or malformed addresses are printed but not stored;
            // TryCreate avoids raising an exception for each of them.
            Uri target;
            if (Uri.TryCreate(att.Value, UriKind.Absolute, out target))
            {
                this._results.Add(target);
            }
        }
    }
    catch (Exception)
    {
        // Best effort: a page that cannot be loaded or parsed contributes no results.
    }
}
/// <summary>
/// Searches gametracker.com for Dota 2 servers running <paramref name="map"/> and appends
/// the trimmed text of selected table cells to <paramref name="list"/>.
/// </summary>
/// <param name="list">List that receives the cell texts; it is also the return value.</param>
/// <param name="map">Map name; trimmed and URL-escaped before it is placed in the query.</param>
/// <returns><paramref name="list"/>, with the collected cell texts appended.</returns>
public static List<string> GetServersFromMap(List<string> list, string map)
{
    HtmlWeb htmlWeb = new HtmlWeb();
    HtmlAgilityPack.HtmlDocument document = htmlWeb.Load(
        "http://www.gametracker.com/search/dota2/?search_by=map&query="
        + System.Uri.EscapeDataString(map.Trim())
        + "&searchipp=50");

    // SelectNodes yields null when nothing matches, so every level is checked.
    var tables = document.DocumentNode.SelectNodes("//table");
    if (tables == null)
    {
        return list;
    }

    // Cells are collected while "started" is set and "stopped" is clear:
    // a cell containing "Rank&darr" toggles "stopped" and clears "started",
    // a cell containing "Server Map " sets "started" for the cells that follow it.
    bool started = false;
    bool stopped = true;

    foreach (HtmlNode table in tables)
    {
        var rows = table.SelectNodes("tr");
        if (rows == null)
        {
            continue;
        }

        foreach (HtmlNode row in rows)
        {
            var cells = row.SelectNodes("td");
            if (cells == null)
            {
                continue;
            }

            foreach (HtmlNode cell in cells)
            {
                string cellText = cell.InnerText;

                if (cellText.Contains("Rank&darr"))
                {
                    stopped = !stopped;
                    started = false;
                }

                if (started && !stopped)
                {
                    list.Add(cellText.Trim());
                }

                if (cellText.Contains("Server Map "))
                {
                    started = true;
                }
            }
        }
    }

    return list;
}
/// <summary>
/// Loads a zhaopin.com search result page for the address, keyword and page held in
/// <c>_pars</c> and adds one <c>JobInfo</c> per result table to <c>_jobList</c>.
/// Any exception is logged through <c>LogSave.ErrLogSave</c> and not rethrown.
/// </summary>
public void GetJobListFromWeb()
{
    try
    {
        var htmlWeb = new HtmlWeb { OverrideEncoding = Encoding.UTF8 };
        HtmlDocument htmlDoc = htmlWeb.Load(string.Format(
            "http://sou.zhaopin.com/jobs/searchresult.ashx?jl={0}&kw={1}&p={2}",
            DataClass.GetDic_zhilian(_pars.Addr), _pars.Key, _pars.Page));

        // The collection is used in document order. The loop below skips index 0, and an
        // unordered parallel copy would not guarantee which table ends up there.
        // A null result (no match) makes the Count access throw; the catch block logs it.
        var nodeList = htmlDoc.DocumentNode.SelectNodes("//*[@id='newlist_list_content_table']/table[@class ='newlist']");

        // Starts at 1: presumably the first table is the column header — TODO confirm.
        for (int i = 1; i < nodeList.Count; i++)
        {
            var node = nodeList[i];
            var titleLink = node.SelectSingleNode(".//tr/td[@class ='zwmc']/div/a");

            var job = new JobInfo();
            job.TitleName = titleLink.InnerText;
            job.InfoUrl = titleLink.Attributes["href"].Value;
            job.Company = node.SelectSingleNode(".//tr/td[@class ='gsmc']/a").InnerText;
            job.Salary = node.SelectSingleNode(".//tr/td[@class ='zwyx']").InnerText;
            job.City = node.SelectSingleNode(".//tr/td[@class ='gzdd']").InnerText;
            job.Date = node.SelectSingleNode(".//tr/td[@class ='gxsj']/span").InnerText;
            job.Source = "智联招聘";
            job.Method = "月薪";
            _jobList.Add(job);
        }
    }
    catch (Exception ex)
    {
        LogSave.ErrLogSave("错误【解析】", ex);
    }
}
/// <summary>
/// Loads the BBC football results page and, for every table row that has an id, reads the
/// match id, score, teams and date of that row.
/// </summary>
/// <remarks>
/// NOTE(review): the values read are only held in local variables and are not stored or
/// returned anywhere — confirm what should happen with them.
/// </remarks>
public datascraper()
{
    string url = @"http://www.bbc.co.uk/sport/football/results/partial/competition-118996114 ";
    HtmlWeb htmlWeb = new HtmlWeb();
    HtmlDocument doc = htmlWeb.Load(url);

    // SelectNodes yields null when nothing matches.
    HtmlNodeCollection mtchrslts = doc.DocumentNode.SelectNodes("//tr[@id]");
    if (mtchrslts == null)
    {
        return;
    }

    foreach (HtmlNode matchresult in mtchrslts)
    {
        // The queries start with ".//" so they look inside the current row. A leading "//"
        // searches the whole document and yields the first match on the page for every row.
        HtmlNode scoreNode = matchresult.SelectSingleNode(".//abbr[@title ='Score']");
        HtmlNode homeNode = matchresult.SelectSingleNode(".//p[(@class ='team-home teams')]");
        HtmlNode awayNode = matchresult.SelectSingleNode(".//p[(@class ='team-away teams')]");
        HtmlNode dateNode = matchresult.SelectSingleNode(".//td[(@class ='match-date')]");
        if (scoreNode == null || homeNode == null || awayNode == null || dateNode == null)
        {
            // Not every row with an id has to be a complete match row.
            continue;
        }

        string[] teamscores = scoreNode.InnerText.Split('-');
        if (teamscores.Length < 2)
        {
            continue;
        }

        string idnum = matchresult.Id.Replace("match-row-", "");
        string teamscoreh = teamscores[0];
        string teamscorea = teamscores[1];
        string hteam = homeNode.InnerText;
        string ateam = awayNode.InnerText;
        string date = dateNode.InnerText;
    }
}
/// <summary>
/// Downloads and parses the page at <paramref name="url"/>.
/// </summary>
/// <param name="url">Address of the page to load.</param>
/// <returns>The document returned by <c>HtmlWeb.Load</c>.</returns>
public static HtmlDocument Crawl(string url)
{
    return new HtmlWeb().Load(url);
}
Một số lưu ý khi bình luận
Mọi bình luận sai nội quy sẽ bị xóa mà không cần báo trước (xem nội quy)
Bấm Thông báo cho tôi bên dưới khung bình luận để nhận thông báo khi admin trả lời
Để bình luận một đoạn code, hãy mã hóa code trước nhé