Thứ Năm, 25 tháng 4, 2019

C# (CSharp) HtmlAgilityPack HtmlWeb.Load Examples

Đăng Bởi: Admin - tháng 4 25, 2019 - 0 Bình Luận

https://csharp.hotexamples.com/examples/HtmlAgilityPack/HtmlWeb/Load/php-htmlweb-load-method-examples.html





HtmlAgilityPack.HtmlDocument
EXAMPLE #1
0
  
        /// <summary>
        /// Downloads every page of a chapter and collects the image URL found on each page.
        /// </summary>
        /// <param name="source">Not used by this method.</param>
        /// <param name="link">URL of the chapter's first page.</param>
        /// <returns>A ChapterData whose Images list holds one "src" value per page, in page order.</returns>
        public static ChapterData getChapters(Source source, string link)
        {
            ChapterData chapter = new ChapterData();
            var web = new HtmlAgilityPack.HtmlWeb();
            web.AutoDetectEncoding = true;
            var htmlpage1 = web.Load(link);

            // The first page is already available; the remaining ones are fetched in the background.
            var pages = new List<IObservable<HtmlDocument>>();
            pages.Add(Observable.Return(htmlpage1));

            // NOTE(review): this XPath looks truncated (no element/attribute name before "=") - confirm
            // against the original project before relying on it.
            var linksToPages = htmlpage1.DocumentNode.SelectNodes(@"/='go_page ='wid60']/option");

            // SelectNodes yields null rather than an empty collection when nothing matches.
            int pageCount = linksToPages == null ? 0 : linksToPages.Count;

            // Index 0 is skipped: the first page was loaded above.
            for (int i = 1; i < pageCount; i++)
            {
                var linkToPage = linksToPages[i].GetAttributeValue("value", "");
                pages.Add(Observable.Start<HtmlDocument>(
                    () =>
                    {
                        // Each background fetch uses its own HtmlWeb and returns its own document,
                        // so no state is shared between the concurrently running downloads.
                        var web2 = new HtmlAgilityPack.HtmlWeb();
                        web2.AutoDetectEncoding = true;
                        return web2.Load(linkToPage);
                    }
                ));
            }

            // Wait on the pages in order so that Images keeps the page sequence.
            foreach (IObservable<HtmlDocument> item in pages)
            {
                HtmlDocument pagehtml = item.Wait();
                // NOTE(review): the XPath "/" also looks truncated - confirm the intended image selector.
                chapter.Images.Add(pagehtml.DocumentNode.SelectSingleNode(@"/").GetAttributeValue("src", ""));
            }
            return chapter;
        }

EXAMPLE #2
0
  
        /// <summary>
        /// Crawls the bird list on vogelwarte.ch, downloads every gallery image into IMAGES_FOLDER
        /// and writes the collected data as JavaScript variables to "data.js".
        /// </summary>
        static void Download()
        {
            List<Vogel> voegel = new List<Vogel>();

            Directory.CreateDirectory(IMAGES_FOLDER);

            Uri baseUri = new Uri("http://www.vogelwarte.ch");

            HtmlWeb web = new HtmlWeb();

            Uri queryUri = new Uri(baseUri, "voegel-der-schweiz.html?keyword=&mode=name,nameL&showPage=0&length=0&lang=de&exampleSearch=0");
            Console.WriteLine(queryUri);
            var docQuery = web.Load(queryUri.ToString());

            // NOTE(review): this XPath looks truncated (no element/attribute name before "=") - confirm
            // against the original project.
            IEnumerable<HtmlNode> entries = docQuery.DocumentNode.SelectNodes("//=\"listEntry\"]/td/h3/a");
            if (entries == null)
            {
                // SelectNodes yields null when nothing matches; treat that as "no birds listed".
                entries = Enumerable.Empty<HtmlNode>();
            }

            // One WebClient is reused for all image downloads instead of creating one per image.
            using (var client = new WebClient())
            {
                foreach (var elEntry in entries)
                {
                    Uri uriEntry = new Uri(baseUri, Decode(elEntry.Attributes["href"].Value));
                    Console.WriteLine(uriEntry);
                    var docEntry = web.Load(uriEntry.ToString());

                    var nodeDetail = docEntry.DocumentNode.SelectSingleNode("//div[@id=\"birdDetail\"]");

                    // NOTE(review): the XPaths below start with "//", so they search the whole document
                    // rather than only the birdDetail subtree - confirm that this is intended.
                    var imageNodes = nodeDetail.SelectNodes("//div[@id=\"gallery\"]/div/img");
                    var bilder = imageNodes == null
                        ? new Bild[0]
                        : imageNodes.Select(nodeImg => new Bild
                        {
                            Titel = Decode(nodeImg.Attributes["title"].Value),
                            Source = new Uri(baseUri, Decode(nodeImg.Attributes["src"].Value)).ToString()
                        }).ToArray();

                    Vogel vogel = new Vogel
                    {
                        Name = Decode(elEntry.InnerText),
                        Gruppe = Decode(nodeDetail.SelectSingleNode("//td[strong/text()=\"Vogelgruppe:\"]").LastChild.InnerText),
                        Lebensraum = Decode(nodeDetail.SelectSingleNode("//td[strong/text()=\"Lebensraum:\"]").LastChild.InnerText),
                        Laenge = Decode(nodeDetail.SelectSingleNode("//td[strong/text()=\"Länge (cm):\"]").LastChild.InnerText),
                        Bilder = bilder
                    };

                    voegel.Add(vogel);

                    foreach (var bild in vogel.Bilder)
                    {
                        Console.WriteLine(bild.Source);

                        // After the download, Source is replaced by the local file name so that
                        // the serialized data refers to the downloaded copy.
                        string strFile = Path.GetFileName(bild.Source);
                        client.DownloadFile(bild.Source, IMAGES_FOLDER + "/" + strFile);
                        bild.Source = strFile;
                    }
                }
            }

            JavaScriptSerializer serializer = new JavaScriptSerializer();
            using (StreamWriter writer = File.CreateText("data.js"))
            {
                writer.Write("var Voegel = ");
                writer.Write(serializer.Serialize(voegel.OrderBy(v => v.Name)));
                writer.Write(";\r\nvar Gruppen = ");
                writer.Write(serializer.Serialize(voegel.Select(v => v.Gruppe).Distinct().OrderBy(g => g)));
                writer.Write(";\r\nvar Lebensraeume = ");
                // Lebensraum is a comma-separated list; flatten it into distinct, trimmed entries.
                writer.Write(serializer.Serialize(voegel.SelectMany(v => v.Lebensraum.Split(',').Select(l => l.Trim())).Distinct().OrderBy(l => l)));
                writer.Write(";");
            }
        }

EXAMPLE #3
0
  
        /// <summary>
        /// Loads the story at Url and rebuilds Pages with the HTML content of every page.
        /// </summary>
        public void GeneratePages()
        {
            var loader = new HtmlWeb();
            _htmlDoc = loader.Load(this.Url);

            int lastPage = this.GetMaxPage(_htmlDoc);
            this.Pages = new List<WattpadPage>();

            for (int pageNo = 1; pageNo <= lastPage; pageNo++)
            {
                _htmlDoc = loader.Load(this.Url + "/page/" + pageNo);

                // First storyText div of the page, or null when the page has none.
                HtmlNode storyNode = _htmlDoc.DocumentNode.SelectSingleNode("//div[@id='storyText']");
                string html = storyNode != null ? storyNode.InnerHtml : "";

                // A page entry is recorded even when no story text was found (empty content).
                var page = new WattpadPage { PageNumber = pageNo, Content = html };
                this.Pages.Add(page);
            }
        }

EXAMPLE #4
0
  
        /// <summary>
        /// Scrapes the page of mCarBrand: fills in its logo URL, official site, country and list URL,
        /// then loads the list page, adds one CarFactory per factory node and calls runFactoryTasks.
        /// </summary>
        public void Run()
        {
            HtmlWeb htmlWeb = new HtmlWeb();

            // Brand page. Each section of interest is re-parsed from its OuterHtml into a separate node
            // before it is queried - presumably so the follow-up XPath is scoped to that fragment; confirm.
            // None of the SelectSingleNode results below is checked for null.
            HtmlDocument htmlDocument = htmlWeb.Load(WebConstants.BASE_URL + mCarBrand.Url);
            HtmlNode logoNode = HtmlNode.CreateNode(htmlDocument.DocumentNode.SelectSingleNode(WebConstants.BRAND_LOGO).OuterHtml);
            mCarBrand.LogoUrl = logoNode.SelectSingleNode(WebConstants.IMAGE_SRC).Attributes[WebConstants.SRC].Value;
            //new Thread(new BrandLogoDownloadTask(mCarBrand).Download).Start();

            HtmlNode officialSiteNode = HtmlNode.CreateNode(htmlDocument.DocumentNode.SelectSingleNode(WebConstants.BRAND_OFFICIAL_SITE).OuterHtml);
            mCarBrand.OfficialSite = officialSiteNode.SelectSingleNode(WebConstants.LINK_HREF).Attributes[WebConstants.HREF].Value;
            HtmlNode countryNode = HtmlNode.CreateNode(htmlDocument.DocumentNode.SelectSingleNode(WebConstants.BRAND_COUNTRY).OuterHtml);
            // The country name is the node's text with a prefix removed; the prefix length is the
            // length of the text of the node selected by WebConstants.EM.
            mCarBrand.Country = new Country(countryNode.InnerText.Substring(countryNode.SelectSingleNode(WebConstants.EM).InnerText.Length));
            mCarBrand.Country.LogoUrl = countryNode.SelectSingleNode(WebConstants.IMAGE_SRC).Attributes[WebConstants.SRC].Value;
            //new Thread(new CountryLogoDownloadTask(mCarBrand.Country).Download).Start();

            // The list URL is taken from the node selected by WebConstants.SCRIPT_SRC inside the
            // brand-list section; that URL is then loaded as a second document, replacing htmlDocument.
            HtmlNode brandListNode = HtmlNode.CreateNode(htmlDocument.DocumentNode.SelectSingleNode(WebConstants.BRAND_LIST).OuterHtml);
            mCarBrand.ListUrl = brandListNode.SelectSingleNode(WebConstants.SCRIPT_SRC).Attributes[WebConstants.SRC].Value;
            htmlDocument = htmlWeb.Load(WebConstants.BASE_URL + mCarBrand.ListUrl);
            HtmlNodeCollection factoryNodes = htmlDocument.DocumentNode.SelectNodes(WebConstants.FACTORY_NODE);
            // A null result (no factory nodes) is tolerated: the brand then simply gets no factories.
            if (factoryNodes != null)
            {
                foreach (HtmlNode tempNode in factoryNodes)
                {
                    HtmlNode factoryNode = HtmlNode.CreateNode(tempNode.OuterHtml);
                    CarFactory carFactory = new CarFactory(mCarBrand);
                    carFactory.Url = factoryNode.SelectSingleNode(WebConstants.LINK_HREF).Attributes[WebConstants.HREF].Value;
                    // Every "/" is stripped from the node text to form the factory name.
                    carFactory.Name = factoryNode.InnerText.Replace("/", "");
                    mCarBrand.CarFactoryList.Add(carFactory);
                }
            }

            runFactoryTasks();
        }

EXAMPLE #5
0
  
        /// <summary>
        /// Downloads every page of a mangareader.net chapter and collects the image URL of each page.
        /// </summary>
        /// <param name="source">Not used by this method.</param>
        /// <param name="link">URL of the chapter's first page.</param>
        /// <returns>A ChapterData whose Images list holds one "src" value per page, in page order.</returns>
        public static ChapterData getChapters(Source source, string link)
        {
            ChapterData chapter = new ChapterData();
            var web = new HtmlAgilityPack.HtmlWeb();
            web.AutoDetectEncoding = true;
            var htmlpage1 = web.Load(link);

            // The first page is already available; the remaining ones are fetched in the background.
            var pages = new List<IObservable<HtmlDocument>>();
            pages.Add(Observable.Return(htmlpage1));

            // The options of the page menu carry the relative URL of each page in their "value" attribute.
            var linksToPages = htmlpage1.DocumentNode.SelectNodes(@"//*[@id='pageMenu']/option");

            // SelectNodes yields null rather than an empty collection when nothing matches.
            int pageCount = linksToPages == null ? 0 : linksToPages.Count;

            // Index 0 is skipped: the first page was loaded above.
            for (int i = 1; i < pageCount; i++)
            {
                var linkToPage = "http://www.mangareader.net" + linksToPages[i].GetAttributeValue("value", "");
                pages.Add(Observable.Start<HtmlDocument>(
                    () =>
                    {
                        // Each background fetch uses its own HtmlWeb and returns its own document,
                        // so no state is shared between the concurrently running downloads.
                        var web2 = new HtmlAgilityPack.HtmlWeb();
                        web2.AutoDetectEncoding = true;
                        return web2.Load(linkToPage);
                    }
                ));
            }

            // Wait on the pages in order so that Images keeps the page sequence.
            foreach (IObservable<HtmlDocument> item in pages)
            {
                HtmlDocument pagehtml = item.Wait();
                chapter.Images.Add(pagehtml.DocumentNode.SelectSingleNode(@"//*[@id='img']").GetAttributeValue("src", ""));
            }
            return chapter;
        }

EXAMPLE #6
0
  
        /// <summary>
        /// Scrapes the main, staff and character pages for the novel with the given index
        /// and saves the assembled novel.
        /// </summary>
        /// <param name="index">Value substituted into MainUrlPattern, StaffPattern and CharacterPattern.</param>
        public void ProcessIndex(int index)
        {
            var novel = new Novel();
            var loader = new HtmlWeb();

            // Main page: the first "mainbox" div, plus the optional releases and screenshots sections.
            HtmlDocument page = loader.Load(String.Format(MainUrlPattern, index));
            var boxes = page.DocumentNode.Descendants("div").Where(d => d.HasClass("mainbox")).ToArray();
            ParseMainContent(boxes[0], novel);

            var releases = page.DocumentNode.Descendants("div").FirstOrDefault(d => d.HasClass("releases"));
            if (releases != null)
            {
                ParseReleasesContent(releases, novel);
            }

            var screenshots = page.DocumentNode.Descendants("div").FirstOrDefault(d => d.HasId("screenshots"));
            if (screenshots != null)
            {
                ParseImagesContent(screenshots, novel);
            }

            // Staff page: the first div that has class "staff" and passes NotContainsClass("cast").
            page = loader.Load(String.Format(StaffPattern, index));
            var staff = page.DocumentNode.Descendants("div").FirstOrDefault(d => d.HasClass("staff") && d.NotContainsClass("cast"));
            if (staff != null)
            {
                ParseStaffContent(staff, novel);
            }

            // Characters page: every "mainbox" div except the first one is handed to the character parser.
            page = loader.Load(String.Format(CharacterPattern, index));
            boxes = page.DocumentNode.Descendants("div").Where(d => d.HasClass("mainbox")).ToArray();
            foreach (var characterBox in boxes.Skip(1))
            {
                ParseCharactersContent(characterBox, novel);
            }

            using (var ctx = new VNContext("VNConnectionString"))
            {
                NovelManager.SaveNovel(novel, ctx);
                Logs.Debug($@"Novel {index} finished");
            }

            Console.WriteLine(index + @" finished");
        }


EXAMPLE #7
0
  
        /// <summary>
        /// Extracts "name - e-mail" pairs for the authors listed on a page, appends them to
        /// C:\Springer\Springer Emails.txt and returns them.
        /// </summary>
        /// <param name="url">Address of the page to scan.</param>
        /// <returns>The lines appended to the file; empty when no author with an e-mail node was found.</returns>
        public static List<string> getNameOfEmail(string url)
        {
            List<string> a = new List<string>();
            HtmlWeb website = new HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = website.Load(url);

            // NOTE(review): the XPaths in this method look truncated (no element/attribute name
            // before "=") - confirm against the original project.
            HtmlNodeCollection authors = doc.DocumentNode.SelectNodes(".//='author']");

            // CreateDirectory does nothing when the directory already exists, so no Exists check is needed.
            Directory.CreateDirectory(@"C:\Springer\");

            using (StreamWriter outputFile = new StreamWriter(@"C:\Springer\Springer Emails.txt", true))
            {
                // SelectNodes yields null when nothing matches.
                if (authors != null)
                {
                    foreach (HtmlNode author in authors)
                    {
                        HtmlNode Name = author.SelectSingleNode(".//='person']");
                        HtmlNode EMail = author.SelectSingleNode(".//='envelope']");

                        // Authors without an e-mail node are skipped.
                        if (EMail == null)
                        {
                            continue;
                        }

                        // A missing name node or title attribute yields an empty string instead of a crash.
                        string displayName = Name != null ? Name.InnerText : "";
                        string line = displayName + " - " + EMail.GetAttributeValue("title", "");

                        outputFile.WriteLine(line);
                        a.Add(line);
                    }
                }
            }

            return a;
        }
EXAMPLE #8
0
  
        /// <summary>
        /// Loads a page, records it in VisitedPages and enqueues every not-yet-seen link on it
        /// that points to the same host as baseUrl.
        /// </summary>
        /// <param name="url">Address of the page to crawl.</param>
        public static void getHrefs(string url)
        {
            // Crawling is best-effort: a page that cannot be fetched or parsed is skipped.
            try
            {
                HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlWeb();
                HtmlAgilityPack.HtmlDocument doc = htmlWeb.Load(url);

                // Adding the crawled url to the list of crawled urls
                VisitedPages.Add(url);

                // Every <a> element that has an href attribute.
                // NOTE(review): the XPath arrived garbled as "//]"; "//a[@href]" is reconstructed from the
                // surrounding code (<a> tags, href attribute) - confirm against the original project.
                HtmlNodeCollection links = doc.DocumentNode.SelectNodes("//a[@href]");
                if (links == null)
                {
                    // SelectNodes yields null when the page has no matching element.
                    return;
                }

                foreach (HtmlNode link in links)
                {
                    // Resolve the href against baseUrl; a malformed href only skips that one link.
                    Uri l;
                    if (!Uri.TryCreate(baseUrl, link.Attributes["href"].Value, out l))
                    {
                        continue;
                    }

                    string absolute = l.ToString();

                    // Enqueue only links that are new and belong to the host being crawled.
                    if (!LinkQueue.Contains(absolute) && !VisitedPages.Contains(absolute) && l.Host == baseUrl.Host)
                    {
                        LinkQueue.Enqueue(absolute);
                    }
                }
            }
            catch (Exception ex)
            {
                // Still non-fatal, but no longer invisible.
                System.Diagnostics.Debug.WriteLine("getHrefs failed for " + url + ": " + ex.Message);
            }
        }
EXAMPLE #9
0
  
        /// <summary>
        /// Returns the title of a remote page: the HTML-decoded Open Graph title when present,
        /// otherwise the text of the page's first title element.
        /// </summary>
        /// <param name="remoteUri">Address of the page.</param>
        /// <returns>The title, or null when none was found or any step failed.</returns>
        public static string GetTitleFromUri(string @remoteUri)
        {
            try
            {
                // try using Open Graph to get target page title
                var graph = OpenGraph.ParseUrl(@remoteUri, "Voat.co OpenGraph Parser");
                if (!string.IsNullOrEmpty(graph.Title))
                {
                    return HttpUtility.HtmlDecode(graph.Title);
                }

                // Open Graph parsing failed, try getting HTML TITLE tag instead
                HtmlWeb htmlWeb = new HtmlWeb();
                HtmlDocument htmlDocument = htmlWeb.Load(@remoteUri);
                if (htmlDocument == null)
                {
                    return null;
                }

                // FirstOrDefault rather than SingleOrDefault: a page with more than one title element
                // would otherwise throw and lose a usable title.
                var titleNode = htmlDocument.DocumentNode.Descendants("title").FirstOrDefault();
                return titleNode != null ? titleNode.InnerText : null;
            }
            catch (Exception)
            {
                // Title lookup is best-effort; any failure is reported to the caller as "no title".
                return null;
            }
        }
EXAMPLE #10
0
  
        /// <summary>
        /// Loads the listing page of a city and, for every link whose text is "Phone",
        /// prints phone, name and address and writes them as one comma-separated line to file.
        /// </summary>
        /// <param name="state">State segment of the listing URL.</param>
        /// <param name="city">City segment of the listing URL; "-plumbers" is removed from it in the output.</param>
        private void ExtractCity(string state, string city)
        {
            HtmlWeb web = new HtmlWeb();
            string cityUrl = RootUrl + @"/" + state + @"/" + city + @"?" + @"page=1&ipp=All";
            HtmlDocument doc = web.Load(cityUrl);

            // All <a> elements that have an href attribute and non-blank text.
            var anchors = doc.DocumentNode.Descendants()
                .Where(node => node.Name == "a"
                               && node.Attributes["href"] != null
                               && node.InnerText.Trim().Length > 0);

            foreach (var anchor in anchors)
            {
                if (anchor.InnerText != "Phone")
                {
                    continue;
                }

                // The phone number is the text of the node following the link's parent.
                string phone = anchor.ParentNode.NextSibling.InnerText;
                Console.WriteLine();
                Console.WriteLine("phone: " + phone);

                // Name and address are the second and third text lines of the second child
                // of the link's great-great-grandparent.
                string[] lines = anchor.ParentNode.ParentNode.ParentNode.ParentNode
                    .FirstChild.NextSibling.InnerText.Split('\n');
                string name = lines[1].Trim();
                string address = lines[2].Trim();

                Console.WriteLine("name: " + name);
                Console.WriteLine("address: " + address);
                file.WriteLine(name + "," + address + "," + city.Replace("-plumbers", "") + "," + phone.Replace(" ", ""));
            }
        }
EXAMPLE #11
0
  
        /// <summary>
        /// Looks up a licence plate on nummerplade.net and reads the vehicle details from the result page.
        /// </summary>
        /// <param name="nummerplade">Licence plate, appended to the search URL as given.</param>
        /// <returns>A Bilinformation filled from the table cells of the result page.</returns>
        /// <exception cref="IngenBilinformationException">Any failure while loading or reading the page.</exception>
        public static Bilinformation HentBilinformation(string nummerplade)
        {
            try
            {
                var result = new Bilinformation();

                string address = "http://www.nummerplade.net/soeg/?regnr=" + nummerplade;
                HtmlDocument page = new HtmlWeb().Load(address);

                var root = page.DocumentNode;
                if (root == null)
                {
                    return result;
                }

                // A missing cell makes SelectSingleNode return null; the resulting exception is
                // wrapped by the catch below.
                result.Maerke = root.SelectSingleNode("//td[@id='maerke']").InnerText;
                result.Model = root.SelectSingleNode("//td[@id='model']").InnerText;
                result.Variant = root.SelectSingleNode("//td[@id='variant']").InnerText;
                result.Stelnummer = root.SelectSingleNode("//td[@id='stelnr']").InnerText;
                result.Aargang = root.SelectSingleNode("//td[@id='model_aar']").InnerText;
                result.Nummerplade = root.SelectSingleNode("//td[@id='regnr']").InnerText;

                return result;
            }
            catch (Exception ex)
            {
                throw new IngenBilinformationException("Der blev ikke fundet nogen bilinformation på nummerpladen.", ex);
            }
        }
EXAMPLE #12
0
  
        /// <summary>
        /// Fetches the lyrics of a song from letras.mus.br.
        /// </summary>
        /// <param name="artist">Artist name; null is treated as empty.</param>
        /// <param name="title">Song title; null is treated as empty.</param>
        /// <returns>The lyrics with line breaks as "\r\n", or an empty string when no lyrics were found.</returns>
        public static string GetFromTerra(string artist, string title)
        {
            artist = (artist + "").ToLowerInvariant();
            title = (title + "").ToLowerInvariant();

            // Fetch the song lyrics.
            HtmlWeb web = new HtmlWeb();
            HtmlDocument doc = web.Load(string.Format("http://letras.mus.br/winamp.php?t={0}-{1}", HttpUtility.UrlEncode(artist, ISOEncoding), HttpUtility.UrlEncode(title, ISOEncoding)));
            HtmlNode node = doc.DocumentNode.SelectSingleNode("//div[@id='letra']/p");

            if (node == null)
            {
                // Not found: retry once with every '&' replaced by 'e'. The retry cannot loop,
                // because the replaced strings no longer contain '&'.
                if (artist.Contains("&") || title.Contains("&"))
                {
                    return GetFromTerra(artist.Replace('&', 'e'), title.Replace('&', 'e'));
                }

                // Nothing left to try; report "no lyrics" instead of dereferencing the missing node.
                return string.Empty;
            }

            // Turn HTML line breaks into text line breaks before the markup is stripped by InnerText.
            node.InnerHtml = node.InnerHtml.Replace("<br>", "\r\n");

            return WebUtility.HtmlDecode(node.InnerText);
        }
EXAMPLE #13
0
  
File: Parser.cs Project: horbel/FilmCatalog
        /// <summary>
        /// Loads a fixed Yandex search page as UTF-8 and prints the inner HTML of every div
        /// to the console, or "found nothing" when the page has no div.
        /// </summary>
        public static void  GetText2()
        {
            string html = "https://yandex.by/search/?numdoc=10&p=0&rdrnd=601861&text=kinogo.co Один дома 1990 &lr=157";
            var web = new HtmlWeb
            {
                // Encoding detection is switched off and UTF-8 is forced instead.
                AutoDetectEncoding = false,
                OverrideEncoding = Encoding.UTF8 //GetEncoding("windows-1251")
            };

            HtmlDocument HD = web.Load(html);

            HtmlNodeCollection NoAltElements = HD.DocumentNode.SelectNodes("//div");

            // TODO: finish this.
            if (NoAltElements != null)
            {
                foreach (HtmlNode node in NoAltElements)
                {
                    Console.WriteLine(node.InnerHtml);
                }
            }
            else
            {
                Console.WriteLine("found nothing");
            }
        }
EXAMPLE #14
0
  
 /// <summary>
 /// Loads the page at BASE + URL (HTML-decoded) and returns the links found in the fourth
 /// column of the table with id "myTable".
 /// </summary>
 /// <param name="URL">Address relative to BASE; may contain HTML entities.</param>
 /// <returns>The matching link nodes, as returned by SelectNodes.</returns>
 static HtmlNodeCollection GetSuburb(string URL)
 {
     string suburbURL = System.Net.WebUtility.HtmlDecode(BASE + URL);
     HtmlDocument suburbPage = new HtmlWeb().Load(suburbURL);
     HtmlNode root = suburbPage.DocumentNode;
     return root.SelectNodes("//table[@id='myTable']/tbody/tr/td[4]/a");
 }
EXAMPLE #15
0
  
        /// <summary>
        /// Lists the name and content of every meta tag of a page that has both attributes.
        /// </summary>
        /// <param name="url">Page address; the bare value "http://" is replaced by "http://www.microsoft.com".</param>
        /// <returns>Alternating "Name=..." and "Content=..." entries; empty when the page has no meta tags.</returns>
        public List<string> GetData(string url)
        {
            if (url == "http://")
            {
                url = "http://www.microsoft.com";
            }

            // Fetch the page from the remote server.
            var document = new HtmlWeb().Load(url);
            var metaTags = document.DocumentNode.SelectNodes("//meta");

            var output = new List<string>();
            if (metaTags == null)
            {
                return output;
            }

            foreach (var tag in metaTags)
            {
                var nameAttribute = tag.Attributes["name"];
                var contentAttribute = tag.Attributes["content"];

                // Tags lacking either attribute are ignored.
                if (nameAttribute == null || contentAttribute == null)
                {
                    continue;
                }

                output.Add("Name=" + nameAttribute.Value);
                output.Add("Content=" + contentAttribute.Value);
            }

            return output;
        }
EXAMPLE #16
0
  
        /// <summary>
        /// Returns the href value of every link on a page that has non-blank text.
        /// </summary>
        /// <param name="url">Address of the page to load.</param>
        /// <returns>The href values in document order; the link text is not included.</returns>
        public List<string> GetHrefLinks(string url)
        {
            // Fetch the page from the remote server.
            var webDocument = new HtmlWeb().Load(url);

            return webDocument.DocumentNode.Descendants()
                .Where(node => node.Name == "a"
                               && node.Attributes["href"] != null
                               && node.InnerText.Trim().Length > 0)
                .Select(node => node.Attributes["href"].Value)
                .ToList();
        }
EXAMPLE #17
0
  
 /// <summary>
 /// Reads the news items listed on the given page of the site at RootUrl.
 /// </summary>
 /// <param name="page">Page number; values above 1 are passed as the "wpage" query parameter.</param>
 /// <returns>One NewsObject (link and trimmed text) per "div.recentNews" element.</returns>
 public override List<NewsObject> NewestNews(int page)
 {
     string htmlUrl = page > 1 ? RootUrl + "?wpage=" + page : RootUrl;

     var loader = new HtmlWeb();
     loader.AutoDetectEncoding = false;
     // Force UTF-8 so that Vietnamese text is displayed correctly.
     loader.OverrideEncoding = Encoding.UTF8;

     HtmlDocument document = loader.Load(htmlUrl);

     var results = new List<NewsObject>();
     foreach (var item in document.DocumentNode.QuerySelectorAll("div.recentNews").ToList())
     {
         // The heading holds both the link (in its <a>) and the visible text.
         var heading = item.QuerySelector("h2.subHeading");
         string relativeLink = heading.QuerySelector("a").Attributes["href"].Value;

         var news = new NewsObject();
         news.Link = RootUrl + relativeLink;
         news.Text = TrimHtml(heading.InnerText);
         results.Add(news);
     }
     return results;
 }
EXAMPLE #18
0
  
 /// <summary>
 /// Loads the page of a serie and returns the "src" of its cover image.
 /// </summary>
 /// <param name="serie">Serie whose URL is loaded.</param>
 /// <returns>The image's "src" attribute, or an empty string when the attribute is absent.</returns>
 protected override string _GetSerieMiniatureUrl(Serie serie)
 {
     var seriePage = new HtmlWeb().Load(serie.URL);

     // NOTE(review): this XPath looks truncated (no element/attribute name before "=") - confirm
     // against the original project.
     var cover = seriePage.DocumentNode.SelectSingleNode("//='cover']/img");
     return cover.GetAttributeValue("src", "");
 }
EXAMPLE #19
0
  
        /// <summary>
        /// Reads the chapter URLs from the options of the page's "selectBox" drop-down.
        /// </summary>
        /// <param name="url">Address of the page to load.</param>
        /// <returns>
        /// The "value" of every option ("NO_URL" when missing), with the first "#" entry removed;
        /// null when the page has no select element of class "selectBox".
        /// </returns>
        public static List<string> GetChapterUrls(string url)
        {
            HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlWeb();
            HtmlAgilityPack.HtmlDocument htdoc = htmlWeb.Load(url);

            List<string> ret = new List<string>();

            // GetAttributeValue tolerates select elements that have no class attribute at all.
            // The query is materialized once so that the page is not traversed twice.
            List<HtmlAgilityPack.HtmlNode> selectList = htdoc.DocumentNode.Descendants("select")
                                        .Where(x => x.GetAttributeValue("class", "") == "selectBox")
                                        .ToList();

            if (selectList.Count == 0) return null;

            // Exactly one matching drop-down is expected; Single throws when there are several.
            var selectElement = selectList.Single();

            foreach (var cNode in selectElement.ChildNodes)
            {
                if (cNode.Name == "option")
                {
                    ret.Add(cNode.GetAttributeValue("value", "NO_URL"));
                }
            }

            // Clean-up: drop the "#" entry.
            ret.Remove("#");

            return ret;
        }
EXAMPLE #20
0
  
        /// <summary>
        /// Reads the BibTeX entry from a "viewdoc" page.
        /// </summary>
        /// <param name="url">
        /// Page address. Only addresses containing "viewdoc" are processed
        /// (e.g. http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.31.3487); any other address
        /// (e.g. http://citeseer.ist.psu.edu/showciting?cid=2131272) yields an empty string.
        /// </param>
        /// <returns>The entry text with a line break after every comma, or an empty string.</returns>
        public string getBibTex(string url)
        {
            if (!url.Contains("viewdoc"))
            {
                return "";
            }

            HtmlDocument doc = new HtmlWeb().Load(url);
            Console.WriteLine(doc != null ? "Document Loaded!" : "Load Error!");

            string bibtex = "";
            try
            {
                HtmlNode n = doc.DocumentNode.SelectSingleNode("//*[@id=\"bibtex\"]/p");
                if (n != null)
                {
                    bibtex = n.InnerText.Replace(",", ",\n").Replace("&nbsp;", " ");
                }
            }
            catch (Exception)
            {
                // Any failure while reading the entry leaves the result empty.
            }
            return bibtex;
        }
EXAMPLE #21
0
  
        /// <summary>
        /// Loads a page of the tok.fm podcast listing and returns the podcasts linked on it.
        /// </summary>
        /// <param name="pageNumber">Value of the "str" query parameter.</param>
        /// <returns>One Podcast per matching link; empty when the page has none.</returns>
        public IEnumerable<Podcast> GetLatestPodcasts(int pageNumber)
        {
            var hw = new HtmlWeb();
            hw.OverrideEncoding = Encoding.GetEncoding("ISO-8859-2");
            var doc = hw.Load("http://www.tok.fm/TOKFM/0,94037.html?str=" + pageNumber.ToString(CultureInfo.InvariantCulture));

            // The document is saved as XML with syntax checking and nested-tag fixing switched on
            // and then loaded again, so the queries below run against that rewritten markup.
            doc.OptionOutputAsXml = true;
            doc.OptionCheckSyntax = true;
            doc.OptionFixNestedTags = true;
            var sb = new StringBuilder();
            using (var stringWriter = new StringWriter(sb))
            {
                doc.Save(stringWriter);
            }
            using (var stringReader = new StringReader(sb.ToString()))
            {
                doc.Load(stringReader);
            }

            var result = new List<Podcast>();

            // NOTE(review): this XPath looks truncated (no element/attribute name before "=") - confirm
            // against the original project.
            var links = doc.DocumentNode.SelectNodes("//='tokfm_play']");
            if (links == null)
            {
                // SelectNodes yields null when nothing matches.
                return result;
            }

            foreach (HtmlNode link in links)
            {
                // The image is optional; links without one get an empty ImageURL.
                var imgNode = link.SelectSingleNode("img");
                var imageURL = String.Empty;
                if (imgNode != null)
                    imageURL = imgNode.Attributes["src"].Value;
                result.Add(new Podcast { Href = link.Attributes["href"].Value, Title = link.Attributes["title"].Value, ImageURL = imageURL });
            }

            return result;
        }
EXAMPLE #22
0
  
File: Parser.cs Project: alexkiro/Timr
        /// <summary>
        /// Downloads and parses the timetable page of a study year.
        /// </summary>
        /// <param name="year">Study year; its enum name is the first part of the page name.</param>
        /// <param name="halfYear">Optional half year; its enum name is appended unless it is None.</param>
        /// <returns>The parsed timetable, or null when the download failed.</returns>
        public List<TimetableItem> GetTimetableForYear(StudyYear year, HalfYear halfYear = HalfYear.None)
        {
            List<TimetableItem> timetable;

            string tempYear = Enum.GetName(typeof(StudyYear), year);
            string tempHalfYear = Enum.GetName(typeof(HalfYear), halfYear);
            // "None" means the page name has no half-year suffix.
            if (tempHalfYear == "None") tempHalfYear = String.Empty;

            try
            {
                HtmlWeb hw = new HtmlWeb();
                // The suffix must be tempHalfYear: formatting the enum itself would put "None" into the URL.
                HtmlDocument doc = hw.Load(String.Format("http://thor.info.uaic.ro/~orar/participanti/orar_{0}{1}.html", tempYear, tempHalfYear));
                // Line breaks are stripped from the markup before the table is parsed.
                doc.DocumentNode.InnerHtml = doc.DocumentNode.InnerHtml.Replace("\r\n", "");

                timetable = ParseTable(doc, TimetableType.Year);
            }
            catch (WebException ex)
            {
                Logger.ExceptionLogger.Log(ex);
                timetable = null;
            }
            catch (NotSupportedException ex)
            {
                Logger.ExceptionLogger.Log(ex);
                timetable = null;
            }
            return timetable;
        }
EXAMPLE #23
0
  
        /// <summary>
        /// Loads a public LinkedIn profile, prints the name, summary and skills nodes and then
        /// calls Info for each of the listed profile sections.
        /// </summary>
        /// <param name="args">Not used.</param>
        static void Main(string[] args)
        {
            var doc = new HtmlWeb().Load("https://ua.linkedin.com/in/kirillmiroshnichenko");
            var root = doc.DocumentNode;

            // NOTE(review): the three XPaths below look truncated (no element/attribute name
            // before "=") - confirm against the original project.
            Print(root.SelectNodes("//='full-name']"));
            Print(root.SelectNodes("//='description']"));
            Print(root.SelectNodes("//='skill-pill']"));
            Console.WriteLine("-------------");

            string[] sections =
            {
                "experience", "courses", "projects", "certifications", "languages", "education", "interests",
                "patents", "publications", "honors", "test-scores", "organizations", "volunteering"
            };

            foreach (string section in sections)
            {
                Info(doc, section);
            }

            // Keep the console window open until Enter is pressed.
            Console.ReadLine();
        }
EXAMPLE #24
0
  
        /// <summary>
        /// Loads a GitHub Jobs result page and appends one JobListing per result row.
        /// </summary>
        /// <param name="url">Address of the result page.</param>
        /// <param name="jobListings">List that receives the listings found.</param>
        private void AddGithubJobs(string url, List<JobListing> jobListings)
        {
            var document = new HtmlWeb().Load(url);

            const string baseURL = "https://jobs.github.com";

            try
            {
                // NOTE(review): both XPaths look truncated (no element/attribute name before "=") -
                // confirm against the original project.
                HtmlNode positionList = document.DocumentNode.SelectSingleNode("//='positionlist']");
                HtmlNodeCollection rows = positionList.SelectNodes(".//='title']");

                foreach (HtmlNode row in rows)
                {
                    // Only rows with exactly five child nodes are read.
                    if (row.ChildNodes.Count != 5)
                    {
                        continue;
                    }

                    var listing = new JobListing();
                    listing.SearchEngine = SearchEngines.GitHub;
                    listing.Title = row.ChildNodes[1].InnerText;
                    listing.Company = row.ChildNodes[3].ChildNodes[1].InnerText;
                    listing.URL = baseURL + row.ChildNodes[1].ChildNodes[0].Attributes[0].Value;
                    jobListings.Add(listing);
                }
            }
            catch
            {
                // Any failure while reading the rows (including a missing list) ends the scan silently.
            }
        }
EXAMPLE #25
0
  
  /// <summary>
  /// Sets up the main layout, fills the status text view with the text of every table cell of the
  /// route page (markup removed) and wires the button to open Page2.
  /// </summary>
  /// <param name="savedInstanceState">Passed through to the base implementation.</param>
  protected override void OnCreate (Bundle savedInstanceState)
  {
   base.OnCreate (savedInstanceState);

   SetContentView (Resource.Layout.Main);

   TextView textView = FindViewById<TextView> (Resource.Id.TEXT_STATUS_ID);

   // NOTE(review): the page is downloaded synchronously inside OnCreate - confirm that blocking
   // here is acceptable for this activity.
   HtmlWeb web = new HtmlWeb();
   HtmlDocument doc = web.Load("https://www.ltd.org/system-map/route_79x/");

   // The text is assembled in a builder and assigned once, instead of re-reading and
   // re-assigning the view's text for every cell.
   var text = new System.Text.StringBuilder(textView.Text);
   HtmlNodeCollection tags = doc.DocumentNode.SelectNodes("//td");
   // SelectNodes yields null when the page has no table cell.
   if (tags != null)
   {
    foreach (HtmlNode item in tags)
    {
     text.Append(item.InnerHtml).Append("\n");
    }
   }
   // Strip whatever tags the cells contained.
   textView.Text = Regex.Replace(text.ToString(), @"<[^>]*>", String.Empty);

   Button button = FindViewById<Button> (Resource.Id.myButton);

   button.Click += delegate {
    StartActivity(typeof(Page2));
   };
  }
EXAMPLE #26
0
  
        //Public Methods
        /// <summary>
        /// Loads a page, prints the href of every link on it and adds each absolute href to _results.
        /// </summary>
        /// <param name="url">Address of the page to scrape.</param>
        public void Scrape(string url)
        {
            try
            {
                HtmlWeb hw = new HtmlWeb();
                HtmlDocument doc = hw.Load(url);

                // NOTE(review): the XPath arrived garbled as "//]"; "//a[@href]" is reconstructed from the
                // loop body (it reads the href attribute of each node) - confirm against the original project.
                HtmlNodeCollection links = doc.DocumentNode.SelectNodes("//a[@href]");
                if (links == null)
                {
                    // SelectNodes yields null when the page has no matching element.
                    return;
                }

                foreach (HtmlNode link in links)
                {
                    HtmlAttribute att = link.Attributes["href"];
                    if (att == null)
                    {
                        continue;
                    }

                    Console.WriteLine(att.Value);

                    // Only absolute addresses can be stored as Uri; relative or malformed hrefs are
                    // printed above but skipped here, without relying on an exception.
                    Uri target;
                    if (Uri.TryCreate(att.Value, UriKind.Absolute, out target))
                    {
                        this._results.Add(target);
                    }
                }
            }
            catch (Exception ex)
            {
                // Scraping stays best-effort, but the failure is reported instead of vanishing.
                Console.Error.WriteLine("Scrape failed for " + url + ": " + ex.Message);
            }
        }
EXAMPLE #27
0
  
        /// <summary>
        /// Queries gametracker.com for Dota 2 servers by map name and appends the
        /// trimmed text of selected table cells to <paramref name="list"/>.
        /// </summary>
        /// <param name="list">Collection that receives the collected cell texts.</param>
        /// <param name="map">Map name to search for; trimmed and URL-escaped before use.</param>
        /// <returns>The same <paramref name="list"/> instance that was passed in.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="map"/> is null.</exception>
        public static List<string> GetServersFromMap(List<string> list, string map)
        {
            if (map == null)
            {
                throw new System.ArgumentNullException("map");
            }

            HtmlWeb htmlWeb = new HtmlWeb();

            // Escape the user-supplied value so spaces, '&' or '#' cannot break the query string.
            string searchUrl = "http://www.gametracker.com/search/dota2/?search_by=map&query="
                + System.Uri.EscapeDataString(map.Trim())
                + "&searchipp=50";
            HtmlAgilityPack.HtmlDocument document = htmlWeb.Load(searchUrl);

            HtmlNodeCollection tables = document.DocumentNode.SelectNodes("//table");
            if (tables == null)
            {
                // SelectNodes yields null when nothing matches.
                return list;
            }

            bool started = false;
            bool stopped = true;

            foreach (HtmlNode table in tables)
            {
                HtmlNodeCollection rows = table.SelectNodes("tr");
                if (rows == null)
                {
                    continue;
                }

                foreach (HtmlNode row in rows)
                {
                    HtmlNodeCollection cells = row.SelectNodes("td");
                    if (cells == null)
                    {
                        continue;
                    }

                    foreach (HtmlNode cell in cells)
                    {
                        string cellText = cell.InnerText;

                        // A "Rank" header cell toggles the stopped flag and ends any open run.
                        if (cellText.Contains("Rank&darr"))
                        {
                            stopped = !stopped;
                            started = false;
                        }

                        if (started && !stopped)
                        {
                            list.Add(cellText.Trim());
                        }

                        // Collection starts with the cell AFTER the "Server Map" header.
                        if (cellText.Contains("Server Map&nbsp;"))
                        {
                            started = true;
                        }
                    }
                }
            }

            return list;
        }
EXAMPLE #28
0
  
File: Zlzp.cs Project: akaxiaok/FindJob
 /// <summary>
 /// Downloads one page of search results from sou.zhaopin.com using the
 /// address, keyword and page number held in <c>_pars</c>, and appends a
 /// <c>JobInfo</c> to <c>_jobList</c> for every matched node after the first.
 /// Any exception is logged through <c>LogSave.ErrLogSave</c> and not rethrown.
 /// </summary>
 public void GetJobListFromWeb()
 {
     try
     {
         var htmlWeb = new HtmlWeb { OverrideEncoding = Encoding.GetEncoding("UTF-8") };
         string searchUrl = string.Format(
             "http://sou.zhaopin.com/jobs/searchresult.ashx?jl={0}&kw={1}&p={2}",
             DataClass.GetDic_zhilian(_pars.Addr), _pars.Key, _pars.Page);
         HtmlDocument htmlDoc = htmlWeb.Load(searchUrl);

         // NOTE(review): the XPath literals in this method look truncated in this
         // listing (an element/attribute name seems to be missing before '=');
         // they are left untouched here and should be checked against the project.
         //
         // The collection is indexed directly: the loop below skips index 0, so
         // the nodes must stay in document order, which AsParallel() without
         // AsOrdered() does not guarantee.
         var nodeList = htmlDoc.DocumentNode.SelectNodes("//*[@='newlist']");
         if (nodeList == null)
         {
             // SelectNodes yields null when nothing matches: no results to add.
             return;
         }

         // Index 0 is skipped, as in the original (presumably a header entry — confirm).
         for (int i = 1; i < nodeList.Count; i++)
         {
             var node = nodeList[i];
             var titleLink = node.SelectSingleNode(".//='zwmc']/div/a");

             var job = new JobInfo();
             job.TitleName = titleLink.InnerText;
             job.InfoUrl = titleLink.Attributes["href"].Value;
             job.Company = node.SelectSingleNode(".//='gsmc']/a").InnerText;
             job.Salary = node.SelectSingleNode(".//='zwyx']").InnerText;
             job.City = node.SelectSingleNode(".//='gzdd']").InnerText;
             job.Date = node.SelectSingleNode(".//='gxsj']/span").InnerText;
             job.Source = "智联招聘";
             job.Method = "月薪";
             _jobList.Add(job);
         }
     }
     catch (Exception ex)
     {
         LogSave.ErrLogSave("错误【解析】", ex);
     }
 }
        /// <summary>
        /// Downloads the BBC football results fragment and walks every table row
        /// that has an id, extracting the match id, score, teams and date.
        /// </summary>
        /// <remarks>
        /// The extracted values are only held in locals; nothing in the visible
        /// code stores them.
        /// NOTE(review): the XPath predicates for score/team/date look truncated in
        /// this listing (an element/attribute name seems to be missing before
        /// '='); restore them from the original project before relying on this.
        /// </remarks>
        public datascraper()
        {
            string url = @"http://www.bbc.co.uk/sport/football/results/partial/competition-118996114";
            HtmlWeb htmlWeb = new HtmlWeb();

            // Load returns its own document, so pre-building one is pointless.
            HtmlDocument doc = htmlWeb.Load(url);

            HtmlNodeCollection mtchrslts = doc.DocumentNode.SelectNodes("//tr[@id]");
            if (mtchrslts == null)
            {
                // SelectNodes yields null when nothing matches.
                return;
            }

            foreach (HtmlNode matchresult in mtchrslts)
            {
                // The row itself is the tr[@id] node; re-querying with a
                // root-anchored "//" always hits the first row in the document.
                string idmess = matchresult.Id;
                string idnum = idmess.Replace("match-row-", "");

                // Leading "." keeps each lookup inside the current row.
                string score = InnerTextOrEmpty(matchresult, ".//='Score']");
                string[] teamscores = score.Split('-');
                if (teamscores.Length < 2)
                {
                    // No "home-away" score in this row; skip it.
                    continue;
                }

                string teamscoreh = teamscores[0];
                string teamscorea = teamscores[1];
                string hteam = InnerTextOrEmpty(matchresult, ".//='team-home teams')]");
                string ateam = InnerTextOrEmpty(matchresult, ".//='team-away teams')]");
                string date = InnerTextOrEmpty(matchresult, ".//='match-date')]");
            }
        }

        // Returns the inner text of the first node matching xpath under parent, or "" when none matches.
        private static string InnerTextOrEmpty(HtmlNode parent, string xpath)
        {
            HtmlNode found = parent.SelectSingleNode(xpath);
            return found == null ? "" : found.InnerText;
        }
EXAMPLE #30
0
  
        /// <summary>
        /// Fetches the page at <paramref name="url"/> with a fresh <c>HtmlWeb</c>
        /// and returns the loaded document.
        /// </summary>
        /// <param name="url">Address of the page to load.</param>
        /// <returns>The document produced by <c>HtmlWeb.Load</c>.</returns>
        public static HtmlDocument Crawl(string url)
        {
            return new HtmlWeb().Load(url);
        }

Phản Hồi Độc Giả

Một số lưu ý khi bình luận

Mọi bình luận sai nội quy sẽ bị xóa mà không cần báo trước (xem nội quy)

Bấm Thông báo cho tôi bên dưới khung bình luận để nhận thông báo khi admin trả lời

Để bình luận một đoạn code, hãy mã hóa code trước nhé