当前位置: 首页>>代码示例>>C#>>正文


C# HtmlHelper.GetNodeByXPath方法代码示例

本文整理汇总了C#中System.HtmlHelper.GetNodeByXPath方法的典型用法代码示例。如果您正苦于以下问题:C# HtmlHelper.GetNodeByXPath方法的具体用法?C# HtmlHelper.GetNodeByXPath怎么用?C# HtmlHelper.GetNodeByXPath使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在System.HtmlHelper的用法示例。


在下文中一共展示了HtmlHelper.GetNodeByXPath方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。

示例1: GetDetailHtml

        public override void GetDetailHtml(Product product, Site site)
        {
            loginfo.Location = this.GetType().Name + "." + System.Reflection.MethodBase.GetCurrentMethod().Name;
            loginfo.Url = product.Url;
            loginfo.KeyInfo = site.SiteName;

            HtmlHelper html = new HtmlHelper(product.Html);

            //get product detail nodes
            var product_node = html.GetNodeByXPath("//div[@class='products']/div[@class='product']/div[@class='desktop']");

            //get product description
            var description_nodes = html.GetNodesByXPath("./div[@class='bottom']/div[1]/div[1]/div[@class='description open-sans-regular']/*",product_node);
            string description = "";

            if (description_nodes.Count > 0)
            {
                foreach (var sellpoint_node in description_nodes)
                {
                    if (sellpoint_node.Equals(description_nodes.First()))
                    {
                        description += sellpoint_node.FirstChild.InnerText.Trim();
                    }
                    else
                    {
                        description += "|" + sellpoint_node.FirstChild.InnerText.Trim();
                    }
                }
            }

            //get sell point
            var sellpoint_nodes = html.GetNodesByXPath("./div[@class='bottom']/div[1]/div[1]/div[@class='bullets open-sans-regular']/div", product_node);
            string sellpoints = "";
            if(sellpoint_nodes.Count>0)
            {
                foreach(var sellpoint_node in sellpoint_nodes)
                {
                    if (sellpoint_node.Equals(sellpoint_nodes.First()))
                    {
                        sellpoints += sellpoint_node.FirstChild.InnerText.Trim();
                    }
                    else
                    {
                        sellpoints += "|" + sellpoint_node.FirstChild.InnerText.Trim();
                    }
                }
            }

            product.Description = description;
            product.SellPoint = sellpoints;
        }
开发者ID:wudi198,项目名称:MySpider,代码行数:51,代码来源:SpiderJetCom.cs

示例2: GetMinFreeShipping

        public void GetMinFreeShipping(Site site)
        {
            logInfo logInfo = new logInfo();
            logInfo.Location = this.GetType().Name + "." + System.Reflection.MethodBase.GetCurrentMethod().Name;

            //get the minimun purchase amount for free shipping from it's F&Q page
            string url = "https://jet.com/help-center/shipping-and-returns";

            HttpItem httpItem = new HttpItem();
            httpItem.Cookie = string.Empty;
            httpItem.Method = "GET";
            httpItem.Header = null;
            httpItem.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
            httpItem.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.130 Safari/537.36";
            //httpItem.Timeout = 60000 * 5;
            httpItem.URL = url;

            HttpHelper httpHelper = new HttpHelper();

            HttpResult httpResult = null;
            try
            {
                httpResult = httpHelper.GetHtml(httpItem);
                if (httpResult == null)
                {
                    logInfo.KeyInfo = "Remote HTML is null (timeout etc.)";
                    Log4Helper.WarningLog(logInfo);
                }
                else
                {
                    if (httpResult.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        logInfo.KeyInfo = "Remote HTML is null (timeout etc.)";
                        Log4Helper.WarningLog(logInfo);
                    }
                    else
                    {
                        string content = httpResult.Html;
                        if (content.Length < 200)
                        {
                            return;
                        }

                        HtmlHelper html = new HtmlHelper(content);
                        var fq_node = html.GetNodeByXPath("//div[@class='page_content']/h1");
                        var ans = html.GetNodesByXPath("following-sibling::p", fq_node);

                        Regex minfreeshipping_pattern = new Regex(@"orders\sover\s\$(\d+)");
                        Regex shippingfee_pattern = new Regex(@"fixed\sshipping\sfee\sof\s\$(\d+(\.\d+)?)");
                        int match_min = 0;
                        int match_fee = 0;
                        foreach(HtmlNode a in ans)
                        {
                            if(minfreeshipping_pattern.Match(a.InnerText).Success)
                            {
                                min_shipping = Convert.ToSingle(minfreeshipping_pattern.Match(a.InnerText).Groups[1].Value);
                                match_min = 1;
                            }
                            if (shippingfee_pattern.Match(a.InnerText).Success)
                            {
                                fee_shipping = Convert.ToSingle(shippingfee_pattern.Match(a.InnerText).Groups[1].Value);
                                match_fee = 1;
                            }
                            if(match_min==1&&match_fee==1)
                            {
                                continue;
                            }
                        }

                    }
                }
            }
            catch (Exception ex)
            {
                logInfo.KeyInfo = "Could not get remote HTML";
                Log4Helper.ErrorLog(logInfo);
            }
        }
开发者ID:wudi198,项目名称:MySpider,代码行数:78,代码来源:SpiderJetCom.cs

示例3: ListItem

        public override void ListItem(List<Product> products, string keyword, Site site, HttpResult httpResult)
        {
            string content = httpResult.Html; //获取HTML
            this.GetMinFreeShipping(site);

            try
            {
                HtmlHelper html = new HtmlHelper(content);
                HtmlNodeCollection htmlNodeCollection = html.GetNodesByXPath("/div[1]/div[2]/div[@class='product desktop not_mobile']");
                if (htmlNodeCollection == null) { return; }
                foreach (var htmlNode in htmlNodeCollection)
                {
                    //get the url of detail page
                    var a = htmlNode.FirstChild;
                    string href = a.Attributes["href"].Value;
                    href = Regex.Split(href, "/ref=", RegexOptions.IgnoreCase)[0];
                    href = "https://jet.com" + href;

                    //get nodes under a that contain product's information
                    var NodeUnder_a = a.ChildNodes;

                    //product image url
                    var img_node = NodeUnder_a[0];
                    string img_url = img_node.Attributes["style"].Value;
                    Regex im = new Regex("url\\('(.*)'\\)");
                    Match m = im.Match(img_url);
                    if (!m.Success)
                    { continue; }
                    img_url = m.Groups[1].ToString();

                    //product's title
                    var title_node = NodeUnder_a[3];
                    if (title_node == null) { continue; }
                    string title = title_node.InnerText;

                    //prodcut's price
                    var price_node = NodeUnder_a[4];
                    if (price_node == null) { continue; }
                    string price = html.GetNodeByXPath("./div/span[@class='open-sans-regular']", price_node).InnerText;
                    price = price.Replace("$", "").Trim();

                    Product product = new Product();
                    product.Name = title;
                    product.Url = href;
                    product.Img = img_url;
                    product.Price = Convert.ToSingle(price);
                    if (product.Price < 1 || string.IsNullOrWhiteSpace(product.Name) || string.IsNullOrWhiteSpace(product.Img) || string.IsNullOrWhiteSpace(product.Url)
                                || !ValidUrl(product.Url) || !ValidUrl(product.Img))
                    {
                        continue;
                    }

                    if(product.Price > min_shipping)
                    {
                        product.Shipping = 1;
                        product.ShippingFee = 0;
                    }
                    else
                    {
                        product.Shipping = 1;
                        product.ShippingFee = fee_shipping;
                    }

                    product.Created = GetTimeStamp();

                    this.GetDetail(product, site);

                    products.Add(product);
                }

            }
            catch (Exception ex)
            {
                logInfo logInfo = new logInfo();
                logInfo.KeyInfo = site.SiteName + " " + keyword + " ListItem: " + ex.Message;
                logInfo.Location = this.GetType().Name + "." + System.Reflection.MethodBase.GetCurrentMethod().Name;
                Log4Helper.ErrorLog(logInfo);
            }
        }
开发者ID:wudi198,项目名称:MySpider,代码行数:79,代码来源:SpiderJetCom.cs


注:本文中的System.HtmlHelper.GetNodeByXPath方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。