当前位置: 首页>>代码示例>>C#>>正文


C# Sgml.SgmlReader类代码示例

本文整理汇总了C#中Sgml.SgmlReader的典型用法代码示例。如果您正苦于以下问题：C# SgmlReader类的具体用法？C# SgmlReader怎么用？C# SgmlReader使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


SgmlReader类属于Sgml命名空间,在下文中一共展示了SgmlReader类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。

示例1: Main

        /// <summary>
        /// Loads the hard-coded documentation index page through <c>SgmlReader</c>, builds a
        /// JSON array of documents starting from the page's <c>div.layout</c> element and the
        /// links below it, and writes the array as indented JSON to the output file.
        /// </summary>
        /// <param name="args">Command-line arguments; <c>args[0]</c> is the output file path.</param>
        static void Main(string[] args)
        {
            // Validate up front: previously a missing argument only surfaced as an
            // IndexOutOfRangeException after the whole crawl had already run.
            if (args == null || args.Length == 0)
            {
                System.Console.Error.WriteLine("Usage: <program> <output-file>");
                System.Environment.ExitCode = 1;
                return;
            }

            var array = new JArray();
            var crawled = new HashSet<string>();
            var doc = new XmlDocument();

            // Dispose the reader once the document has been materialized.
            using (var sgmlReader = new SgmlReader
            {
                Href = "http://groups.google.com/group/ravendb/web/docs-http-api-index"
            })
            {
                crawled.Add(sgmlReader.Href);
                doc.Load(sgmlReader);
            }

            var layout = doc.SelectSingleNode("//div[@class='layout']");
            if (layout == null)
            {
                // SelectSingleNode returns null when nothing matches; fail with a clear message
                // instead of a NullReferenceException on layout.InnerXml.
                throw new System.InvalidOperationException(
                    "The index page does not contain a <div class='layout'> element.");
            }

            var index = new JObject(
                new JProperty("Html", FixLinks(layout.InnerXml)),
                new JProperty("Name", "Index"));

            array.Add(new JObject(
                new JProperty("DocId", "raven_documentation/index"),
                new JProperty("Document", index),
                new JProperty("Type", "raven documentation"),
                new JProperty("Metadata",
                    new JObject(new JProperty("Raven-View-Template", "/raven/JSONTemplates/documentation.html")))));

            AddDocumentsFromLinks(array, crawled, layout.SelectNodes(".//a"));

            File.WriteAllText(args[0], array.ToString(Formatting.Indented));
        }
开发者ID:torkelo,项目名称:ravendb,代码行数:28,代码来源:Program.cs

示例2: Create

        /// <summary>
        /// Builds an <c>SgmlReader</c> over the given HTML text, using the DTD parsed from the
        /// "Html.dtd" manifest resource of the assembly that defines <c>SgmlReader</c>.
        /// </summary>
        /// <param name="baseUri">Base URI used when parsing the DTD and passed to <c>SetBaseUri</c>.</param>
        /// <param name="html">HTML text to read.</param>
        /// <returns>The configured reader, positioned before the first node.</returns>
        public static XmlReader Create(string baseUri, string html)
        {
            const string resourceName = "Html.dtd";
            SgmlDtd htmlDtd;

            using (var dtdStream = typeof(SgmlReader).Assembly.GetManifestResourceStream(resourceName))
            {
                var dtdText = new StreamReader(dtdStream);
                htmlDtd = SgmlDtd.Parse(new Uri(baseUri), "HTML", dtdText, null, null, null);
            }

            // Same property order as an object initializer would apply.
            var sgml = new SgmlReader();
            sgml.WhitespaceHandling = WhitespaceHandling.All;
            sgml.CaseFolding = CaseFolding.ToLower;
            sgml.Dtd = htmlDtd;
            sgml.IgnoreDtd = true;
            sgml.InputStream = new StringReader(html);

            sgml.SetBaseUri(baseUri);
            return sgml;
        }
开发者ID:JesusPanDeVida,项目名称:VocabularioTeologiaBiblica,代码行数:25,代码来源:SgmlFactory.cs

示例3: ParseHtml

        /// <summary>
        /// Converts HTML content into an <see cref="XmlDocument"/>. The content is wrapped in a
        /// "&lt;root&gt;" element before it is fed to <c>SgmlReader</c>.
        /// </summary>
        /// <param name="sContent">HTML markup to convert.</param>
        /// <returns>A whitespace-preserving document loaded from the reader's XML output.</returns>
        public static XmlDocument ParseHtml(string sContent)
        {
            StringWriter sw = new StringWriter();

            // using blocks guarantee the reader and writer are released even when parsing throws;
            // the original only closed them on the success path.
            using (StringReader sr = new StringReader("<root>" + sContent + "</root>"))
            using (SgmlReader reader = new SgmlReader())
            using (XmlTextWriter w = new XmlTextWriter(sw))
            {
                reader.WhitespaceHandling = WhitespaceHandling.All;
                reader.CaseFolding = Sgml.CaseFolding.ToLower;
                reader.InputStream = sr;

                w.Formatting = Formatting.Indented;
                w.WriteStartDocument();

                // WriteNode advances the reader to the next sibling itself, so the loop is
                // driven by EOF rather than by further Read() calls.
                reader.Read();
                while (!reader.EOF)
                {
                    w.WriteNode(reader, true);
                }
                w.Flush();
            }

            // The StringWriter's buffer is still readable after the XmlTextWriter closed it.
            XmlDocument doc = new XmlDocument();
            doc.PreserveWhitespace = true;
            doc.XmlResolver = null; // do not resolve external resources while loading
            doc.LoadXml(sw.ToString());

            return doc;
        }
开发者ID:Cabana,项目名称:CMSConverter,代码行数:33,代码来源:SgmlUtil.cs

示例4: Parser

 /// <summary>
 /// Creates a parser whose SGML reader treats input as HTML, keeps all whitespace
 /// and folds names to lower case.
 /// </summary>
 public Parser()
 {
     _sgmlReader = new SgmlReader
     {
         DocType = "HTML",
         WhitespaceHandling = WhitespaceHandling.All,
         CaseFolding = CaseFolding.ToLower
     };
 }
开发者ID:benogle,项目名称:html2markup,代码行数:7,代码来源:Parser.cs

示例5: Main

    /// <summary>
    /// Benchmark entry point: reads the file named by <c>args[0]</c> once, then parses its text
    /// into an <see cref="XmlDocument"/> via <c>SgmlReader</c> <c>args[1]</c> times and prints the
    /// elapsed time in seconds.
    /// </summary>
    /// <param name="args">File name followed by the iteration count.</param>
    static void Main(string[] args)
    {
        if (args.Length < 2) {
            Console.WriteLine("Usage: BenchSgmlReader.exe filename iterations");
            return;
        }

        // using ensures the file handle is released even if ReadToEnd throws.
        string text;
        using (var streamReader = new StreamReader(args[0])) {
            text = streamReader.ReadToEnd();
        }

        // The iteration count is machine input, so parse it culture-independently.
        int n = int.Parse(args[1], CultureInfo.InvariantCulture);

        // Stopwatch is monotonic and high-resolution; DateTime.Now is neither and can jump
        // when the system clock is adjusted.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();
        for (int i = 0; i < n; i++) {
            SgmlReader sgmlReader = new SgmlReader();
            sgmlReader.DocType = "HTML";
            sgmlReader.WhitespaceHandling = WhitespaceHandling.All;
            sgmlReader.InputStream = new StringReader(text);

            XmlDocument doc = new XmlDocument();
            doc.PreserveWhitespace = true;
            doc.XmlResolver = null;
            doc.Load(sgmlReader);
        }
        stopwatch.Stop();

        Console.WriteLine("{0} s", stopwatch.Elapsed.TotalSeconds.ToString(CultureInfo.InvariantCulture));
    }
开发者ID:FrameworkBy,项目名称:html-parsers-benchmark,代码行数:31,代码来源:BenchSgmlReader.cs

示例6: GetPv

        /// <summary>
        /// Logs in to Hatena with the configured credentials, downloads the counter page for the
        /// given counter id and date, and returns the number found in the first
        /// <c>strong</c> element under the table whose first attribute value is "totalcount".
        /// </summary>
        /// <param name="cid">Counter id substituted into <c>CounterUrlBase</c>.</param>
        /// <param name="date">Date substituted into <c>CounterUrlBase</c> as <c>yyyy-MM-dd</c>.</param>
        /// <returns>The parsed count (presumably page views — confirm against the page layout).</returns>
        public static int GetPv(int cid, DateTime date)
        {
            var hatenaId = ConfigurationManager.AppSettings["hatenaId"];
            var hatenaPassword = ConfigurationManager.AppSettings["hatenaPassword"];

            var wc = new CustomWebClient() { Encoding = Encoding.UTF8 };
            wc.Headers.Add("Content-Type", "application/x-www-form-urlencoded");

            // NOTE(review): the credentials are inserted into the form body unescaped; if
            // LoginParamBase is a plain "k={0}&k2={1}" template, values containing '&' or '='
            // will corrupt the request — confirm and URL-encode if so.
            var data = string.Format(LoginParamBase, hatenaId, hatenaPassword);
            wc.UploadString("https://www.hatena.ne.jp/login", "POST", data);

            var url = string.Format(CounterUrlBase, hatenaId, cid, date.ToString("yyyy-MM-dd"));
            var res = wc.DownloadString(url);

            XDocument xml;
            using (var input = new StringReader(res))
            using (var sgml = new SgmlReader() { IgnoreDtd = true })
            {
                sgml.InputStream = input;
                xml = XDocument.Load(sgml);
            }

            var ns = xml.Root.Name.Namespace;
            var count = xml.Descendants(ns + "table")
                // Tables without any attribute have a null FirstAttribute; skip them instead of
                // throwing a NullReferenceException.
                .Where(x => x.FirstAttribute != null && x.FirstAttribute.Value == "totalcount")
                .Descendants(ns + "strong")
                .First().Value;
            return int.Parse(count);
        }
开发者ID:nakaji,项目名称:nakaji-api,代码行数:27,代码来源:HatenaCounterHelper.cs

示例7: GetWellFormedHTML

 /// <summary>
 /// Converts HTML into well-formed XML text using <c>SgmlReader</c>, optionally reducing the
 /// result to the values of the nodes selected by an XPath expression.
 /// </summary>
 /// <param name="html">HTML markup to convert.</param>
 /// <param name="xpathNavPath">
 /// XPath expression evaluated against the converted document; when null, the whole converted
 /// document is returned.
 /// </param>
 /// <returns>
 /// The converted XML, or the selected node values one per line, with "\r" replaced by "\n" and
 /// "\n\n" pairs collapsed in a single pass. If any exception occurs, its message is returned
 /// instead.
 /// </returns>
 public static string GetWellFormedHTML(string html, string xpathNavPath)
 {
     try
     {
         StringWriter sw = new StringWriter();

         // using blocks release the reader and writer on every path. The original closed them
         // only inside the catch block, where they could still be null and throw a
         // NullReferenceException out of the handler.
         using (SgmlReader reader = new SgmlReader())
         using (XmlTextWriter writer = new XmlTextWriter(sw))
         {
             reader.DocType = "HTML";
             reader.InputStream = new StringReader(html);
             writer.Formatting = Formatting.Indented;

             // WriteNode leaves the reader positioned on the next sibling, so calling Read()
             // after every WriteNode (as the original loop did) silently skipped that sibling.
             // Only advance manually when the current node is skipped.
             reader.Read();
             while (!reader.EOF)
             {
                 if (reader.NodeType == XmlNodeType.Whitespace)
                 {
                     reader.Read();
                 }
                 else
                 {
                     writer.WriteNode(reader, true);
                 }
             }
         }

         string result;
         if (xpathNavPath == null)
         {
             result = sw.ToString();
         }
         else
         {
             // Filter out nodes from the converted HTML.
             StringBuilder sb = new StringBuilder();
             XPathDocument doc = new XPathDocument(new StringReader(sw.ToString()));
             XPathNavigator nav = doc.CreateNavigator();
             XPathNodeIterator nodes = nav.Select(xpathNavPath);
             while (nodes.MoveNext())
             {
                 sb.Append(nodes.Current.Value + "\n");
             }
             result = sb.ToString();
         }

         // Normalize line endings (shared by both branches).
         result = result.Replace("\r", "\n");
         result = result.Replace("\n\n", "\n");
         return result;
     }
     catch (Exception exp)
     {
         // Existing contract: failures are reported as the returned string.
         return exp.Message;
     }
 }
开发者ID:drzo,项目名称:opensim4opencog,代码行数:59,代码来源:HttpUtil.cs

示例8: HtmlReader

        /// <summary>
        /// Constructor. Downloads the page at <paramref name="url"/> and parses it into
        /// <c>Html</c> via <c>SgmlReader</c>.
        /// </summary>
        /// <param name="url">URL to fetch.</param>
        /// <param name="follow">Whether to consult robots.txt before fetching.</param>
        /// <param name="agent">User agent to send; when null the request's default is used.</param>
        /// <param name="encoding">
        /// Encoding used to decode the response. When null, the response's declared character set
        /// is used, falling back to UTF-8 if it is missing or not recognized.
        /// </param>
        /// <exception cref="RobotsDisallowException">
        /// Thrown when <paramref name="follow"/> is true and <c>Robots.Parse</c> returns false for the URL.
        /// </exception>
        public HtmlReader(string url, bool follow = true, UserAgent agent = null, Encoding encoding = null)
        {
            // HTTP request
            HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
            // User agent
            if (agent != null)
                req.UserAgent = agent.ToString();
            // robots.txt
            Robots robots = (follow) ? Robots.Create(new Uri(url)) : null;
            if (robots != null) {
                if (!robots.Parse(url))
                    throw new RobotsDisallowException("Robots Disallow [" + url + "]");
                // CrawlDelay is treated as seconds here (converted to milliseconds for Sleep).
                if (robots.CrawlDelay != 0)
                    System.Threading.Thread.Sleep(robots.CrawlDelay * 1000);
            }

            using (HttpWebResponse res = (HttpWebResponse)req.GetResponse())
            using (Stream stream = res.GetResponseStream()) {
                Encoding enc = encoding;
                if (enc == null) {
                    // CharacterSet can be null/empty, quoted, or name an encoding this runtime
                    // does not know; Encoding.GetEncoding throws in those cases, so fall back
                    // to UTF-8 instead of failing the whole fetch.
                    string charset = res.CharacterSet;
                    if (charset != null)
                        charset = charset.Trim('"', '\'', ' ');
                    try {
                        enc = string.IsNullOrEmpty(charset) ? Encoding.UTF8 : Encoding.GetEncoding(charset);
                    } catch (System.ArgumentException) {
                        enc = Encoding.UTF8;
                    }
                }

                using (StreamReader reader = new StreamReader(stream, enc))
                using (SgmlReader sgml = new SgmlReader {
                    DocType = "HTML",
                    InputStream = reader,
                    CaseFolding = CaseFolding.ToLower,
                    IgnoreDtd = true
                }) {
                    Html = XDocument.Load(sgml, LoadOptions.None);
                    Uri = url;
                    Encoding = enc;
                }
            }
        }
开发者ID:t-kojima,项目名称:WebScrapingLibrary,代码行数:38,代码来源:HtmlReader.cs

示例9: GetXmlFromHtmlString

 /// <summary>
 /// Feeds the HTML text to an <c>SgmlReader</c> and returns what its
 /// <c>ReadOuterXml</c> call produces.
 /// </summary>
 /// <param name="html">HTML markup to read.</param>
 /// <returns>The string returned by <c>ReadOuterXml</c>.</returns>
 public static String GetXmlFromHtmlString (String html)
 {
     using (var sgml = new SgmlReader { InputStream = new StringReader(html) })
     {
         String outerXml = sgml.ReadOuterXml();
         return outerXml;
     }
 }
开发者ID:xxjeng,项目名称:nuxleus,代码行数:8,代码来源:HttpSgmlToXml.cs

示例10: ParseHtml

 /// <summary>
 /// Parses HTML from the given reader into an <see cref="XDocument"/>, with names
 /// folded to lower case.
 /// </summary>
 /// <param name="reader">Source of the HTML text.</param>
 /// <returns>The loaded document.</returns>
 static XDocument ParseHtml(TextReader reader)
 {
     using (var html = new SgmlReader
     {
         DocType = "HTML",
         CaseFolding = CaseFolding.ToLower,
         InputStream = reader
     })
     {
         XDocument document = XDocument.Load(html);
         return document;
     }
 }
开发者ID:Rapids,项目名称:Verde,代码行数:8,代码来源:GlobalForm.cs

示例11: ParseHtml

		/// <summary>
		/// Reads HTML from <paramref name="_Reader"/> through an <c>SgmlReader</c> (HTML doc type,
		/// lower-case folding) and loads the result into an <see cref="XDocument"/>.
		/// </summary>
		/// <param name="_Reader">Source of the HTML text.</param>
		/// <returns>The loaded document.</returns>
		private static XDocument ParseHtml( TextReader _Reader )
		{
			using ( var html = new SgmlReader() )
			{
				// Same assignment order as the initializer form.
				html.DocType = "HTML";
				html.CaseFolding = CaseFolding.ToLower;
				html.InputStream = _Reader;

				XDocument document = XDocument.Load( html );
				return document;
			}
		}
开发者ID:TatsuyaHoshina,项目名称:Tatsuya.IIDX,代码行数:10,代码来源:IIDXWeb.cs

示例12: SetUp

 /// <summary>
 /// Creates the reader used by the tests: lower-case folding, HTML doc type,
 /// no whitespace nodes.
 /// </summary>
 public void SetUp()
 {
     var reader = new SgmlReader();
     reader.CaseFolding = CaseFolding.ToLower;
     reader.DocType = "HTML";
     reader.WhitespaceHandling = WhitespaceHandling.None;

     _sgmlReader = reader;
 }
开发者ID:panuganti,项目名称:nreadability,代码行数:10,代码来源:SgmlReaderTests.cs

示例13: FetchXmlDocument

 /// <summary>
 /// Fetches the text at <paramref name="url"/> via <c>FetchWebText</c> and loads it into an
 /// <see cref="XmlDocument"/> through an <c>SgmlReader</c>.
 /// </summary>
 /// <param name="url">Address of the page to fetch.</param>
 /// <returns>The loaded document.</returns>
 XmlDocument FetchXmlDocument(Uri url)
 {
     var doc = new XmlDocument ();

     // using guarantees both readers are released even if Load throws; the original
     // only closed them on the success path.
     using (var sr = FetchWebText (url))
     using (var xr = new SgmlReader () { InputStream = sr }) {
         doc.Load (xr);
     }

     return doc;
 }
开发者ID:atsushieno,项目名称:monodroid-schema-gen,代码行数:10,代码来源:type-hierarchy-importer.cs

示例14: FetchHtmlFromUrlAsXDocument

 /// <summary>
 /// Downloads the resource at <paramref name="url"/> and parses it as HTML into an
 /// <see cref="XDocument"/>, with names folded to lower case.
 /// </summary>
 /// <param name="url">Address of the page to download.</param>
 /// <returns>The parsed document.</returns>
 public static XDocument FetchHtmlFromUrlAsXDocument(string url)
 {
     var webRequest = WebRequest.Create(url);

     // The response holds the underlying connection, so it must be disposed along with the
     // readers; the original disposed only the StreamReader.
     using (var response = webRequest.GetResponse())
     using (var reader = new StreamReader(response.GetResponseStream()))
     using (var sgml = new SgmlReader())
     {
         sgml.DocType = "HTML";
         sgml.CaseFolding = CaseFolding.ToLower;
         sgml.InputStream = reader;

         // XDocument.Load already returns a fully materialized document; the extra
         // copy via the XDocument copy constructor was redundant.
         return XDocument.Load(sgml);
     }
 }
开发者ID:blanciq,项目名称:serialz,代码行数:12,代码来源:RequestHelper.cs

示例15: FindImgs

        /// <summary>
        /// Scans HTML for <c>img</c> elements and collects their <c>src</c>, <c>width</c> and
        /// <c>height</c> attributes.
        /// </summary>
        /// <param name="htmlCode">HTML markup to scan; null or empty yields an empty array.</param>
        /// <returns>One entry per <c>img</c> element that has a non-empty <c>src</c>.</returns>
        internal static ImageInfo[] FindImgs(
            string htmlCode)
        {
            var images = new List<ImageInfo>();

            // Nothing to scan; also avoids the ArgumentNullException StringReader throws for null.
            if (string.IsNullOrEmpty(htmlCode))
            {
                return images.ToArray();
            }

            using (var input = new StringReader(htmlCode))
            using (var r = new SgmlReader { DocType = @"HTML", InputStream = input })
            {
                // Find <img src="" ...>
                while (r.Read())
                {
                    if (r.NodeType != XmlNodeType.Element ||
                        !string.Equals(r.Name, @"img", StringComparison.OrdinalIgnoreCase) ||
                        !r.HasAttributes)
                    {
                        continue;
                    }

                    var ii = new ImageInfo();

                    while (r.MoveToNextAttribute())
                    {
                        switch (r.Name.ToLowerInvariant())
                        {
                            case @"src":
                                ii.Source = r.Value;
                                break;
                            case @"width":
                                ii.Width = ConvertHelper.ToInt32(r.Value);
                                break;
                            case @"height":
                                ii.Height = ConvertHelper.ToInt32(r.Value);
                                break;
                        }
                    }

                    // Images without a source are ignored.
                    if (!string.IsNullOrEmpty(ii.Source))
                    {
                        images.Add(ii);
                    }
                }
            }

            return images.ToArray();
        }
开发者ID:jorik041,项目名称:ZetaHtmlEditControl,代码行数:51,代码来源:HtmlConversionHelper.cs


注:本文中的Sgml.SgmlReader类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。