本文整理匯總了C#中Sgml.SgmlReader類的典型用法代碼示例。如果您正苦於以下問題:C# SgmlReader類的具體用法?C# SgmlReader怎麼用?C# SgmlReader使用的例子?那麼,這裡精選的類代碼示例或許可以為您提供幫助。
SgmlReader類屬於Sgml命名空間,在下文中一共展示了SgmlReader類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的C#代碼示例。
示例1: Main
/// <summary>
/// Loads the RavenDB HTTP API documentation index page through <c>SgmlReader</c>,
/// builds a JSON document from its <c>div class='layout'</c> element, passes the
/// anchors found in that element to <c>AddDocumentsFromLinks</c>, and writes the
/// resulting JSON array to the file named by the first command-line argument.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the output file path.</param>
/// <exception cref="System.ArgumentException">No output file path was supplied.</exception>
/// <exception cref="System.InvalidOperationException">The page has no layout element.</exception>
static void Main(string[] args)
{
    // Fail before doing any network work; otherwise a missing argument is only
    // discovered at the very end, after the whole crawl has already run.
    if (args == null || args.Length < 1)
    {
        throw new System.ArgumentException("Expected the output file path as the first argument.", "args");
    }

    var array = new JArray();
    var crawled = new HashSet<string>();
    var doc = new XmlDocument();

    using (var sgmlReader = new SgmlReader
    {
        Href = "http://groups.google.com/group/ravendb/web/docs-http-api-index"
    })
    {
        // Remember the index page so it is not fetched again while following links.
        crawled.Add(sgmlReader.Href);
        doc.Load(sgmlReader);
    }

    var layout = doc.SelectSingleNode("//div[@class='layout']");
    if (layout == null)
    {
        // SelectSingleNode returns null when nothing matches; report that explicitly
        // instead of failing with a NullReferenceException below.
        throw new System.InvalidOperationException("The index page does not contain a div with class 'layout'.");
    }

    var index = new JObject(
        new JProperty("Html", FixLinks(layout.InnerXml)),
        new JProperty("Name", "Index"));

    array.Add(new JObject(
        new JProperty("DocId", "raven_documentation/index"),
        new JProperty("Document", index),
        new JProperty("Type", "raven documentation"),
        new JProperty("Metadata",
            new JObject(new JProperty("Raven-View-Template", "/raven/JSONTemplates/documentation.html")))
        ));

    AddDocumentsFromLinks(array, crawled, layout.SelectNodes(".//a"));
    File.WriteAllText(args[0], array.ToString(Formatting.Indented));
}
示例2: Create
/// <summary>
/// Builds an <see cref="SgmlReader"/> over the given HTML text. The DTD is parsed from
/// the "Html.dtd" manifest resource of the assembly that contains <see cref="SgmlReader"/>.
/// </summary>
/// <param name="baseUri">Base URI used when parsing the DTD and set on the reader.</param>
/// <param name="html">HTML text to read.</param>
/// <returns>The configured reader, returned as an <see cref="XmlReader"/>.</returns>
public static XmlReader Create(string baseUri, string html)
{
    SgmlDtd htmlDtd;
    using (var dtdStream = typeof(SgmlReader).Assembly.GetManifestResourceStream("Html.dtd"))
    {
        var dtdText = new StreamReader(dtdStream);
        htmlDtd = SgmlDtd.Parse(new Uri(baseUri), "HTML", dtdText, null, null, null);
    }

    var sgml = new SgmlReader();
    sgml.WhitespaceHandling = WhitespaceHandling.All;
    sgml.CaseFolding = CaseFolding.ToLower;
    sgml.Dtd = htmlDtd;
    sgml.IgnoreDtd = true;
    sgml.InputStream = new StringReader(html);
    sgml.SetBaseUri(baseUri);

    return sgml;
}
示例3: ParseHtml
// Parses HTML content into an XmlDocument. The content is wrapped in
// "<root>...</root>" before being read through SgmlReader, the reader's output is
// serialised to an XML string, and that string is loaded into the returned document.
// The reader and writer are disposed even when parsing throws.
public static XmlDocument ParseHtml(string sContent)
{
    string xml;

    using (StringReader sr = new StringReader("<root>" + sContent + "</root>"))
    using (SgmlReader reader = new SgmlReader())
    {
        reader.WhitespaceHandling = WhitespaceHandling.All;
        reader.CaseFolding = Sgml.CaseFolding.ToLower;
        reader.InputStream = sr;

        StringWriter sw = new StringWriter();
        using (XmlTextWriter w = new XmlTextWriter(sw))
        {
            w.Formatting = Formatting.Indented;
            w.WriteStartDocument();

            // WriteNode advances the reader past the node it copies, so the loop
            // only needs the initial Read() to get started.
            reader.Read();
            while (!reader.EOF)
            {
                w.WriteNode(reader, true);
            }
            w.Flush();
        }

        // The StringWriter's buffer is still readable after the writer has been closed.
        xml = sw.ToString();
    }

    // create document
    XmlDocument doc = new XmlDocument();
    doc.PreserveWhitespace = true;
    doc.XmlResolver = null;
    doc.LoadXml(xml);
    return doc;
}
示例4: Parser
/// <summary>
/// Creates the parser and its <c>SgmlReader</c>, configured with the "HTML" doc type,
/// all whitespace reported, and names folded to lower case.
/// </summary>
public Parser()
{
    _sgmlReader = new SgmlReader
    {
        DocType = "HTML",
        WhitespaceHandling = WhitespaceHandling.All,
        CaseFolding = CaseFolding.ToLower
    };
}
示例5: Main
/// <summary>
/// Benchmark entry point: reads the file named by <c>args[0]</c> into memory, then
/// parses it into an <see cref="XmlDocument"/> through <c>SgmlReader</c>
/// <c>args[1]</c> times and prints the total elapsed time in seconds.
/// </summary>
/// <param name="args">File name followed by the iteration count.</param>
static void Main(string[] args)
{
    if (args.Length < 2) {
        Console.WriteLine("Usage: BenchSgmlReader.exe filename iterations");
        return;
    }

    string text;
    using (var streamReader = new StreamReader(args[0])) {
        text = streamReader.ReadToEnd();
    }

    // The iteration count is machine input, so parse it culture-independently.
    int n = int.Parse(args[1], CultureInfo.InvariantCulture);

    // Stopwatch is monotonic; DateTime.Now can jump with clock adjustments and
    // is too coarse for timing.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    for (int i = 0; i < n; i++) {
        SgmlReader sgmlReader = new SgmlReader();
        sgmlReader.DocType = "HTML";
        sgmlReader.WhitespaceHandling = WhitespaceHandling.All;
        //sgmlReader.CaseFolding = Sgml.CaseFolding.ToLower;
        sgmlReader.InputStream = new StringReader(text);
        XmlDocument doc = new XmlDocument();
        doc.PreserveWhitespace = true;
        doc.XmlResolver = null;
        doc.Load(sgmlReader);
    }
    stopwatch.Stop();

    Console.WriteLine("{0} s", stopwatch.Elapsed.TotalSeconds.ToString(CultureInfo.InvariantCulture));
}
示例6: GetPv
/// <summary>
/// Logs in to Hatena with the credentials from the application settings, downloads the
/// counter page for the given counter id and date, and returns the number found in the
/// first <c>strong</c> element below a <c>table</c> whose first attribute value is
/// "totalcount".
/// </summary>
/// <param name="cid">Counter id inserted into <c>CounterUrlBase</c>.</param>
/// <param name="date">Date inserted into <c>CounterUrlBase</c>, formatted as yyyy-MM-dd.</param>
/// <returns>The parsed count.</returns>
/// <exception cref="System.InvalidOperationException">No matching element was found.</exception>
/// <exception cref="System.FormatException">The element text is not an integer.</exception>
public static int GetPv(int cid, DateTime date)
{
    var hatenaId = ConfigurationManager.AppSettings["hatenaId"];
    var hatenaPassword = ConfigurationManager.AppSettings["hatenaPassword"];

    var wc = new CustomWebClient() { Encoding = Encoding.UTF8 };
    wc.Headers.Add("Content-Type", "application/x-www-form-urlencoded");

    // NOTE(review): the credentials are inserted unescaped; if LoginParamBase is a
    // form-urlencoded template they may need Uri.EscapeDataString — confirm.
    var data = string.Format(LoginParamBase, hatenaId, hatenaPassword);
    wc.UploadString("https://www.hatena.ne.jp/login", "POST", data);

    // Format the date with the invariant culture so the URL does not depend on the
    // current culture's calendar or digits.
    var url = string.Format(
        CounterUrlBase,
        hatenaId,
        cid,
        date.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture));
    var res = wc.DownloadString(url);

    XDocument xml;
    using (var sgml = new SgmlReader() { IgnoreDtd = true })
    {
        sgml.InputStream = new StringReader(res);
        xml = XDocument.Load(sgml);
    }

    var ns = xml.Root.Name.Namespace;
    var count = xml.Descendants(ns + "table")
        // Tables without any attribute have a null FirstAttribute; skip them
        // instead of throwing a NullReferenceException.
        .Where(x => x.FirstAttribute != null && x.FirstAttribute.Value == "totalcount")
        .Descendants(ns + "strong")
        .First().Value;

    return int.Parse(count, System.Globalization.CultureInfo.InvariantCulture);
}
示例7: GetWellFormedHTML
/// <summary>
/// Converts HTML to indented XML text through <c>SgmlReader</c>. When
/// <paramref name="xpathNavPath"/> is given, returns instead the values of the nodes it
/// selects from that XML, one per line. In both cases carriage returns are replaced by
/// line feeds and doubled line feeds are collapsed once.
/// </summary>
/// <param name="html">HTML text to convert.</param>
/// <param name="xpathNavPath">XPath expression selecting the nodes to return, or null for the whole document.</param>
/// <returns>The resulting text, or the exception message if anything fails.</returns>
public static string GetWellFormedHTML(string html, string xpathNavPath)
{
    StringWriter sw = null;
    SgmlReader reader = null;
    XmlTextWriter writer = null;
    try
    {
        reader = new SgmlReader();
        reader.DocType = "HTML";
        reader.InputStream = new StringReader(html);
        sw = new StringWriter();
        writer = new XmlTextWriter(sw);
        writer.Formatting = Formatting.Indented;

        // NOTE(review): WriteNode already advances the reader past the copied node, so
        // the following Read() may skip a sibling; kept as in the original — confirm.
        while (reader.Read())
        {
            if (reader.NodeType != XmlNodeType.Whitespace)
            {
                writer.WriteNode(reader, true);
            }
        }
        writer.Flush();

        string result;
        if (xpathNavPath == null)
        {
            result = sw.ToString();
        }
        else
        {
            // Filter out nodes from HTML
            StringBuilder sb = new StringBuilder();
            XPathDocument doc = new XPathDocument(new StringReader(sw.ToString()));
            XPathNavigator nav = doc.CreateNavigator();
            XPathNodeIterator nodes = nav.Select(xpathNavPath);
            while (nodes.MoveNext())
            {
                sb.Append(nodes.Current.Value + "\n");
            }
            result = sb.ToString();
        }

        result = result.Replace("\r", "\n");
        result = result.Replace("\n\n", "\n");
        return result;
    }
    catch (Exception exp)
    {
        // Callers receive the failure as the returned text.
        return exp.Message;
    }
    finally
    {
        // Any of these may still be null if construction failed part-way; the original
        // catch block dereferenced them unconditionally and never closed them on success.
        if (writer != null)
        {
            writer.Close();
        }
        if (reader != null)
        {
            reader.Close();
        }
        if (sw != null)
        {
            sw.Close();
        }
    }
}
示例8: HtmlReader
/// <summary>
/// Constructor. Requests the URL, optionally honouring robots.txt, and parses the
/// response body into <c>Html</c> through <c>SgmlReader</c>.
/// </summary>
/// <param name="url">URL to fetch.</param>
/// <param name="follow">Whether to consult robots.txt before fetching.</param>
/// <param name="agent">User agent to send, or null to leave the request's default.</param>
/// <param name="encoding">
/// Encoding used to decode the response. When null, the encoding named by the response's
/// character set is used, falling back to UTF-8 if that name is missing or unknown.
/// </param>
/// <exception cref="RobotsDisallowException">robots.txt disallows the URL.</exception>
public HtmlReader(string url, bool follow = true, UserAgent agent = null, Encoding encoding = null)
{
    // HTTP request
    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);

    // User agent
    if (agent != null)
        req.UserAgent = agent.ToString();

    // robots.txt
    Robots robots = (follow) ? Robots.Create(new Uri(url)) : null;
    if (robots != null) {
        if (!robots.Parse(url))
            throw new RobotsDisallowException("Robots Disallow [" + url + "]");
        if (robots.CrawlDelay != 0)
            System.Threading.Thread.Sleep(robots.CrawlDelay * 1000);
    }

    using (HttpWebResponse res = (HttpWebResponse)req.GetResponse())
    using (Stream stream = res.GetResponseStream()) {
        Encoding enc = encoding;
        if (enc == null) {
            try {
                enc = Encoding.GetEncoding(res.CharacterSet);
            }
            catch (ArgumentException) {
                // GetEncoding throws for a null, empty or unrecognised charset name;
                // fall back to UTF-8 rather than failing the whole fetch.
                enc = Encoding.UTF8;
            }
        }

        using (StreamReader reader = new StreamReader(stream, enc))
        using (SgmlReader sgml = new SgmlReader {
            DocType = "HTML",
            InputStream = reader,
            CaseFolding = CaseFolding.ToLower,
            IgnoreDtd = true
        }) {
            Html = XDocument.Load(sgml, LoadOptions.None);
            Uri = url;
            Encoding = enc;
        }
    }
}
示例9: GetXmlFromHtmlString
/// <summary>
/// Feeds the HTML string to an <c>SgmlReader</c> and returns what its
/// <c>ReadOuterXml</c> produces.
/// </summary>
/// <param name="html">HTML text to read.</param>
/// <returns>The string returned by <c>ReadOuterXml</c>.</returns>
public static String GetXmlFromHtmlString (String html)
{
    using (SgmlReader sgml = new SgmlReader())
    {
        TextReader source = new StringReader(html);
        sgml.InputStream = source;

        String outerXml = sgml.ReadOuterXml();
        return outerXml;
    }
}
示例10: ParseHtml
/// <summary>
/// Loads an <see cref="XDocument"/> from HTML text by reading it through an
/// <c>SgmlReader</c> set to the "HTML" doc type with lower-case folding.
/// </summary>
/// <param name="reader">Source of the HTML text.</param>
/// <returns>The loaded document.</returns>
static XDocument ParseHtml(TextReader reader)
{
    var sgml = new SgmlReader { DocType = "HTML", CaseFolding = CaseFolding.ToLower };
    using (sgml)
    {
        sgml.InputStream = reader;
        XDocument document = XDocument.Load(sgml);
        return document;
    }
}
示例11: ParseHtml
/// <summary>
/// Reads HTML from the supplied reader through an <c>SgmlReader</c> ("HTML" doc type,
/// lower-case folding) and returns it as an <see cref="XDocument"/>.
/// </summary>
/// <param name="_Reader">Source of the HTML text.</param>
/// <returns>The loaded document.</returns>
private static XDocument ParseHtml( TextReader _Reader )
{
    XDocument parsed;
    using ( var htmlReader = new SgmlReader { DocType = "HTML", CaseFolding = CaseFolding.ToLower, InputStream = _Reader } )
    {
        parsed = XDocument.Load( htmlReader );
    }
    return parsed;
}
示例12: SetUp
/// <summary>
/// Creates a fresh <c>SgmlReader</c> for the fixture: lower-case folding, "HTML" doc
/// type, and whitespace handling set to None.
/// </summary>
public void SetUp()
{
    var reader = new SgmlReader();
    reader.CaseFolding = CaseFolding.ToLower;
    reader.DocType = "HTML";
    reader.WhitespaceHandling = WhitespaceHandling.None;

    _sgmlReader = reader;
}
示例13: FetchXmlDocument
/// <summary>
/// Fetches the text at the given URL via <c>FetchWebText</c> and loads it into an
/// <see cref="XmlDocument"/> through an <c>SgmlReader</c>.
/// </summary>
/// <param name="url">Address passed to <c>FetchWebText</c>.</param>
/// <returns>The loaded document.</returns>
XmlDocument FetchXmlDocument(Uri url)
{
    var sr = FetchWebText (url);
    try {
        var xr = new SgmlReader () { InputStream = sr };
        try {
            var doc = new XmlDocument ();
            doc.Load (xr);
            return doc;
        } finally {
            // Close even when Load throws, so a failed parse does not leak the reader.
            xr.Close ();
        }
    } finally {
        sr.Close ();
    }
}
示例14: FetchHtmlFromUrlAsXDocument
/// <summary>
/// Downloads the resource at the given URL and parses it as HTML into an
/// <see cref="XDocument"/> through an <c>SgmlReader</c> with lower-case folding.
/// </summary>
/// <param name="url">Address to request.</param>
/// <returns>The loaded document.</returns>
public static XDocument FetchHtmlFromUrlAsXDocument(string url)
{
    var webRequest = WebRequest.Create(url);

    // Dispose the response itself, not only its stream, so it is released even if
    // GetResponseStream or the reader construction fails.
    using (var response = webRequest.GetResponse())
    using (var reader = new StreamReader(response.GetResponseStream()))
    using (var sgml = new SgmlReader())
    {
        sgml.DocType = "HTML";
        sgml.CaseFolding = CaseFolding.ToLower;
        sgml.InputStream = reader;

        // XDocument.Load already returns a fresh document; copying it again is unnecessary.
        return XDocument.Load(sgml);
    }
}
示例15: FindImgs
/// <summary>
/// Scans HTML with an <c>SgmlReader</c> and collects one <c>ImageInfo</c> per
/// <c>img</c> element that has a non-empty <c>src</c> attribute, also copying its
/// <c>width</c> and <c>height</c> attributes when present.
/// </summary>
/// <param name="htmlCode">HTML text to scan.</param>
/// <returns>The collected image descriptions, in reading order.</returns>
internal static ImageInfo[] FindImgs(
    string htmlCode)
{
    var sgml = new SgmlReader { DocType = @"HTML", InputStream = new StringReader(htmlCode) };
    var images = new List<ImageInfo>();

    while (sgml.Read())
    {
        // Only <img ...> elements that carry attributes are of interest.
        var isImgElement = sgml.NodeType == XmlNodeType.Element
            && string.Equals(sgml.Name, @"img", StringComparison.OrdinalIgnoreCase);
        if (!isImgElement || !sgml.HasAttributes)
        {
            continue;
        }

        var info = new ImageInfo();
        while (sgml.MoveToNextAttribute())
        {
            var attributeName = sgml.Name.ToLowerInvariant();
            if (attributeName == @"src")
            {
                info.Source = sgml.Value;
            }
            else if (attributeName == @"width")
            {
                info.Width = ConvertHelper.ToInt32(sgml.Value);
            }
            else if (attributeName == @"height")
            {
                info.Height = ConvertHelper.ToInt32(sgml.Value);
            }
        }

        // Images without a source are dropped.
        if (!string.IsNullOrEmpty(info.Source))
        {
            images.Add(info);
        }
    }

    return images.ToArray();
}