本文整理匯總了C#中Sgml.SgmlReader.Close方法的典型用法代碼示例。如果您正苦於以下問題:C# SgmlReader.Close方法的具體用法?C# SgmlReader.Close怎麼用?C# SgmlReader.Close使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類Sgml.SgmlReader的用法示例。
在下文中一共展示了SgmlReader.Close方法的12個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的C#代碼示例。
示例1: ParseHtml
/// <summary>
/// Wraps HTML content in a synthetic "root" element, converts it to XML
/// through SgmlReader and loads the result into an XmlDocument.
/// </summary>
/// <param name="sContent">HTML markup to parse.</param>
/// <returns>A whitespace-preserving XmlDocument built from the converted markup.</returns>
public static XmlDocument ParseHtml(string sContent)
{
    SgmlReader reader = new SgmlReader();
    StringWriter sw = new StringWriter();
    XmlTextWriter w = new XmlTextWriter(sw);
    try
    {
        reader.WhitespaceHandling = WhitespaceHandling.All;
        reader.CaseFolding = Sgml.CaseFolding.ToLower;
        reader.InputStream = new StringReader("<root>" + sContent + "</root>");

        w.Formatting = Formatting.Indented;
        w.WriteStartDocument();

        // WriteNode leaves the reader on the node after the one it copied,
        // so the loop must test EOF rather than call Read() again.
        reader.Read();
        while (!reader.EOF)
        {
            w.WriteNode(reader, true);
        }
        w.Flush();
    }
    finally
    {
        // Release both ends even when the conversion throws.
        // Closing the XmlTextWriter also closes the underlying StringWriter;
        // its accumulated text stays readable through ToString().
        w.Close();
        reader.Close();
    }

    // create document
    XmlDocument doc = new XmlDocument();
    doc.PreserveWhitespace = true;
    doc.XmlResolver = null; // never resolve external resources while loading
    doc.LoadXml(sw.ToString());
    return doc;
}
示例2: GetWellFormedHTML
/// <summary>
/// Converts HTML markup to XML with SgmlReader and, optionally, extracts the
/// values of the nodes selected by an XPath expression.
/// </summary>
/// <param name="html">HTML markup to convert.</param>
/// <param name="xpathNavPath">
/// XPath expression evaluated against the converted XML, or null to return the
/// whole converted document.
/// </param>
/// <returns>
/// The converted XML (when <paramref name="xpathNavPath"/> is null) or the value of
/// each selected node followed by a newline; in both cases "\r" is replaced by "\n"
/// and "\n\n" by "\n". If anything fails, the exception message is returned instead.
/// </returns>
public static string GetWellFormedHTML(string html, string xpathNavPath)
{
    StringWriter sw = null;
    SgmlReader reader = null;
    XmlTextWriter writer = null;
    try
    {
        reader = new SgmlReader();
        reader.DocType = "HTML";
        reader.InputStream = new StringReader(html);
        sw = new StringWriter();
        writer = new XmlTextWriter(sw);
        writer.Formatting = Formatting.Indented;

        // WriteNode already advances the reader past the node it copied, so the
        // reader is only advanced explicitly when a node is skipped. Calling
        // Read() after every WriteNode would silently drop the following sibling.
        reader.Read();
        while (!reader.EOF)
        {
            if (reader.NodeType == XmlNodeType.Whitespace)
            {
                reader.Read();
            }
            else
            {
                writer.WriteNode(reader, true);
            }
        }

        string result;
        if (xpathNavPath == null)
        {
            result = sw.ToString();
        }
        else
        {
            // Filter out nodes from HTML
            StringBuilder sb = new StringBuilder();
            XPathDocument doc = new XPathDocument(new StringReader(sw.ToString()));
            XPathNavigator nav = doc.CreateNavigator();
            XPathNodeIterator nodes = nav.Select(xpathNavPath);
            while (nodes.MoveNext())
            {
                sb.Append(nodes.Current.Value + "\n");
            }
            result = sb.ToString();
        }

        result = result.Replace("\r", "\n");
        result = result.Replace("\n\n", "\n");
        return result;
    }
    catch (Exception exp)
    {
        // Callers receive the failure as text rather than as an exception.
        return exp.Message;
    }
    finally
    {
        // Any of these may still be null if the failure happened early.
        if (writer != null)
        {
            writer.Close();
        }
        if (reader != null)
        {
            reader.Close();
        }
        if (sw != null)
        {
            sw.Close();
        }
    }
}
示例3: FetchXmlDocument
/// <summary>
/// Fetches the text at <paramref name="url"/> via FetchWebText and parses it
/// into an XmlDocument through SgmlReader.
/// </summary>
/// <param name="url">Address passed to FetchWebText.</param>
/// <returns>The document loaded from the fetched text.</returns>
XmlDocument FetchXmlDocument(Uri url)
{
	// NOTE(review): FetchWebText is defined elsewhere; it is assumed to return a
	// text reader, since its result is assigned to SgmlReader.InputStream.
	var sr = FetchWebText (url);
	try {
		var xr = new SgmlReader () { InputStream = sr };
		try {
			var doc = new XmlDocument ();
			doc.Load (xr);
			return doc;
		} finally {
			// Close the SGML reader even when loading fails.
			xr.Close ();
		}
	} finally {
		sr.Close ();
	}
}
示例4: Run
/// <summary>
/// Parses the command-line arguments, configures an SgmlReader from them and
/// hands the input (one URI, or every local file matching a wildcard) to Process.
/// </summary>
/// <param name="args">
/// Command-line arguments. Arguments starting with '-' or '/' are options; the first
/// remaining argument is the input URI and the second the output file. An unknown
/// option, or an option missing its value, prints the usage text and returns.
/// </param>
public void Run(string[] args)
{
    SgmlReader reader = new SgmlReader();
    string inputUri = null;
    bool showUsage = false;
    for (int i = 0; i < args.Length; i++) {
        string arg = args[i];
        if (string.IsNullOrEmpty(arg)) {
            // Nothing to interpret; indexing arg[0] would throw on an empty string.
            continue;
        }
        if (arg[0] == '-' || arg[0] == '/') {
            string option = arg.Substring(1);
            bool takesValue = option == "e" || option == "dtd" || option == "proxy" || option == "encoding";
            if (takesValue && i + 1 >= args.Length) {
                // The option's value would be read past the end of the array.
                Console.WriteLine("Missing value for option '" + arg + "'");
                showUsage = true;
            }
            else {
                switch (option) {
                    case "e":
                        string errorlog = args[++i];
                        if (string.Equals(errorlog, "$stderr", StringComparison.OrdinalIgnoreCase)) {
                            reader.ErrorLog = Console.Error;
                        }
                        else {
                            reader.ErrorLogFile = errorlog;
                        }
                        break;
                    case "html":
                        reader.DocType = "HTML";
                        break;
                    case "dtd":
                        reader.SystemLiteral = args[++i];
                        break;
                    case "proxy":
                        proxy = args[++i];
                        reader.WebProxy = proxy;
                        break;
                    case "encoding":
                        encoding = Encoding.GetEncoding(args[++i]);
                        break;
                    case "f":
                        formatted = true;
                        reader.WhitespaceHandling = WhitespaceHandling.None;
                        break;
                    case "noxml":
                        noxmldecl = true;
                        break;
                    case "doctype":
                        reader.StripDocType = false;
                        break;
                    case "lower":
                        reader.CaseFolding = CaseFolding.ToLower;
                        break;
                    case "upper":
                        reader.CaseFolding = CaseFolding.ToUpper;
                        break;
                    default:
                        // NOTE(review): the usage text advertises -base, but there is
                        // no case for it here, so -base also ends up printing usage.
                        showUsage = true;
                        break;
                }
            }
            if (showUsage) break;
        }
        else {
            if (inputUri == null) {
                inputUri = arg;
                // Ordinal comparison: file extensions are not linguistic text.
                string ext = Path.GetExtension(arg);
                if (string.Equals(ext, ".htm", StringComparison.OrdinalIgnoreCase) ||
                    string.Equals(ext, ".html", StringComparison.OrdinalIgnoreCase))
                    reader.DocType = "HTML";
            }
            else if (output == null) output = arg;
        }
    }
    if (showUsage) {
        Console.WriteLine("Usage: SgmlReader <options> [InputUri] [OutputFile]");
        Console.WriteLine("-e log Optional log file name, name of '$STDERR' will write errors to stderr");
        Console.WriteLine("-f Whether to pretty print the output.");
        Console.WriteLine("-html Specify the built in HTML dtd");
        Console.WriteLine("-dtd url Specify other SGML dtd to use");
        Console.WriteLine("-base Add base tag to output HTML");
        Console.WriteLine("-noxml Do not add XML declaration to the output");
        Console.WriteLine("-proxy svr:80 Proxy server to use for http requests");
        Console.WriteLine("-encoding name Specify an encoding for the output file (default UTF-8)");
        Console.WriteLine("-lower Convert input tags to lower case");
        Console.WriteLine("-upper Convert input tags to upper case");
        Console.WriteLine();
        Console.WriteLine("InputUri The input file or http URL (default stdin). ");
        Console.WriteLine(" Supports wildcards for local file names.");
        Console.WriteLine("OutputFile Output file name (default stdout)");
        Console.WriteLine(" If input file contains wildcards then this just specifies the output file extension (default .xml)");
        return;
    }
    if (inputUri != null && !inputUri.StartsWith("http://", StringComparison.Ordinal) && inputUri.IndexOfAny(new char[] { '*', '?' }) >= 0) {
        // wild card processing of a directory of files.
        string path = Path.GetDirectoryName(inputUri);
        if (string.IsNullOrEmpty(path)) path = ".\\";
        string ext = ".xml";
        if (output != null)
            ext = Path.GetExtension(output);
        foreach (string uri in Directory.GetFiles(path, Path.GetFileName(inputUri))) {
            Console.WriteLine("Processing: " + uri);
            string file = Path.GetFileName(uri);
            // Each input file gets an output file next to it, with the chosen extension.
            output = Path.GetDirectoryName(uri) + Path.DirectorySeparatorChar + Path.GetFileNameWithoutExtension(file) + ext;
            try {
                Process(reader, uri);
            }
            finally {
                // Close the reader even when one file fails to convert.
                reader.Close();
            }
        }
        return;
    }
    try {
        Process(reader, inputUri);
    }
    finally {
        reader.Close();
    }
}
示例5: Crawl
/// <summary>
/// Follows the anchors of <paramref name="doc"/>: every not-yet-visited link is
/// downloaded, parsed through SgmlReader and crawled recursively.
/// </summary>
/// <param name="dtd">DTD assigned to each SgmlReader created for a linked page.</param>
/// <param name="doc">Document whose "//a" elements are followed; its BaseURI is used to resolve relative links.</param>
/// <param name="log">Writer that receives progress and error messages.</param>
/// <returns>true when the page has been processed; false only if a nested crawl returned false.</returns>
bool Crawl(SgmlDtd dtd, XmlDocument doc, TextWriter log)
{
    depth++;
    try {
        StringBuilder indentBuilder = new StringBuilder();
        for (int i = 0; i < depth; i++)
            indentBuilder.Append(" ");
        string indent = indentBuilder.ToString();
        count++;
        Uri baseUri = new Uri(doc.BaseURI);
        // A <base href> in the page overrides the document's own URI.
        XmlElement baseElmt = (XmlElement)doc.SelectSingleNode("/html/head/base");
        if (baseElmt != null) {
            string baseHref = baseElmt.GetAttribute("href");
            if (baseHref != "") {
                try {
                    baseUri = new Uri(baseHref);
                }
                catch (Exception) {
                    Console.WriteLine("### Error parsing BASE href '"+baseHref+"'");
                }
            }
        }
        foreach (XmlElement a in doc.SelectNodes("//a")) {
            string href = a.GetAttribute("href");
            if (string.IsNullOrEmpty(href) || depth >= 5)
                continue;
            Uri local;
            try {
                local = new Uri(baseUri, href);
            }
            catch (UriFormatException) {
                // One malformed link must not abort the remaining links of the page.
                log.WriteLine(indent+"### Error parsing href '"+href+"'");
                log.Flush();
                continue;
            }
            if (domain && baseUri.Host != local.Host)
                continue;
            // Ordinal comparison: ToLower() is culture-sensitive and would turn
            // ".GIF" into ".gıf" under a Turkish culture.
            string ext = Path.GetExtension(local.AbsolutePath);
            if (string.Equals(ext, ".jpg", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(ext, ".gif", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(ext, ".mpg", StringComparison.OrdinalIgnoreCase))
                continue;
            string url = local.AbsoluteUri;
            if (visited.ContainsKey(url))
                continue;
            visited.Add(url, url);
            log.WriteLine(indent+"Loading '"+url+"'");
            log.Flush();
            StreamReader stm = null;
            WebResponse resp = null;
            try {
                HttpWebRequest wr = (HttpWebRequest)WebRequest.Create(url);
                wr.Timeout = 10000;
                if (proxy != null) wr.Proxy = new WebProxy(proxy);
                wr.PreAuthenticate = false;
                // Pass the credentials of the process.
                wr.Credentials = CredentialCache.DefaultCredentials;
                resp = wr.GetResponse();
                Uri actual = resp.ResponseUri;
                if (actual.AbsoluteUri != url) {
                    local = new Uri(actual.AbsoluteUri);
                    log.WriteLine(indent+"Redirected to '"+actual.AbsoluteUri+"'");
                    log.Flush();
                }
                // Servers commonly send "text/html; charset=...", so match on the
                // media-type prefix instead of the whole header value.
                string contentType = resp.ContentType;
                if (contentType == null || !contentType.StartsWith("text/html", StringComparison.OrdinalIgnoreCase)) {
                    log.WriteLine(indent+"Skipping ContentType="+contentType);
                    log.Flush();
                    resp.Close();
                }
                else {
                    stm = new StreamReader(resp.GetResponseStream());
                }
            }
            catch (Exception e) {
                log.WriteLine(indent+"### Error opening URL: " + e.Message);
                log.Flush();
                if (stm == null && resp != null) resp.Close();
            }
            if (stm != null) {
                SgmlReader reader = new SgmlReader();
                reader.Dtd = dtd;
                reader.SetBaseUri(local.AbsoluteUri);
                reader.InputStream = stm;
                reader.WebProxy = proxy;
                XmlDocument d2 = new XmlDocument();
                d2.XmlResolver = null; // don't do any downloads!
                try {
                    try {
                        d2.Load(reader);
                    }
                    finally {
                        // Release the reader and the network stream whether or not
                        // the page parsed, and before recursing into it.
                        reader.Close();
                        stm.Close();
                    }
                    if (!Crawl(dtd, d2, log))
                        return false;
                }
                catch (Exception e) {
                    log.WriteLine(indent+"### Error parsing document '"+local.AbsoluteUri+"', "+e.Message);
                    log.Flush();
                }
            }
        }
        return true;
    }
    finally {
        // Restore the nesting level on every exit path, including early returns
        // and exceptions, so indentation and the depth limit stay correct.
        depth--;
    }
}
示例6: StartCrawl
/***************************************************************************
* Useful debugging code...
* **************************************************************************/
/// <summary>
/// Loads the document behind <paramref name="reader"/>, optionally inserts a
/// "base" element carrying the document's BaseURI, and starts crawling its links.
/// </summary>
/// <param name="reader">Configured reader positioned on the start document; it is closed after loading.</param>
/// <param name="uri">Not used by this method.</param>
/// <param name="basify">When true and the document has no "base" element, one is inserted.</param>
void StartCrawl(SgmlReader reader, string uri, bool basify)
{
    Console.WriteLine("Loading '"+reader.BaseURI+"'");
    XmlDocument doc = new XmlDocument();
    try {
        doc.XmlResolver = null; // don't do any downloads!
        doc.Load(reader);
    }
    catch (Exception e) {
        // Loading errors are reported, and whatever was loaded is still processed.
        Console.WriteLine("Error loading document\n"+e.Message);
    }
    reader.Close();
    if (basify) {
        // html and head are optional, if they are there use them otherwise not.
        XmlElement be = (XmlElement)doc.SelectSingleNode("//base");
        if (be == null) {
            be = doc.CreateElement("base");
            be.SetAttribute("href", doc.BaseURI);
            XmlNode parent = doc.SelectSingleNode("//head");
            if (parent == null) parent = doc.SelectSingleNode("//html");
            if (parent == null) parent = doc.DocumentElement;
            // The document has no root element at all when loading failed before
            // the first element was read; there is then nowhere to put the base tag.
            if (parent != null) {
                parent.InsertBefore(be, parent.FirstChild);
            }
        }
    }
    try {
        Crawl(reader.Dtd, doc, reader.ErrorLog);
    }
    catch (Exception e) {
        Console.WriteLine("Uncaught exception: " + e.Message);
    }
}
示例7: Process
/// <summary>
/// Attaches the input to <paramref name="reader"/> and then, depending on the
/// debug / crawl / testdoc settings, dumps, crawls or converts it to XML.
/// </summary>
/// <param name="reader">Reader already configured by the caller.</param>
/// <param name="uri">Input location; null means standard input.</param>
/// <param name="loadAsStream">
/// When true the URI is opened here (local file or web request) and supplied to the
/// reader as a stream; when false the URI is assigned to the reader's Href.
/// </param>
void Process(SgmlReader reader, string uri, bool loadAsStream)
{
    if (uri == null) {
        reader.InputStream = Console.In;
    }
    else if (loadAsStream) {
        Uri location = new Uri(uri);
        if (location.IsFile) {
            reader.InputStream = new StreamReader(uri);
        } else {
            WebRequest wr = WebRequest.Create(location);
            reader.InputStream = new StreamReader(wr.GetResponse().GetResponseStream());
        }
    } else {
        reader.Href = uri;
    }
    if (debug) {
        Debug(reader);
        reader.Close();
        return;
    }
    if (crawl) {
        StartCrawl(reader, uri, basify);
        return;
    }
    // The first input processed decides the output encoding unless one was set.
    if (this.encoding == null) {
        this.encoding = reader.GetEncoding();
    }
    XmlTextWriter w = output != null
        ? new XmlTextWriter(output, this.encoding)
        : new XmlTextWriter(Console.Out);
    try {
        if (formatted) w.Formatting = Formatting.Indented;
        if (!noxmldecl) {
            w.WriteStartDocument();
        }
        if (testdoc) {
            // Round-trip through XmlDocument to verify that the output loads as XML.
            XmlDocument doc = new XmlDocument();
            try {
                doc.Load(reader);
                doc.WriteTo(w);
            } catch (XmlException e) {
                Console.WriteLine("Error:" + e.Message);
                Console.WriteLine("at line " + e.LineNumber + " column " + e.LinePosition);
            }
        } else {
            // WriteNode advances the reader itself, hence the EOF-driven loop.
            reader.Read();
            while (!reader.EOF) {
                w.WriteNode(reader, true);
            }
        }
        w.Flush();
    }
    finally {
        // Always release the writer so the output file handle is not left open
        // when the conversion fails part-way.
        w.Close();
    }
}
示例8: GetWellFormedHTML_Handle
/// <summary>
/// Downloads the page at <paramref name="uri"/>, converts it to XML with SgmlReader
/// and checks that the result loads as an XPathDocument.
/// </summary>
/// <param name="uri">Page address; an empty string selects the built-in default address.</param>
/// <returns>
/// An empty string on success, or the exception message when downloading,
/// converting or loading fails.
/// </returns>
private string GetWellFormedHTML_Handle(string uri)
{
    HttpWebResponse res = null;
    StreamReader sReader = null;
    StringWriter sw = null;
    SgmlReader reader = null;
    XmlTextWriter writer = null;
    try
    {
        if (uri == String.Empty) uri = "http://www.ypshop.net/list--91-940-940--search-1.html";
        HttpWebRequest req = (HttpWebRequest)WebRequest.Create(uri);
        res = (HttpWebResponse)req.GetResponse();
        sReader = new StreamReader(res.GetResponseStream());
        reader = new SgmlReader();
        reader.DocType = "HTML";
        reader.InputStream = new StringReader(sReader.ReadToEnd());
        sw = new StringWriter();
        writer = new XmlTextWriter(sw);
        writer.Formatting = Formatting.Indented;

        // WriteNode already moves the reader past the node it copied; the reader
        // is advanced explicitly only for nodes that are skipped. Calling Read()
        // after each WriteNode would drop the following sibling.
        reader.Read();
        while (!reader.EOF)
        {
            if (reader.NodeType == XmlNodeType.Whitespace)
            {
                reader.Read();
            }
            else
            {
                writer.WriteNode(reader, true);
            }
        }

        // Loading the output fails (and is reported via the catch below) when the
        // conversion did not produce loadable XML.
        // NOTE(review): no nodes are selected from this document, so the success
        // result is always empty - confirm whether an XPath filter is still planned.
        new XPathDocument(new StringReader(sw.ToString()));
        return string.Empty;
    }
    catch (Exception exp)
    {
        // Callers receive the failure as text rather than as an exception.
        return exp.Message;
    }
    finally
    {
        // Any of these may still be null if the failure happened early.
        if (writer != null)
        {
            writer.Close();
        }
        if (reader != null)
        {
            reader.Close();
        }
        if (sw != null)
        {
            sw.Close();
        }
        if (sReader != null)
        {
            sReader.Close();
        }
        if (res != null)
        {
            res.Close();
        }
    }
}
示例9: GetWellFormedHTML
/// <summary>
/// Downloads an HTML page, converts it to XML with SgmlReader and optionally
/// extracts the values of the nodes selected by an XPath expression.
/// </summary>
/// <param name="uri">Page address; an empty string selects the built-in default address.</param>
/// <param name="xpath">XPath expression to evaluate, or null to return the whole converted document.</param>
/// <returns>
/// The converted XML (when <paramref name="xpath"/> is null) or the value of each
/// selected node followed by a space. If anything fails, the exception message.
/// </returns>
private string GetWellFormedHTML(string uri, string xpath)
{
    StringWriter sw = null;        // receives the converted XML text
    SgmlReader reader = null;      // parses the HTML
    XmlTextWriter writer = null;   // writes the XML into sw
    try
    {
        if (uri == String.Empty)
        {
            uri = "http://www.ypshop.net/list--91-940-940--search-1.html";
        }

        // Page content, decoded as UTF-8.
        string strWebContent;
        using (WebClient webclient = new WebClient())
        {
            webclient.Encoding = Encoding.UTF8;
            strWebContent = webclient.DownloadString(uri);
        }

        reader = new SgmlReader();
        reader.DocType = "HTML";
        reader.InputStream = new StringReader(strWebContent);
        sw = new StringWriter();
        writer = new XmlTextWriter(sw);
        writer.Formatting = Formatting.Indented;

        // WriteNode already moves the reader past the node it copied; the reader
        // is advanced explicitly only for nodes that are skipped. Calling Read()
        // after each WriteNode would drop the following sibling.
        reader.Read();
        while (!reader.EOF)
        {
            if (reader.NodeType == XmlNodeType.Whitespace)
            {
                reader.Read();
            }
            else
            {
                writer.WriteNode(reader, true);
            }
        }

        if (xpath == null)
        {
            return sw.ToString();
        }

        // Filter out nodes from HTML
        StringBuilder sb = new StringBuilder();
        XPathDocument doc = new XPathDocument(new StringReader(sw.ToString()));
        XPathNavigator nav = doc.CreateNavigator();
        XPathNodeIterator nodes = nav.Select(xpath);
        while (nodes.MoveNext())
        {
            sb.Append(nodes.Current.Value + " ");
        }
        return sb.ToString();
    }
    catch (Exception exp)
    {
        // Callers receive the failure as text rather than as an exception.
        return exp.Message;
    }
    finally
    {
        // Any of these may still be null if the failure happened early.
        if (writer != null)
        {
            writer.Close();
        }
        if (reader != null)
        {
            reader.Close();
        }
        if (sw != null)
        {
            sw.Close();
        }
    }
}
示例10: ToXhtml
/// <summary>
/// Converts HTML markup to XHTML by reading it through SgmlReader (HTML DTD,
/// lower-cased names, whitespace dropped) and writing it out as indented XML.
/// </summary>
/// <param name="html">HTML markup to convert.</param>
/// <returns>The converted XHTML text.</returns>
public static string ToXhtml(string html)
{
    SgmlReader reader = new SgmlReader();
    StringWriter sw = new StringWriter(CultureInfo.InvariantCulture);
    XmlTextWriter writer = new XmlTextWriter(sw);
    try
    {
        reader.CaseFolding = CaseFolding.ToLower;
        reader.DocType = "HTML";
        reader.InputStream = new StringReader(html);
        writer.Formatting = Formatting.Indented;
        reader.WhitespaceHandling = WhitespaceHandling.None;

        // Starting from the reader's initial state, WriteNode copies the input
        // through to the end of file.
        while (!reader.EOF)
        {
            writer.WriteNode(reader, true);
        }
        writer.Flush();
    }
    finally
    {
        reader.Close();
        // Close the XML writer rather than the StringWriter beneath it, so any
        // pending output is completed first; this also closes the StringWriter.
        writer.Close();
    }

    // The text accumulated by a StringWriter stays readable after it is closed.
    return sw.ToString();
}
示例11: Proceed
/// <summary>
/// Downloads the page at the configured URI, parses it as HTML through SgmlReader
/// into a whitespace-preserving XmlDocument, stores it in the _doc field and returns it.
/// </summary>
/// <returns>The loaded document.</returns>
public XmlDocument Proceed()
{
    HttpWebRequest req = (HttpWebRequest) HttpWebRequest.Create(_uri);
    using (WebResponse response = req.GetResponse())
    // The response body is always decoded as code page 1251.
    using (System.IO.TextReader tr = new System.IO.StreamReader(response.GetResponseStream(), System.Text.Encoding.GetEncoding(1251)))
    {
        Sgml.SgmlReader sgmlReader = new Sgml.SgmlReader();
        try
        {
            sgmlReader.DocType = "HTML";
            sgmlReader.WhitespaceHandling = WhitespaceHandling.All;
            sgmlReader.CaseFolding = Sgml.CaseFolding.ToLower;
            sgmlReader.InputStream = tr;

            // create document
            XmlDocument doc = new XmlDocument();
            doc.PreserveWhitespace = true;
            doc.XmlResolver = null; // never resolve external resources while loading
            doc.Load(sgmlReader);

            _doc = doc;
            return doc;
        }
        finally
        {
            // Runs on failure too; the using blocks then release the text reader
            // and the network response.
            sgmlReader.Close();
        }
    }
}
示例12: GetParsableString
/// <summary>
/// Converts HTML to XML and returns it as a string. Parsing errors are not
/// swallowed: any exception raised during conversion propagates to the caller.
/// </summary>
/// <remarks>Use this method when you want to catch a parsing error.</remarks>
/// <param name="html">HTML string to parse.</param>
/// <returns>The converted markup after PostProcessHtml has been applied.</returns>
public string GetParsableString(string html)
{
    html = PreProcessHtml(html);
    SgmlReader reader = new SgmlReader();
    // set SgmlReader values
    reader.DocType = "HTML";
    reader.InputStream = new StringReader(html);
    // write to xml
    StringWriter sw = new StringWriter();
    XmlTextWriter w = new XmlTextWriter(sw);
    w.Formatting = Formatting.Indented;
    try
    {
        // WriteNode already moves the reader past the node it copied; the reader
        // is advanced explicitly only for nodes that are skipped. Calling Read()
        // after each WriteNode would drop the following sibling.
        reader.Read();
        while (!reader.EOF)
        {
            // The document type node is dropped only when the parser properties ask
            // for it; with the option off, everything except whitespace is copied.
            bool dropDocType = reader.NodeType == XmlNodeType.DocumentType
                && this.ParserProperties.RemoveDocumentType;
            if (dropDocType || reader.NodeType == XmlNodeType.Whitespace)
            {
                reader.Read();
            }
            else
            {
                // Write the node and its whole subtree to xml
                w.WriteNode(reader, true);
            }
        }
        return PostProcessHtml(sw.ToString());
    }
    finally
    {
        reader.Close();
        w.Close();
    }
}