本文整理匯總了C#中Sgml.SgmlReader.SetBaseUri方法的典型用法代碼示例。如果您正苦於以下問題:C# SgmlReader.SetBaseUri方法的具體用法?C# SgmlReader.SetBaseUri怎麼用?C# SgmlReader.SetBaseUri使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類Sgml.SgmlReader的用法示例。
在下文中一共展示了SgmlReader.SetBaseUri方法的7個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的C#代碼示例。
示例1: Create
/// <summary>
/// Builds an <see cref="SgmlReader"/> over the supplied HTML text, using the
/// "Html.dtd" resource embedded in the SgmlReader assembly as its DTD.
/// </summary>
/// <param name="baseUri">URI string used both when parsing the DTD and as the reader's base URI.</param>
/// <param name="html">HTML markup the reader will consume.</param>
/// <returns>The configured reader; no <c>Read</c> call is made here.</returns>
/// <exception cref="InvalidOperationException">The embedded DTD resource could not be found.</exception>
public static XmlReader Create(string baseUri, string html)
{
    var assembly = typeof(SgmlReader).Assembly;
    var name = "Html.dtd";
    SgmlDtd dtd;
    using (var resource = assembly.GetManifestResourceStream(name))
    {
        // GetManifestResourceStream returns null (it does not throw) when the
        // resource is missing; fail with a message that names the resource
        // instead of letting StreamReader raise an opaque ArgumentNullException.
        if (resource == null)
        {
            throw new InvalidOperationException(
                "Embedded resource '" + name + "' was not found in assembly '" + assembly.FullName + "'.");
        }

        using (var input = new StreamReader(resource))
        {
            dtd = SgmlDtd.Parse(new Uri(baseUri), "HTML", input, null, null, null);
        }
    }

    var reader = new SgmlReader
    {
        WhitespaceHandling = WhitespaceHandling.All,
        CaseFolding = CaseFolding.ToLower,
        Dtd = dtd,
        IgnoreDtd = true,
        InputStream = new StringReader(html),
    };
    reader.SetBaseUri(baseUri);
    return reader;
}
示例2: Crawl
/// <summary>
/// Recursively follows the &lt;a href&gt; links found in <paramref name="doc"/>: each
/// not-yet-visited link is fetched over HTTP, parsed through an <see cref="SgmlReader"/>
/// into a new <see cref="XmlDocument"/>, and crawled in turn.
/// </summary>
/// <remarks>
/// Relies on members declared outside this method (<c>depth</c>, <c>count</c>,
/// <c>domain</c>, <c>visited</c>, <c>proxy</c>). Links are only followed while
/// <c>depth</c> is below 5.
/// </remarks>
/// <param name="dtd">DTD assigned to every reader created for a linked page.</param>
/// <param name="doc">Document whose links are followed; its BaseURI, or an absolute BASE href if present, resolves relative links.</param>
/// <param name="log">Receives progress and error messages.</param>
/// <returns><c>true</c>, unless a nested crawl returned <c>false</c>.</returns>
bool Crawl(SgmlDtd dtd, XmlDocument doc, TextWriter log)
{
    depth++;
    try
    {
        string indent = new string(' ', depth);
        count++;
        Uri baseUri = new Uri(doc.BaseURI);

        XmlElement baseElmt = (XmlElement)doc.SelectSingleNode("/html/head/base");
        if (baseElmt != null)
        {
            string baseHref = baseElmt.GetAttribute("href");
            if (baseHref != "")
            {
                // TryCreate instead of catching the constructor's exception.
                Uri parsedBase;
                if (Uri.TryCreate(baseHref, UriKind.Absolute, out parsedBase))
                {
                    baseUri = parsedBase;
                }
                else
                {
                    Console.WriteLine("### Error parsing BASE href '" + baseHref + "'");
                }
            }
        }

        foreach (XmlElement a in doc.SelectNodes("//a"))
        {
            string href = a.GetAttribute("href");
            if (string.IsNullOrEmpty(href) || depth >= 5)
            {
                continue;
            }

            // A single malformed href must not abort the rest of the page.
            Uri local;
            if (!Uri.TryCreate(baseUri, href, out local))
            {
                log.WriteLine(indent + "### Error resolving href '" + href + "'");
                log.Flush();
                continue;
            }

            if (domain && baseUri.Host != local.Host)
            {
                continue;
            }

            // Ordinal, case-insensitive comparison: ToLower() is culture-sensitive
            // (e.g. ".GIF" does not lower-case to ".gif" under a Turkish culture).
            string ext = Path.GetExtension(local.AbsolutePath);
            if (string.Equals(ext, ".jpg", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(ext, ".gif", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(ext, ".mpg", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            string url = local.AbsoluteUri;
            if (visited.ContainsKey(url))
            {
                continue;
            }
            visited.Add(url, url);

            log.WriteLine(indent + "Loading '" + url + "'");
            log.Flush();

            StreamReader stm = null;
            WebResponse resp = null;
            try
            {
                HttpWebRequest wr = (HttpWebRequest)WebRequest.Create(url);
                wr.Timeout = 10000;
                if (proxy != null) wr.Proxy = new WebProxy(proxy);
                wr.PreAuthenticate = false;
                // Pass the credentials of the process.
                wr.Credentials = CredentialCache.DefaultCredentials;
                resp = wr.GetResponse();

                Uri actual = resp.ResponseUri;
                if (actual.AbsoluteUri != url)
                {
                    local = new Uri(actual.AbsoluteUri);
                    log.WriteLine(indent + "Redirected to '" + actual.AbsoluteUri + "'");
                    log.Flush();
                }

                // Compare only the media type: the header commonly carries
                // parameters, e.g. "text/html; charset=utf-8".
                string mediaType = (resp.ContentType ?? "").Split(';')[0].Trim();
                if (!string.Equals(mediaType, "text/html", StringComparison.OrdinalIgnoreCase))
                {
                    log.WriteLine(indent + "Skipping ContentType=" + resp.ContentType);
                    log.Flush();
                    resp.Close();
                }
                else
                {
                    stm = new StreamReader(resp.GetResponseStream());
                }
            }
            catch (Exception e)
            {
                log.WriteLine(indent + "### Error opening URL: " + e.Message);
                log.Flush();
                // Release the connection if the failure happened after the response arrived.
                if (resp != null) resp.Close();
            }

            if (stm == null)
            {
                continue;
            }

            SgmlReader reader = new SgmlReader();
            reader.Dtd = dtd;
            reader.SetBaseUri(local.AbsoluteUri);
            reader.InputStream = stm;
            reader.WebProxy = proxy;
            XmlDocument d2 = new XmlDocument();
            d2.XmlResolver = null; // don't do any downloads!
            try
            {
                try
                {
                    d2.Load(reader);
                }
                finally
                {
                    // Close on success and on failure, and before recursing so the
                    // connection is not held open for the duration of the nested crawl.
                    reader.Close();
                    stm.Close();
                }

                if (!Crawl(dtd, d2, log))
                {
                    return false;
                }
            }
            catch (Exception e)
            {
                log.WriteLine(indent + "### Error parsing document '" + local.AbsoluteUri + "', " + e.Message);
                log.Flush();
            }
        }

        return true;
    }
    finally
    {
        // Restore the depth on every exit path (early return or exception),
        // otherwise the caller continues with a depth that is one too high.
        depth--;
    }
}
示例3: RunTest
/**************************************************************************
 * Run a test suite. Test suites are organized into expected input/output
 * blocks separated by back quotes (`). A back quote in the first column
 * starts a block: the remainder of that line is collected as the arguments
 * for the test, the following lines are the input, the next back-quote line
 * switches to the expected output, and a further back-quote line runs the
 * collected test and starts the next one. Each test is run through the
 * RunTest(reader, line, args, input, expectedOutput) overload.
 *
 * reader - reader whose base URI is set to the absolute URI of the test file.
 * file   - path of the test suite file, resolved against the current directory.
 **************************************************************************/
void RunTest(SgmlReader reader, string file)
{
    Console.WriteLine(file);
    // Dispose the file reader when done, including on exceptions.
    using (StreamReader sr = new StreamReader(file))
    {
        StringBuilder input = new StringBuilder();
        StringBuilder expectedOutput = new StringBuilder();
        StringBuilder current = null;
        StringBuilder args = new StringBuilder();
        // Use the platform separator rather than a hard-coded backslash so the
        // directory URI is formed correctly on non-Windows systems as well.
        Uri baseUri = new Uri(new Uri(Directory.GetCurrentDirectory() + Path.DirectorySeparatorChar), file);
        reader.SetBaseUri(baseUri.AbsoluteUri);
        int start = 1;
        int line = 1;
        int pos = 1;
        bool skipToEOL = false;
        bool readArgs = false;
        int i;
        // Stop at end of file before processing the character, so the EOF
        // sentinel (-1) is never appended to the buffers as '\uffff'.
        while ((i = sr.Read()) != -1)
        {
            char ch = (char)i;
            if (pos == 1 && ch == '`')
            {
                ++pos;
                if (current == null)
                {
                    // First delimiter in the file: start collecting input.
                    current = input;
                    current.Length = 0;
                    readArgs = true;
                }
                else if (current == input)
                {
                    current = expectedOutput;
                }
                else
                {
                    // Delimiter after an expected-output section: run the
                    // collected test, then reset for the next one.
                    RunTest(reader, start, args.ToString(), input.ToString(), expectedOutput.ToString());
                    start = line;
                    input.Length = 0;
                    args.Length = 0;
                    expectedOutput.Length = 0;
                    current = input;
                    readArgs = true;
                }
                // The rest of a delimiter line is not part of the input/output text.
                skipToEOL = true;
            }
            else
            {
                ++pos;
                if (current != null)
                {
                    if (readArgs)
                    {
                        args.Append(ch);
                    }
                    else if (!skipToEOL)
                    {
                        current.Append(ch);
                    }
                }
                if (ch == '\r')
                {
                    line++; pos = 1;
                    if (sr.Peek() == '\n')
                    {
                        i = sr.Read();
                        // current is still null for CRLF lines that precede the
                        // first delimiter; there is nothing to append to then.
                        if (!skipToEOL && current != null) current.Append((char)i);
                        // NOTE(review): this appends '\r' (ch) again rather than the
                        // '\n' just consumed; left unchanged because the argument
                        // parser is not visible here - confirm intent.
                        if (readArgs) args.Append(ch);
                    }
                    skipToEOL = false;
                    readArgs = false;
                }
                else if (ch == '\n')
                {
                    skipToEOL = false;
                    readArgs = false;
                    line++; pos = 1;
                }
            }
        }
        // Run a trailing test that was not terminated by a delimiter line.
        // current stays null when the file contains no delimiter at all.
        if (current != null && current.Length > 0 && expectedOutput.Length > 0)
        {
            RunTest(reader, start, args.ToString(), input.ToString(), expectedOutput.ToString());
        }
    }
}
示例4: ConvertHtmlToXHtml
/// <summary>
/// Replaces the body of <paramref name="entry"/> with the result of running it
/// through the <c>ConvertHtmlToXHtml(reader, body, null)</c> overload, using an
/// <see cref="SgmlReader"/> whose base URI is the current blog's root URL.
/// </summary>
/// <param name="entry">Entry whose <c>Body</c> is read and overwritten.</param>
/// <returns>
/// Always <c>true</c>: this method has no failure path of its own.
/// </returns>
public static bool ConvertHtmlToXHtml(Entry entry)
{
    SgmlReader sgml = new SgmlReader();
    string rootUrl = Config.CurrentBlog.RootUrl.ToString();
    sgml.SetBaseUri(rootUrl);

    string converted = ConvertHtmlToXHtml(sgml, entry.Body, null);
    entry.Body = converted;

    return true;
}
示例5: GetDocReader
/// <summary>
/// Creates an <see cref="SgmlReader"/> with document type "HTML" that reads
/// from the given markup string.
/// </summary>
/// <param name="html">Markup to read.</param>
/// <param name="baseUri">
/// Base URI for the reader; skipped when it is <c>null</c> or its string form is empty.
/// </param>
/// <returns>The configured reader.</returns>
private static XmlReader GetDocReader(
    string html,
    Uri baseUri )
{
    SgmlReader sgml = new SgmlReader();

    // Only a usable, non-empty URI is handed to the reader.
    string baseText = baseUri == null ? null : baseUri.ToString();
    bool hasBase = !string.IsNullOrEmpty( baseText );
    if ( hasBase )
    {
        sgml.SetBaseUri( baseText );
    }

    sgml.DocType = @"HTML";
    StringReader source = new StringReader( html );
    sgml.InputStream = source;
    return sgml;
}
示例6: getDocReader
/// <summary>
/// Creates an <see cref="Sgml.SgmlReader"/> with document type "HTML" that
/// reads from the given markup string.
/// </summary>
/// <param name="html">Markup to read.</param>
/// <param name="baseUrl">
/// Base URL for the reader; no base URI is set when it is <c>null</c> or empty.
/// </param>
/// <returns>The configured reader.</returns>
private static XmlReader getDocReader(
    string html,
    string baseUrl )
{
    var r = new Sgml.SgmlReader();
    // IsNullOrEmpty rather than Length > 0: a null baseUrl previously raised a
    // NullReferenceException instead of simply meaning "no base URI".
    if ( !string.IsNullOrEmpty( baseUrl ) )
    {
        r.SetBaseUri( baseUrl );
    }
    r.DocType = @"HTML";
    r.InputStream = new StringReader( html );
    return r;
}
示例7: GetDocReader
/// <summary>
/// Detects URLs in styles.
/// </summary>
/// <param name="baseUri">The base URI.</param>
/// <param name="attributeName">Name of the attribute.</param>
/// <param name="attributeValue">The attribute value.</param>
/// <returns></returns>
//private List<UriResourceInformation> ExtractStyleUrls(
// Uri baseUri,
// string attributeName,
// string attributeValue)
//{
// List<UriResourceInformation> result =
// new List<UriResourceInformation>();
// if (string.Compare(attributeName, @"style", true) == 0)
// {
// if (attributeValue != null &&
// attributeValue.Trim().Length > 0)
// {
// MatchCollection matchs = Regex.Matches(
// attributeValue,
// @"url\s*\(\s*([^\)\s]+)\s*\)",
// RegexOptions.Singleline | RegexOptions.IgnoreCase);
// if (matchs.Count > 0)
// {
// foreach (Match match in matchs)
// {
// if (match != null && match.Success)
// {
// string url = match.Groups[1].Value;
// UriResourceInformation ui =
// new UriResourceInformation(
// _settings.Options,
// url,
// new Uri(url, UriKind.RelativeOrAbsolute),
// baseUri,
// UriType.Resource,
// _uriInfo.AbsoluteUri,
// );
// bool isOnSameSite =
// ui.IsOnSameSite(baseUri);
// if ((isOnSameSite ||
// !_settings.Options.StayOnSite) &&
// ui.IsProcessableUri)
// {
// result.Add(ui);
// }
// }
// }
// }
// }
// }
// return result;
//}
/// <summary>
/// Creates an <see cref="SgmlReader"/> over the given markup that keeps all
/// whitespace and applies no case folding, then advances it by one
/// <c>Read</c> call before returning it.
/// </summary>
/// <param name="html">Markup to read.</param>
/// <param name="baseUri">
/// Base URI for the reader; skipped when it is <c>null</c> or its string form is empty.
/// </param>
/// <returns>The configured reader, after its first <c>Read</c> call.</returns>
private static XmlReader GetDocReader(
    string html,
    Uri baseUri)
{
    SgmlReader sgml = new SgmlReader();

    if (baseUri != null)
    {
        string baseText = baseUri.ToString();
        if (!string.IsNullOrEmpty(baseText))
        {
            sgml.SetBaseUri(baseText);
        }
    }

    sgml.DocType = @"HTML";
    sgml.WhitespaceHandling = WhitespaceHandling.All;
    sgml.CaseFolding = CaseFolding.None;
    sgml.InputStream = new StringReader(html);

    // The original discards the result of this first Read call as well.
    sgml.Read();
    return sgml;
}