本文整理汇总了 C# 中 PropertyBag.GetResponse 方法的典型用法代码示例。如果您正苦于以下问题：C# PropertyBag.GetResponse 方法的具体用法？C# PropertyBag.GetResponse 怎么用？C# PropertyBag.GetResponse 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 PropertyBag 的用法示例。
在下文中一共展示了 PropertyBag.GetResponse 方法的 7 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 C# 代码示例。
示例1: Process
/// <summary>
/// Spools the response body into a temporary file whose name carries the
/// extension mapped from the content type, then reads that file back through
/// a <c>FilterReader</c> and stores the trimmed result in
/// <c>propertyBag.Text</c>.
/// </summary>
/// <param name="crawler">The crawler driving the pipeline; not used by this step.</param>
/// <param name="propertyBag">The downloaded resource; its Title and Text are written.</param>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    // Only successful downloads are worth filtering.
    if (propertyBag.StatusCode != HttpStatusCode.OK)
    {
        return;
    }

    // No mapped extension means there is nothing to hand to the filter.
    string fileExtension = MapContentTypeToExtension(propertyBag.ContentType);
    if (fileExtension.IsNullOrEmpty())
    {
        return;
    }

    propertyBag.Title = propertyBag.Step.Uri.PathAndQuery;

    const int copyBufferSize = 0x1000;
    using (TempFile scratchFile = new TempFile())
    {
        // The extension is appended to the temp file name before the file is created.
        scratchFile.FileName = scratchFile.FileName + "." + fileExtension;

        using (FileStream target = new FileStream(scratchFile.FileName, FileMode.Create, FileAccess.Write, FileShare.Read, copyBufferSize))
        using (Stream response = propertyBag.GetResponse())
        {
            response.CopyToStream(target);
        }

        // The file is closed at this point, so the filter can open it for reading.
        using (FilterReader textFilter = new FilterReader(scratchFile.FileName))
        {
            propertyBag.Text = textFilter.ReadToEnd().Trim();
        }
    }
}
示例2: Process
/// <summary>
/// Copies the response body to a temporary file, reads its ID3 tag with
/// <c>UltraID3</c> and publishes the tag fields as <c>MP3_*</c> properties
/// on the property bag.
/// </summary>
/// <param name="crawler">The crawler driving the pipeline; not used by this step.</param>
/// <param name="propertyBag">The downloaded resource; receives the MP3_* properties.</param>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    if (propertyBag.StatusCode != HttpStatusCode.OK)
    {
        return;
    }

    const int copyBufferSize = 0x1000;
    using (TempFile scratchFile = new TempFile())
    {
        // The tag reader works on a file name, so the response is written to disk first.
        using (FileStream target = new FileStream(scratchFile.FileName, FileMode.Create, FileAccess.Write, FileShare.Read, copyBufferSize))
        using (Stream response = propertyBag.GetResponse())
        {
            response.CopyToStream(target);
        }

        UltraID3 tag = new UltraID3();
        tag.Read(scratchFile.FileName);

        propertyBag["MP3_Album"].Value = tag.Album;
        propertyBag["MP3_Artist"].Value = tag.Artist;
        propertyBag["MP3_Comments"].Value = tag.Comments;
        propertyBag["MP3_Duration"].Value = tag.Duration;
        propertyBag["MP3_Genre"].Value = tag.Genre;
        propertyBag["MP3_Title"].Value = tag.Title;
    }
}
示例3: Process
/// <summary>
/// Parses the response as an XML sitemap and queues every absolute
/// <c>http://</c> or <c>https://</c> URL found in a <c>loc</c> element as a
/// new crawl step one level deeper than the current step.
/// </summary>
/// <param name="crawler">The crawler that receives the new steps.</param>
/// <param name="propertyBag">The downloaded sitemap resource.</param>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    if (propertyBag.StatusCode != HttpStatusCode.OK)
    {
        return;
    }

    if (!IsXmlContent(propertyBag.ContentType))
    {
        return;
    }

    using (Stream reader = propertyBag.GetResponse())
    using (StreamReader sr = new StreamReader(reader))
    {
        XDocument mydoc = XDocument.Load(sr);
        if (mydoc.Root == null)
        {
            return;
        }

        XName qualifiedName = XName.Get("loc", "http://www.sitemaps.org/schemas/sitemap/0.9");

        // Accept both http and https locations; an http-only filter silently
        // drops every entry of a sitemap served from an https site.
        IEnumerable<string> urlNodes =
            from e in mydoc.Descendants(qualifiedName)
            where !e.Value.IsNullOrEmpty()
                && (e.Value.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                    || e.Value.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
            select e.Value;

        // The base URL depends only on the response, not on the individual
        // link, so it is computed once instead of once per link.
        string baseUrl = propertyBag.ResponseUri.GetLeftPart(UriPartial.Path);

        foreach (string url in urlNodes)
        {
            // add new crawler steps
            string decodedLink = ExtendedHtmlUtility.HtmlEntityDecode(url);
            string normalizedLink = NormalizeLink(baseUrl, decodedLink);
            if (normalizedLink.IsNullOrEmpty())
            {
                continue;
            }

            crawler.AddStep(new Uri(normalizedLink), propertyBag.Step.Depth + 1,
                propertyBag.Step, new Dictionary<string, object>
                    {
                        {Resources.PropertyBagKeyOriginalUrl, url},
                        {Resources.PropertyBagKeyOriginalReferrerUrl, propertyBag.ResponseUri}
                    });
        }
    }
}
示例4: Process
/// <summary>
/// Stores the trimmed response body in <c>propertyBag.Text</c> when the
/// download succeeded and the content type is recognised as text.
/// </summary>
/// <param name="crawler">The crawler driving the pipeline; not used by this step.</param>
/// <param name="propertyBag">The downloaded resource; its Text is written.</param>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    // Short-circuiting keeps the content-type check from running for failed downloads.
    bool downloadSucceeded = propertyBag.StatusCode == HttpStatusCode.OK;
    if (!downloadSucceeded || !IsTextContent(propertyBag.ContentType))
    {
        return;
    }

    using (Stream responseStream = propertyBag.GetResponse())
    {
        propertyBag.Text = responseStream.ReadToEnd().Trim();
    }
}
示例5: Process
/// <summary>
/// Extracts the document title and the text of every page from a PDF
/// response and stores them in <c>propertyBag.Title</c> and
/// <c>propertyBag.Text</c>.
/// </summary>
/// <param name="crawler">The crawler driving the pipeline; validated as non-null.</param>
/// <param name="propertyBag">The downloaded resource; validated as non-null.</param>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    AspectF.Define.
        NotNull(crawler, "crawler").
        NotNull(propertyBag, "propertyBag");

    if (propertyBag.StatusCode != HttpStatusCode.OK)
    {
        return;
    }

    if (!IsPdfContent(propertyBag.ContentType))
    {
        return;
    }

    using (Stream input = propertyBag.GetResponse())
    {
        PdfReader pdfReader = new PdfReader(input);
        try
        {
            // A "Title" key that maps to null would make Trim() throw,
            // so treat it the same as a missing title.
            string title;
            if (pdfReader.Info.TryGetValue("Title", out title) && title != null)
            {
                propertyBag.Title = title.Trim();
            }

            // A text extraction strategy accumulates everything rendered into
            // it. Sharing one instance across pages makes each page's result
            // repeat all earlier pages, so a fresh strategy is created per page.
            propertyBag.Text = Enumerable.Range(1, pdfReader.NumberOfPages).
                Select(pageNumber => PdfTextExtractor.GetTextFromPage(pdfReader, pageNumber, new SimpleTextExtractionStrategy())).
                Join(Environment.NewLine);
        }
        finally
        {
            pdfReader.Close();
        }
    }
}
示例6: Process
/// <summary>
/// Loads an HTML response into an <c>HtmlDocument</c>, publishes the document,
/// its title, its meta data and its extracted text on the property bag, and
/// then queues the links found in the document as new crawl steps.
/// The listing is truncated by its source: the end of the method is not shown.
/// </summary>
/// <param name="crawler">The crawler that receives the new steps; validated as non-null.</param>
/// <param name="propertyBag">The downloaded resource; validated as non-null.</param>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
AspectF.Define.
NotNull(crawler, "crawler").
NotNull(propertyBag, "propertyBag");
// Only successful HTML downloads are processed.
if (propertyBag.StatusCode != HttpStatusCode.OK)
{
return;
}
if (!IsHtmlContent(propertyBag.ContentType))
{
return;
}
HtmlDocument htmlDoc = new HtmlDocument
{
OptionAddDebuggingAttributes = false,
OptionAutoCloseOnEnd = true,
OptionFixNestedTags = true,
OptionReadEncoding = true
};
using (Stream reader = propertyBag.GetResponse())
{
// The stream is handed to DetectEncoding first, so it is rewound to the
// start before the document is actually loaded from it.
Encoding documentEncoding = htmlDoc.DetectEncoding(reader);
reader.Seek(0, SeekOrigin.Begin);
if (!documentEncoding.IsNull())
{
htmlDoc.Load(reader, documentEncoding, true);
}
else
{
htmlDoc.Load(reader, true);
}
}
// Snapshot of the markup before any strip/substitution rule is applied;
// it is the input for both StripText and StripLinks below.
string originalContent = htmlDoc.DocumentNode.OuterHtml;
if (HasTextStripRules || HasSubstitutionRules)
{
string content = StripText(originalContent);
content = Substitute(content, propertyBag.Step);
using (TextReader tr = new StringReader(content))
{
htmlDoc.Load(tr);
}
}
// NOTE(review): the same htmlDoc instance stored here is loaded again
// further down when link/text strip rules exist - confirm consumers of
// "HtmlDoc" expect the link-stripped document in that case.
propertyBag["HtmlDoc"].Value = htmlDoc;
HtmlNodeCollection nodes = htmlDoc.DocumentNode.SelectNodes("//title");
// Extract Title
if (!nodes.IsNull())
{
// All title nodes are joined with ';' into a single title string.
propertyBag.Title = string.Join(";", nodes.
Select(n => n.InnerText).
ToArray()).Trim();
}
// Extract Meta Data
nodes = htmlDoc.DocumentNode.SelectNodes("//meta[@content and @name]");
if (!nodes.IsNull())
{
// Each entry becomes "name: content"; entries with an empty name or
// content value are skipped.
propertyBag["Meta"].Value = (
from entry in nodes
let name = entry.Attributes["name"]
let content = entry.Attributes["content"]
where !name.IsNull() && !name.Value.IsNullOrEmpty() && !content.IsNull() && !content.Value.IsNullOrEmpty()
select name.Value + ": " + content.Value).ToArray();
}
propertyBag.Text = htmlDoc.ExtractText().Trim();
// Link extraction starts again from the original markup (not from the
// text-stripped/substituted version) with the link strip rules applied.
if (HasLinkStripRules || HasTextStripRules)
{
string content = StripLinks(originalContent);
using (TextReader tr = new StringReader(content))
{
htmlDoc.Load(tr);
}
}
// Extract Links
DocumentWithLinks links = htmlDoc.GetLinks();
foreach (string link in links.Links.Union(links.References))
{
if (link.IsNullOrEmpty())
{
continue;
}
// Links are entity-decoded and normalized against the response URL;
// links that normalize to nothing are skipped.
string baseUrl = propertyBag.ResponseUri.GetLeftPart(UriPartial.Path);
string decodedLink = ExtendedHtmlUtility.HtmlEntityDecode(link);
string normalizedLink = NormalizeLink(baseUrl, decodedLink);
if (normalizedLink.IsNullOrEmpty())
{
continue;
}
crawler.AddStep(new Uri(normalizedLink), propertyBag.Step.Depth + 1,
propertyBag.Step, new Dictionary<string, object>
//......... part of the code is omitted here .........
示例7: Process
/// <summary>
/// Reads the title and a rough text rendering from a PDF response by walking
/// the raw content tokens of every page, and stores them in
/// <c>propertyBag.Title</c> and <c>propertyBag.Text</c>.
/// </summary>
/// <param name="crawler">The crawler driving the pipeline; validated as non-null.</param>
/// <param name="propertyBag">The downloaded resource; validated as non-null.</param>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    AspectF.Define.
        NotNull(crawler, "crawler").
        NotNull(propertyBag, "propertyBag");

    // Short-circuiting keeps the content-type check from running for failed downloads.
    if (propertyBag.StatusCode != HttpStatusCode.OK || !IsPdfContent(propertyBag.ContentType))
    {
        return;
    }

    using (Stream input = propertyBag.GetResponse())
    {
        PdfReader pdfReader = new PdfReader(input);
        try
        {
            // The title is only taken over when it is present and non-empty after trimming.
            object rawTitle = pdfReader.Info["Title"];
            if (!rawTitle.IsNull())
            {
                string trimmedTitle = Convert.ToString(rawTitle, CultureInfo.InvariantCulture).Trim();
                if (!trimmedTitle.IsNullOrEmpty())
                {
                    propertyBag.Title = trimmedTitle;
                }
            }

            StringBuilder extracted = new StringBuilder();

            // Following code from:
            // http://www.vbforums.com/showthread.php?t=475759
            for (int pageIndex = 1; pageIndex <= pdfReader.NumberOfPages; pageIndex++)
            {
                byte[] pageContent = pdfReader.GetPageContent(pageIndex);
                if (!pageContent.IsNull())
                {
                    PRTokeniser tokeniser = new PRTokeniser(pageContent);
                    while (tokeniser.NextToken())
                    {
                        int kind = tokeniser.TokenType;
                        string text = tokeniser.StringValue;

                        if (kind == PRTokeniser.TK_STRING)
                        {
                            // String tokens carry the visible text; separate them with a space.
                            extracted.Append(text);
                            extracted.Append(" ");
                        }
                        else if ((kind == 1 && text == "-600") || (kind == 10 && text == "TJ"))
                        {
                            // NOTE(review): 1 and 10 are presumably the tokeniser's
                            // number and "other" token kinds - confirm against the
                            // PRTokeniser constants. Both cases only emit a separator.
                            extracted.Append(" ");
                        }
                    }
                }
            }

            propertyBag.Text = extracted.ToString();
        }
        finally
        {
            pdfReader.Close();
        }
    }
}