本文整理汇总了 Java 中 org.jsoup.nodes.Document.text 方法的典型用法代码示例。如果您正苦于以下问题：Java Document.text 方法的具体用法？Java Document.text 怎么用？Java Document.text 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 org.jsoup.nodes.Document 的用法示例。
在下文中一共展示了Document.text方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: process
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Parses a fetched page.
 *
 * <p>Collects the useful information from the page's document (its title and its text)
 * together with the URL of the page's seed, and stores the result as the page's items so
 * that a later save step can persist it.
 *
 * @param page the page to process
 * @return the same page instance, now carrying the extracted items
 */
public Page process(Page page) {
    Document parsed = page.getDocument();

    Map<String, String> extracted = new HashMap<>();
    extracted.put("title", parsed.title());
    extracted.put("text", parsed.text());
    extracted.put("url", page.getUrlSeed().getUrl());

    page.setItems(extracted);
    return page;
}
示例2: canGetC3PO
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Requests person 2 from the SWAPI endpoint as JSON and checks that the response text
 * mentions "C-3PO".
 *
 * @throws IOException if the request fails
 */
@Test
public void canGetC3PO() throws IOException {
    // JSoup does not supply JSON parsing routines, so the body is inspected as plain text.
    String body = Jsoup.connect("http://swapi.co/api/people/2/?format=json")
            .ignoreContentType(true)
            .get()
            .text();
    System.out.println(body);
    Assert.assertTrue(body.contains("C-3PO"));
}
示例3: fetchAndSave
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Reads the feed at {@code this.url}, strips HTML from the first content element of each
 * entry, and saves the entries whose detected language is reliably {@code language}.
 *
 * <p>For each matching entry a {@code FileSaver} and a {@code BlogPost} are created; when
 * {@code DAO.saveEntry} accepts the post, the file is saved, {@code numOfFiles} is
 * incremented and {@code wrongCount} is reset to zero. Each entry in a different language
 * increments {@code wrongCount}; once it exceeds 3 the remaining entries are abandoned.
 * Entries without any content element are skipped. Any exception raised while handling an
 * entry is logged and stops the iteration.
 *
 * <p>NOTE(review): {@code numOfFiles} and {@code wrongCount} are not declared in this
 * method, so they presumably are fields that persist across calls; confirm.
 *
 * @return an entry whose key is {@code numOfFiles} and whose value is {@code wrongCount}
 * @throws Exception if the URL is malformed or the feed cannot be read or parsed
 */
public AbstractMap.SimpleEntry<Integer, Integer> fetchAndSave() throws Exception {
    URL url = new URL(this.url);
    SyndFeedInput input = new SyndFeedInput();
    SyndFeed feed;
    // Close the reader (and its underlying stream) once the feed has been built.
    try (XmlReader reader = new XmlReader(url)) {
        feed = input.build(reader);
    }

    List<SyndEntry> entries = feed.getEntries();
    int items = entries.size();
    if (items > 0) {
        log.info("Attempting to parse rss feed: " + this.url);
        log.info("This Feed has " + items + " items");
    }

    for (SyndEntry item : entries) {
        log.info("Title: " + item.getTitle());
        log.info("Link: " + item.getLink());

        // An entry may carry no content element at all; skip it instead of failing the
        // whole fetch with an IndexOutOfBoundsException.
        List<?> contents = item.getContents();
        if (contents == null || contents.isEmpty()) {
            log.info("Item " + item.getTitle() + " has no content, skipping this post");
            continue;
        }
        SyndContentImpl contentHolder = (SyndContentImpl) contents.get(0);
        String content = contentHolder.getValue();

        // content might contain html data, let's clean it up
        Document doc = Jsoup.parse(content);
        content = doc.text();

        try {
            Result result = ld.detectLanguage(content, language);
            if (result.languageCode.equals(language) && result.isReliable) {
                FileSaver file = new FileSaver(content, this.language, "bs", item.getLink(), item.getUri(), String.valueOf(content.hashCode()));
                String fileName = file.getFileName();
                BlogPost post = new BlogPost(content, this.language, null, "bs", item.getLink(), item.getUri(), fileName);
                if (DAO.saveEntry(post)) {
                    file.save(this.logDb);
                    numOfFiles++;
                    wrongCount = 0;
                }
            } else {
                log.info("Item " + item.getTitle() + "is in a diff languageCode, skipping this post " + result.languageCode);
                wrongCount++;
                // Only give up on the feed once the wrong-language threshold is exceeded;
                // a single mismatching post merely gets skipped.
                if (wrongCount > 3) {
                    log.info("Already found 3 posts in the wrong languageCode, skipping this blog");
                    break;
                }
            }
        } catch (Exception e) {
            // Stop processing this feed on the first failure, keeping what was saved so far.
            log.error(e);
            break;
        }
    }
    return new AbstractMap.SimpleEntry<>(numOfFiles, wrongCount);
}
示例4: getText
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Returns the text of an HTML block, obtained by parsing the block's characters as an
 * HTML body fragment and taking the resulting document's text.
 *
 * @param node the HTML block whose characters are parsed
 * @return the text of the parsed fragment
 */
String getText(final HtmlBlock node) {
    final String html = node.getChars().toString();
    return Jsoup.parseBodyFragment(html).text();
}
示例5: main
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Fetches http://www.bluetata.com and prints the text of the returned document to
 * standard output. An {@link IOException} raised while fetching is reported by printing
 * its stack trace.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    try {
        // Connect to the website, fetch the HTML document and take its text.
        String pageText = Jsoup.connect("http://www.bluetata.com").get().text();
        // Print the extracted text.
        System.out.println(pageText);
    } catch (IOException ioex) {
        ioex.printStackTrace();
    }
}
示例6: canGetLuke
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Requests person 1 from the SWAPI endpoint as JSON and checks that the response text
 * mentions "Luke Skywalker".
 *
 * @throws IOException if the request fails
 */
@Test
public void canGetLuke() throws IOException {
    // The content type has to be ignored, otherwise an exception is thrown for anything
    // that is not text/*, application/xml, or application/xhtml+xml.
    Connection request = Jsoup.connect("http://swapi.co/api/people/1/?format=json").ignoreContentType(true);
    String body = request.get().text();
    System.out.println(body);
    // JSoup does not supply JSON parsing routines, so a substring check is used instead.
    Assert.assertTrue(body.contains("Luke Skywalker"));
}