本文整理汇总了Java中org.jsoup.parser.Parser类的典型用法代码示例。如果您正苦于以下问题：Java Parser类的具体用法？Java Parser怎么用？Java Parser使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
Parser类属于org.jsoup.parser包，在下文中一共展示了Parser类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: action
import org.jsoup.parser.Parser; //导入依赖的package包/类
/**
 * Prepends a heading list to the article content carried by the event data.
 *
 * <p>Every {@code h1}–{@code h5} element of the article gets an empty anchor {@code span}
 * inserted in front of it, and a matching list entry linking to that anchor is generated.
 * The stylesheet link, the generated list and the rewritten body HTML are then stored back
 * into the article under {@code Article.ARTICLE_CONTENT}.
 *
 * @param data event data holding the article JSON object under {@code Article.ARTICLE}
 * @throws EventException declared by the handler contract
 */
public void action(JSONObject data) throws EventException {
    final JSONObject article = data.optJSONObject(Article.ARTICLE);
    final String originalContent = article.optString(Article.ARTICLE_CONTENT);

    final Document doc = Jsoup.parse(originalContent, StringUtils.EMPTY, Parser.htmlParser());
    // Keep the article markup as written instead of re-indenting it on output.
    doc.outputSettings().prettyPrint(false);

    final StringBuilder toc = new StringBuilder();
    toc.append("<link rel=\"stylesheet\" type=\"text/css\" href=\"" + Latkes.getStaticServePath()
            + "/plugins/list/style.css\" />");

    final Elements headings = doc.select("h1, h2, h3, h4, h5");
    toc.append("<ul class='b3-solo-list'>");

    int index = 0;
    for (final Element heading : headings) {
        final String tag = heading.tagName().toLowerCase();
        final String label = heading.text();
        final String anchorId = "b3_solo_" + tag + "_" + index;

        // Anchor target placed directly in front of the heading it belongs to.
        heading.before("<span id='" + anchorId + "'></span>");

        toc.append("<li class='b3-solo-list-").append(tag)
                .append("'><a href='#").append(anchorId).append("'>")
                .append(label)
                .append("</a></li>");
        index++;
    }
    toc.append("</ul>");

    final Element body = doc.getElementsByTag("body").get(0);
    toc.append(body.html());
    article.put(Article.ARTICLE_CONTENT, toc.toString());
}
示例2: getAndSaveTranscript
import org.jsoup.parser.Parser; //导入依赖的package包/类
/**
 * Fetches the captions/transcript for a given video, concatenates the text of all
 * {@code text} elements in the response and hands the result to a {@code FileSaver}.
 *
 * @param videoID id of the video whose captions are fetched
 * @param lang language the captions should be in; it is converted with
 *             {@code LanguageCode.convertIso2toIso1} before being used in the request
 * @throws IOException if executing the request or reading the response body fails
 */
public void getAndSaveTranscript(String videoID, String lang) throws IOException {
    lang = LanguageCode.convertIso2toIso1(lang);
    String url = captionEndPoint + "lang=" + lang + "&v=" + videoID;
    GetMethod get = new GetMethod(url);
    String xmlData;
    try {
        this.client.executeMethod(get);
        xmlData = get.getResponseBodyAsString();
    } finally {
        // Release the connection on success and on failure so it is not leaked.
        get.releaseConnection();
    }
    // parse XML
    Document doc = Jsoup.parse(xmlData, "", Parser.xmlParser());
    // StringBuilder avoids re-copying the accumulated text for every caption element.
    StringBuilder allCaps = new StringBuilder();
    for (Element e : doc.select("text")) {
        allCaps.append(e.text());
    }
    FileSaver file = new FileSaver(allCaps.toString(), lang, "youtube_caps", url, videoID);
    file.save(logDb);
}
示例3: videoHasCaptionsInLanguage
import org.jsoup.parser.Parser; //导入依赖的package包/类
/**
 * Checks if a given video has captions in our target language, as identified by the user
 * who entered them.
 *
 * <p>The captions index for the video is fetched and every {@code track} element's
 * {@code lang_code} attribute is converted with {@code LanguageCode.convertIso1toIso2}
 * and compared with {@code lang}.
 *
 * @param videoID video to check
 * @param lang target language code
 * @return true if there are captions in {@code lang}
 * @throws IOException if executing the request or reading the response body fails
 */
public boolean videoHasCaptionsInLanguage(String videoID, String lang) throws IOException {
    // visit captions index
    GetMethod get = new GetMethod(captionsIndex + videoID);
    String xmlData;
    try {
        this.client.executeMethod(get);
        xmlData = get.getResponseBodyAsString();
    } finally {
        // Release the connection on success and on failure so it is not leaked.
        get.releaseConnection();
    }
    // parse XML
    Document doc = Jsoup.parse(xmlData, "", Parser.xmlParser());
    // iterate over all captions
    for (Element e : doc.select("track")) {
        String langCode = e.attr("lang_code");
        String fixedLangCode = LanguageCode.convertIso1toIso2(langCode);
        if (fixedLangCode.equals(lang)) {
            return true;
        }
    }
    return false;
}
示例4: parseHtmlTemplate
import org.jsoup.parser.Parser; //导入依赖的package包/类
/**
 * Parses the given HTML template and produces a result object holding the expressions
 * found in it together with the transformed HTML.
 *
 * @param htmlTemplate the HTML template to process, as a String
 * @param context context of the Component currently being processed
 * @return a {@link TemplateParserResult} containing the processed template and expressions
 */
public TemplateParserResult parseHtmlTemplate(String htmlTemplate,
    TemplateParserContext context)
{
    this.context = context;

    // Preserve the case of both tag names and attribute names while parsing.
    final ParseSettings caseSensitive = new ParseSettings(true, true);
    final Document template =
        Parser.htmlParser().settings(caseSensitive).parseInput(htmlTemplate, "");

    result = new TemplateParserResult();
    processImports(template);
    processNode(template);

    // The processed template is the inner HTML of the (possibly modified) body.
    result.setProcessedTemplate(template.body().html());
    return result;
}
示例5: doInBackground
import org.jsoup.parser.Parser; //导入依赖的package包/类
/**
 * Downloads the news feed, parses it as XML and maps every {@code item} element to a
 * {@code News} object.
 *
 * @param voids unused
 * @return the news entries read from the feed; empty if the download failed with an
 *         {@link IOException} (the error is logged)
 */
@Override
protected ArrayList<News> doInBackground(Void ...voids) {
    final ArrayList<News> fetched = new ArrayList<News>();
    try {
        final Document feed = Jsoup.connect(BASE_URL + NEWS_ENDPOINT)
                .parser(Parser.xmlParser())
                .get();
        for (final Element item : feed.select("item")) {
            final News entry = new News();
            entry.title = item.select("title").text();
            entry.description = item.select("description").text();
            entry.thumbnail = item.select("media|thumbnail").text();
            entry.pubDate = item.select("pubDate").text();
            entry.link = item.select("link").text();
            fetched.add(entry);
        }
    } catch (IOException e) {
        Log.e(TAG, "FetchNews error", e);
    }
    return fetched;
}
示例6: removeHtmlTags
import org.jsoup.parser.Parser; //导入依赖的package包/类
/**
 * Strips markup from the given text.
 *
 * <p>First, every match of the pre/source/syntax/comment/mono-tag/fake-tag matchers is
 * replaced by a single space. The remainder is parsed with the XML parser, the content of
 * a fixed set of elements is blanked out, and the document's text is returned.
 *
 * @param markup the markup to clean
 * @return the extracted text; if parsing or text extraction throws, the stack trace is
 *         printed and the matcher-cleaned string is returned instead (best effort)
 */
public String removeHtmlTags(String markup) {
    String clean = preMatcher.matcher(markup).replaceAll(" ");
    clean = sourceMatcher.matcher(clean).replaceAll(" ");
    clean = syntaxMatcher.matcher(clean).replaceAll(" ");
    clean = commentMatcher.matcher(clean).replaceAll(" ");
    clean = monoTagMatcher.matcher(clean).replaceAll(" ");
    clean = fakeTagMatcher.matcher(clean).replaceAll(" ");
    try {
        Document document = Jsoup.parse(clean, "", Parser.xmlParser());
        // "blockquote" was previously misspelled as "blockqoute", so those elements were never matched.
        document.select("math, gallery, ref, br, ins, s, del, tt, blockquote, table").html(" ");
        clean = document.text();
    } catch(Exception e) {
        // Deliberately best effort: fall back to the matcher-cleaned text.
        e.printStackTrace();
    }
    return clean;
}
示例7: loadTrack
import org.jsoup.parser.Parser; //导入依赖的package包/类
/**
 * Requests the thumb info XML for a video and converts it into a track.
 *
 * @param videoId id of the video to look up
 * @return the track extracted from the thumb info document
 * @throws FriendlyException if the request fails or the response status is not 200
 */
private AudioTrack loadTrack(String videoId) {
    checkLoggedIn();

    final HttpGet request = new HttpGet("http://ext.nicovideo.jp/api/getthumbinfo/" + videoId);

    // Resources are closed in reverse order: the response first, then the interface.
    try (HttpInterface httpInterface = getHttpInterface();
         CloseableHttpResponse response = httpInterface.execute(request)) {
        final int status = response.getStatusLine().getStatusCode();
        if (status != 200) {
            throw new IOException("Unexpected response code from video info: " + status);
        }

        final Document thumbInfo = Jsoup.parse(
            response.getEntity().getContent(), StandardCharsets.UTF_8.name(), "", Parser.xmlParser());
        return extractTrackFromXml(videoId, thumbInfo);
    } catch (IOException e) {
        throw new FriendlyException("Error occurred when extracting video info.", SUSPICIOUS, e);
    }
}
示例8: getAllClasses
import org.jsoup.parser.Parser; //导入依赖的package包/类
/**
 * Reads the class names from the first day plan that can be downloaded.
 *
 * <p>Day offsets from {@code -4} up to (excluding) {@code MAX_DAYS} relative to today are
 * tried in order. For each day the plan XML is fetched; a response with status 404 or 300
 * moves on to the next day, any other {@code HttpResponseException} is rethrown.
 *
 * @return the text of the {@code Kurz} child of every {@code Klassen > Kl} element of the
 *         first plan that loads; an empty list if no plan could be loaded
 * @throws IOException if fetching a plan fails (including unexpected HTTP status codes)
 * @throws JSONException if the base URL parameter cannot be read
 * @throws CredentialInvalidException declared by the interface being implemented
 */
@Override
public List<String> getAllClasses() throws IOException, JSONException, CredentialInvalidException {
    String baseurl = data.getString(PARAM_BASEURL) + "/";
    for (int i = -4; i < MAX_DAYS; i++) {
        LocalDate date = LocalDate.now().plusDays(i);
        String dateStr = DateTimeFormat.forPattern("yyyyMMdd").print(date);
        // The timestamp query parameter makes every request URL unique.
        String url = baseurl + "mobdaten/PlanKl" + dateStr + ".xml?_=" + System.currentTimeMillis();
        try {
            String xml = httpGet(url, "UTF-8");
            Document doc = Jsoup.parse(xml, url, Parser.xmlParser());
            List<String> classes = new ArrayList<>();
            for (Element klasse : doc.select("Klassen > Kl")) {
                // first() returns null when there is no match; skip such entries
                // instead of failing with a NullPointerException.
                Element kurz = klasse.select("Kurz").first();
                if (kurz != null) {
                    classes.add(kurz.text());
                }
            }
            return classes;
        } catch (HttpResponseException e) {
            // No plan for this day: try the next one. Anything else is a real error.
            if (e.getStatusCode() != 404 && e.getStatusCode() != 300) {
                throw e;
            }
        }
    }
    return new ArrayList<>();
}
示例9: wrongMetaCharsetFallback
import org.jsoup.parser.Parser; //导入依赖的package包/类
/**
 * Parses a document whose meta charset is {@code iso-8} (the charset the test name calls
 * "wrong") with no explicit charset, and checks that the serialized document matches the
 * expected markup.
 */
@Test
public void wrongMetaCharsetFallback() {
    // The Charset constant cannot fail, unlike the charset-name lookup, so no
    // UnsupportedEncodingException handling is needed.
    final byte[] input = "<html><head><meta charset=iso-8></head><body></body></html>"
            .getBytes(java.nio.charset.StandardCharsets.UTF_8);
    final ByteBuffer inBuffer = ByteBuffer.wrap(input);
    Document doc = DataUtil.parseByteData(inBuffer, null, "http://example.com", Parser.htmlParser());
    final String expected = "<html>\n" +
            " <head>\n" +
            " <meta charset=\"iso-8\">\n" +
            " </head>\n" +
            " <body></body>\n" +
            "</html>";
    assertEquals(expected, doc.toString());
}
示例10: normalXmlParse
import org.jsoup.parser.Parser; //导入依赖的package包/类
/**
 * Fetches JSON from {@code url}, converts it to XML, and prints the XML both before and
 * after it has been parsed by jsoup's XML parser with the charset set to UTF-8.
 */
public static void normalXmlParse(){
    final String convertedXml = CrawlerPack.jsonToXml(CrawlerPack.getFromRemote(url));

    // Result of converting the original JSON to XML
    System.out.println( "原始XML" ) ;
    System.out.println( convertedXml );

    final Document parsed = Jsoup.parse(convertedXml, "", Parser.xmlParser());
    parsed.charset(StandardCharsets.UTF_8);

    // What happened?
    System.out.println( "轉換後XML" ) ;
    System.out.println(parsed.toString());
}
示例11: doInBackground
import org.jsoup.parser.Parser; //导入依赖的package包/类
/**
 * Loads the OPML document, from {@code mUrl} when it is set and from {@code mFile}
 * otherwise, and stores its {@code outline} elements in {@code mOpmlItems}.
 *
 * @param strings unused
 * @return {@code "success"} when the document was loaded, otherwise the message of the
 *         {@link IOException} that occurred
 */
@Override
protected String doInBackground(String... strings) {
    final Document opml;
    try {
        opml = (mUrl != null)
                ? Jsoup.connect(mUrl).parser(Parser.xmlParser()).get()
                : Jsoup.parse(mFile, "UTF-8");
    } catch (IOException e) {
        e.printStackTrace();
        return e.getMessage();
    }

    if (opml != null) {
        mOpmlItems = opml.select("outline");
    }
    return "success";
}
示例12: getDocument
import org.jsoup.parser.Parser; //导入依赖的package包/类
/**
 * Reads the file at the given path as UTF-8 and parses it with the XML parser.
 *
 * @param filepath path of the XML file to read
 * @return the parsed document, or {@code null} if the file does not exist or reading or
 *         parsing fails (a message and the stack trace are printed in both cases)
 */
public static Document getDocument(String filepath) {
    // try-with-resources guarantees the file stream is closed on every path.
    try (InputStream is = new FileInputStream(filepath)) {
        return Jsoup.parse(is, "UTF-8", "", Parser.xmlParser());
    } catch (FileNotFoundException e) {
        System.out.println("FileUtils: no such XML file path exists");
        e.printStackTrace();
        return null;
    } catch (Exception e) {
        System.out.println("Parse file to XML Document error!");
        e.printStackTrace();
        return null;
    }
}
示例13: getBranchRegex
import org.jsoup.parser.Parser; //导入依赖的package包/类
/**
 * Extracts the branch pattern from a job's XML configuration.
 *
 * <p>The pattern is taken from the first {@code pattern} element found under the first
 * {@code gerritProjects} / {@code GERRITPROJECT_TAG} / {@code branches} /
 * {@code BRANCH_TAG} elements. Its first and last characters are dropped, and the
 * regex fragments added after user input are removed or replaced.
 *
 * @param jobXml the job configuration XML
 * @return the branch regex, or {@code null} when an {@link IndexOutOfBoundsException}
 *         occurs (for example because one of the expected elements is missing)
 */
private static String getBranchRegex(String jobXml) {
    try {
        final String anchoredPattern = Jsoup.parse(jobXml, "", Parser.xmlParser())
                .getElementsByTag("gerritProjects").get(0)
                .getElementsByTag(GERRITPROJECT_TAG).get(0)
                .getElementsByTag("branches").get(0)
                .getElementsByTag(BRANCH_TAG).get(0)
                .getElementsByTag("pattern").get(0).html();

        // Remove "^" and "$" at the beginning and the end, respectively
        final String unanchored = anchoredPattern.substring(1, anchoredPattern.length() - 1);

        // Remove sections of regex that we add post-user-input
        return unanchored
                .replace("(?!refs/meta/)", "")
                .replace("(?!refs/)", "refs/heads/");
    } catch (IndexOutOfBoundsException e) {
        return null;
    }
}
示例14: parse
import org.jsoup.parser.Parser; //导入依赖的package包/类
/**
 * Parses the input with the XML parser and renders it according to {@code outType}.
 *
 * @param str the markup to parse
 * @return the document's HTML when {@code outType} is {@code TYPE_HTML}; the document's
 *         text for {@code TYPE_TEXT} and any other value
 */
private String parse(String str) {
    final Document parsed = Jsoup.parse(str, "", Parser.xmlParser());
    switch (outType) {
        case TYPE_HTML:
            return parsed.html();
        case TYPE_TEXT:
        default:
            // Plain text is both the explicit TYPE_TEXT result and the fallback.
            return parsed.text();
    }
}
示例15: getUserInfo
import org.jsoup.parser.Parser; //导入依赖的package包/类
/**
 * Requests the auth token for the current ticket and stores it as an encrypted property
 * under {@code KEY_AUTH_TOKEN}. Other information from the response could be read here
 * as well.
 *
 * @throws Exception if the HTTP request or the handling of the response fails
 */
private void getUserInfo() throws Exception {
    NULogger.getLogger().log(Level.INFO, "{0} Getting auth token value............", getClass());
    httpGet = new NUHttpGet("https://www.box.net/api/1.0/rest?action=get_auth_token&api_key=vkf3k5dh0tg1ibvcikjcp8sx0f89d14u&ticket=" + ticket);
    httpResponse = httpclient.execute(httpGet, httpContext);
    responseString = EntityUtils.toString(httpResponse.getEntity());
    doc = Jsoup.parse(responseString, "", Parser.xmlParser());
    String authToken = doc.select("response auth_token").text();
    // The token is a credential: log only whether one was returned, never its value.
    NULogger.getLogger().log(Level.INFO, "{0} Auth_token received : {1}", new Object[]{getClass(), !authToken.isEmpty()});
    properties().setEncryptedProperty(KEY_AUTH_TOKEN, authToken);
}