本文整理汇总了Java中org.apache.pdfbox.text.TextPosition类的典型用法代码示例。如果您正苦于以下问题:Java TextPosition类的具体用法?Java TextPosition怎么用?Java TextPosition使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
TextPosition类属于org.apache.pdfbox.text包,在下文中一共展示了TextPosition类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: writeString
import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Writes one pipe-delimited record per text position to {@code writer}.
 * <p>
 * Every record is prefixed with the platform line separator and contains, in this order,
 * the {@code round(...)}-ed values of {@code getXDirAdj()}, {@code getYDirAdj()},
 * {@code getFontSize()}, {@code getXScale()}, {@code getHeightDir()},
 * {@code getWidthOfSpace()} and {@code getWidthDirAdj()}, followed by the raw
 * {@code getUnicode()} text. The assembled record is trimmed before it is written, so a
 * trailing whitespace-only glyph text is dropped from the output.
 *
 * @param string        the text of the run; not used by this implementation
 * @param textPositions the positions to dump, one record each
 * @throws IOException propagated from {@code writer.write(...)}
 */
@Override
protected void writeString(String string, List<TextPosition> textPositions) throws IOException {
    final String separator = "|";
    for (TextPosition glyph : textPositions) {
        StringBuilder record = new StringBuilder();
        record.append(round(glyph.getXDirAdj())).append(separator);
        record.append(round(glyph.getYDirAdj())).append(separator);
        record.append(round(glyph.getFontSize())).append(separator);
        record.append(round(glyph.getXScale())).append(separator);
        record.append(round(glyph.getHeightDir())).append(separator);
        record.append(round(glyph.getWidthOfSpace())).append(separator);
        record.append(round(glyph.getWidthDirAdj())).append(separator);
        record.append(glyph.getUnicode());
        writer.write(System.lineSeparator() + record.toString().trim());
    }
}
示例2: newFor
import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Builds a {@code Text} from a text position and the graphics state in effect.
 * <p>
 * Geometry, font and glyph text come from {@code tp}; the stroking and non-stroking
 * colors come from {@code gs}. {@code pointSize} is taken from
 * {@code getFontSizeInPt()} while {@code fontSize} is taken from {@code getYScale()}.
 * The width is copied unchanged from {@code getWidth()}; no word-spacing adjustment
 * is applied.
 *
 * @param tp   the text position to copy geometry, font and text from
 * @param gs   the graphics state supplying the colors
 * @param text not used by this method
 * @return a newly created, fully populated {@code Text}
 */
public static Text newFor(TextPosition tp, PDGraphicsState gs, String text) {
    Text result = new Text();

    // Position and extent.
    result.x = tp.getXDirAdj();
    result.baseline = tp.getYDirAdj();
    result.width = tp.getWidth();
    result.height = tp.getHeight();

    // Font information.
    result.font = tp.getFont();
    result.pointSize = tp.getFontSizeInPt();
    result.fontSize = tp.getYScale();

    // Colors from the current graphics state.
    result.strokeColor = gs.getStrokingColor();
    result.nonStrokeColor = gs.getNonStrokingColor();

    // The glyph text; tempRun starts out as the same value as run.
    result.run = tp.getUnicode();
    result.tempRun = result.run;

    return result;
}
示例3: deleteCharsInPath
import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Removes from every list in {@code charactersByArticle} the text positions that lie
 * inside {@code linePath}.
 * <p>
 * A position counts as inside when its start point (the text matrix applied to the
 * origin, shifted by {@code lowerLeftX}/{@code lowerLeftY}) is contained in the path, or,
 * if {@code checkEndPointToo} is set, when its end point (start point moved right by the
 * position's width) is contained. The number of removed positions is printed to standard
 * output for each list that had any.
 */
void deleteCharsInPath() {
    for (List<TextPosition> article : charactersByArticle) {
        List<TextPosition> insidePath = new ArrayList<>();
        for (TextPosition glyph : article) {
            Vector origin = glyph.getTextMatrix().transform(new Vector(0, 0));
            Vector endPoint = new Vector(origin.getX() + glyph.getWidth(), origin.getY());

            boolean hit = linePath.contains(lowerLeftX + origin.getX(), lowerLeftY + origin.getY());
            if (!hit && checkEndPointToo) {
                // Only consult the end point when the start point missed.
                hit = linePath.contains(lowerLeftX + endPoint.getX(), lowerLeftY + endPoint.getY());
            }
            if (hit) {
                insidePath.add(glyph);
            }
        }
        if (!insidePath.isEmpty()) {
            System.out.println(insidePath.size());
            article.removeAll(insidePath);
        }
    }
}
示例4: testCoverTextByRectanglesMwbI201711
import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * <a href="https://stackoverflow.com/questions/46080131/text-coordinates-when-stripping-from-pdfbox">
 * Text coordinates when stripping from PDFBox
 * </a>
 * <br/>
 * <a href="https://download-a.akamaihd.net/files/media_mwb/b7/mwb_I_201711.pdf">
 * mwb_I_201711.pdf
 * </a>
 * <p>
 * This test applies the OP's code to his example PDF file and indeed, there is an offset!
 * This is due to the <code>LegacyPDFStreamEngine</code> method <code>showGlyph</code>
 * which manipulates the text rendering matrix to make the lower left corner of the
 * crop box the origin. In the current version of this test, that offset is corrected,
 * see below.
 * </p>
 * <p>
 * The document and the appended content stream are managed by try-with-resources so
 * they are closed even if an intermediate step throws.
 * </p>
 *
 * @throws IOException if loading, stripping, drawing or saving the document fails
 */
@Test
public void testCoverTextByRectanglesMwbI201711() throws IOException {
    try (InputStream resource = getClass().getResourceAsStream("mwb_I_201711.pdf");
         PDDocument doc = PDDocument.load(resource)) {
        myStripper stripper = new myStripper();
        stripper.setStartPage(1); // fix it to first page just to test it
        stripper.setEndPage(1);
        stripper.getText(doc);

        TextLine line = stripper.lines.get(1); // the line i want to paint on

        // Track the smallest and largest X translation over the line. Infinities are used
        // as start values so that a genuine coordinate of -1 is not mistaken for "unset".
        float minx = Float.POSITIVE_INFINITY;
        float maxx = Float.NEGATIVE_INFINITY;
        for (TextPosition pos : line.textPositions) {
            if (pos == null) {
                continue;
            }
            float translateX = pos.getTextMatrix().getTranslateX();
            if (translateX < minx) {
                minx = translateX;
            }
            if (translateX > maxx) {
                maxx = translateX;
            }
        }

        TextPosition firstPosition = line.textPositions.get(0);
        TextPosition lastPosition = line.textPositions.get(line.textPositions.size() - 1);

        // corrected x and y: shift by the crop box's lower left corner
        PDRectangle cropBox = doc.getPage(0).getCropBox();
        float x = minx + cropBox.getLowerLeftX();
        float y = firstPosition.getTextMatrix().getTranslateY() + cropBox.getLowerLeftY();
        float w = (maxx - minx) + lastPosition.getWidth();
        float h = lastPosition.getHeightDir();

        // The content stream must be closed before the document is saved.
        try (PDPageContentStream contentStream = new PDPageContentStream(doc, doc.getPage(0),
                PDPageContentStream.AppendMode.APPEND, false, true)) {
            contentStream.setNonStrokingColor(Color.RED);
            contentStream.addRect(x, y, w, h);
            contentStream.fill();
        }

        File fileout = new File(RESULT_FOLDER, "mwb_I_201711-withRectangles.pdf");
        doc.save(fileout);
    }
}
示例5: writeString_WithSampleTextPositions_WritesRoundedCsv
import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Verifies that {@code ChartStripper.writeString} writes a single line-separator-prefixed,
 * pipe-delimited record with three-decimal values for the sample text position.
 */
@Test
public void writeString_WithSampleTextPositions_WritesRoundedCsv() throws Exception {
    // Arrange: a mocked writer and a one-element position list.
    StringWriter mockWriter = mock(StringWriter.class);
    ChartStripper stripper = new ChartStripper(mockWriter);
    List<TextPosition> positions = new ArrayList<>();
    positions.add(sampleTextPosition());

    // Act
    stripper.writeString("A", positions);

    // Assert: exactly one write carrying the expected record.
    String expectedRecord = System.lineSeparator()
            + "7.000|-6.000|14.000|5.000|11.000|13.000|2.000|A";
    Mockito.verify(mockWriter).write(captor.capture());
    assertThat(captor.getValue(), equalTo(expectedRecord));
}
示例6: writeString
import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Converts each printable text position into a {@code TextElement} and records it.
 * <p>
 * Null positions and positions whose text fails {@code isPrintable} are skipped. A text
 * equal to {@code NBSP} is replaced by a plain space. Each created element is added to
 * both {@code spatialIndex} and {@code textElements}, and {@code minCharWidth} /
 * {@code minCharHeight} are lowered to the element's width/height when smaller.
 *
 * @param string        the text of the run; not used by this implementation
 * @param textPositions the positions to convert
 * @throws IOException declared by the overridden method
 */
@Override
protected void writeString(String string, List<TextPosition> textPositions) throws IOException
{
    for (TextPosition glyph : textPositions)
    {
        if (glyph == null) {
            continue;
        }

        String chars = glyph.getUnicode();
        // skip (do not abort on) glyphs that are not printable
        if (!isPrintable(chars)) {
            continue;
        }

        float height = glyph.getHeightDir();

        // replace non-breaking space for space
        if (chars.equals(NBSP)) {
            chars = " ";
        }

        // workaround a possible bug in PDFBox:
        // https://issues.apache.org/jira/browse/PDFBOX-1755
        float widthOfSpace = glyph.getWidthOfSpace();

        TextElement element = new TextElement(
                Utils.round(glyph.getYDirAdj() - height, 2), // first argument: Y minus height
                Utils.round(glyph.getXDirAdj(), 2),
                Utils.round(glyph.getWidthDirAdj(), 2),
                Utils.round(height, 2),
                glyph.getFont(),
                glyph.getFontSize(),
                chars,
                widthOfSpace,
                glyph.getDir());

        this.minCharWidth = (float) Math.min(this.minCharWidth, element.getWidth());
        this.minCharHeight = (float) Math.min(this.minCharHeight, element.getHeight());

        this.spatialIndex.add(element);
        this.textElements.add(element);
    }
}
示例7: writeString
import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Processes a run of text for the page currently being handled.
 * <p>
 * The first time a {@code PDPage} is seen, a new {@code Page} is created from its crop box
 * with the next page number and the {@code PDPage} is remembered in {@code pagesList}.
 * The current {@code Page} is added to {@code previousPages} if absent. Each text position
 * is then handed to {@code processTextPositionNew} together with the graphics state and
 * the run text, after which rows are coalesced and duplicates removed for the page.
 *
 * @param string the text of the run
 * @param pos    the text positions making up the run
 * @throws IOException declared by the overridden method
 */
@Override
protected void writeString(String string, List<TextPosition> pos) throws IOException {
    PDPage pdPage = this.getCurrentPage();
    boolean pageAlreadySeen = pagesList.contains(pdPage);
    if (!pageAlreadySeen) {
        currentPage = new Page(pdPage.getCropBox(), ++pageCount);
        pagesList.add(pdPage);
    }

    if (!previousPages.contains(currentPage)) {
        previousPages.add(currentPage);
    }

    PDGraphicsState graphicsState = this.getGraphicsState();
    for (TextPosition position : pos) {
        this.processTextPositionNew(position, graphicsState, string);
    }

    coalesceRows(currentPage, string);
    removeDuplicates(currentPage);
}
示例8: checkForValue
import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Appends the text of every position accepted by {@code inField} to {@code value}.
 * <p>
 * A single space is inserted before a glyph when {@code value} is non-empty and the
 * glyph's X translation lies more than half a space width beyond {@code lastX}.
 * {@code lastX} is then advanced to the glyph's X translation plus its width.
 *
 * @param textPositions the positions to inspect, in reading order
 */
void checkForValue(List<TextPosition> textPositions)
{
    for (TextPosition glyph : textPositions)
    {
        if (!inField(glyph))
        {
            continue;
        }

        float glyphX = glyph.getTextMatrix().getTranslateX();
        boolean gapBefore = glyphX > lastX + glyph.getWidthOfSpace() / 2;
        if (gapBefore && value.length() > 0)
        {
            value += " ";
        }
        value += glyph.getUnicode();
        lastX = glyphX + glyph.getWidth();
    }
}
示例9: charAt
import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Returns the first {@code char} of the text of the position at {@code index}.
 * <p>
 * Only the first {@code char} of {@code getUnicode()} is returned, even if the text is
 * longer. An empty text makes {@code String.charAt(0)} throw.
 *
 * @param index the index passed to {@code textPositionAt}
 * @return the first character of that position's text
 */
@Override
public char charAt(int index)
{
    return textPositionAt(index).getUnicode().charAt(0);
}
示例10: toString
import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Joins words into a single NFKC-normalized string.
 * <p>
 * The text of each word is the concatenation of its positions' {@code getUnicode()}
 * values; consecutive words are separated by one space.
 *
 * @param words the words, each given as its list of text positions
 * @return the space-separated, NFKC-normalized text
 */
String toString(List<List<TextPosition>> words)
{
    StringBuilder joined = new StringBuilder();
    String separator = ""; // nothing before the first word, a space before every later one
    for (List<TextPosition> word : words)
    {
        joined.append(separator);
        separator = " ";
        for (TextPosition glyph : word)
        {
            joined.append(glyph.getUnicode());
        }
    }

    // cf. http://stackoverflow.com/a/7171932/1729265
    return Normalizer.normalize(joined, Form.NFKC);
}
示例11: processTextPosition
import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Forwards a text position to the superclass only if it lies inside the current
 * clipping path.
 * <p>
 * With no clipping path, the position is always forwarded. Otherwise its start point (the
 * text matrix applied to the origin, shifted by {@code lowerLeftX}/{@code lowerLeftY})
 * must be contained in the clipping area, and, if {@code checkEndPointToo} is set, so
 * must its end point (start point moved right by the position's width).
 *
 * @param text the text position to filter
 */
@Override
protected void processTextPosition(TextPosition text) {
    Matrix textMatrix = text.getTextMatrix();
    Vector origin = textMatrix.transform(new Vector(0, 0));
    Vector endPoint = new Vector(origin.getX() + text.getWidth(), origin.getY());

    PDGraphicsState graphicsState = getGraphicsState();
    Area clip = graphicsState.getCurrentClippingPath();
    if (clip != null) {
        if (!clip.contains(lowerLeftX + origin.getX(), lowerLeftY + origin.getY())) {
            return; // start point is clipped away
        }
        if (checkEndPointToo
                && !clip.contains(lowerLeftX + endPoint.getX(), lowerLeftY + endPoint.getY())) {
            return; // end point is clipped away
        }
    }
    super.processTextPosition(text);
}
示例12: testCoverTextByRectanglesInput
import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * <a href="https://stackoverflow.com/questions/46080131/text-coordinates-when-stripping-from-pdfbox">
 * Text coordinates when stripping from PDFBox
 * </a>
 * <p>
 * This test applies the OP's code to an arbitrary PDF file and it did work properly
 * (well, it did only cover the text from the baseline upwards but that is to be expected).
 * </p>
 * <p>
 * The document and the appended content stream are managed by try-with-resources so
 * they are closed even if an intermediate step throws.
 * </p>
 *
 * @throws IOException if loading, stripping, drawing or saving the document fails
 */
@Test
public void testCoverTextByRectanglesInput() throws IOException {
    try (InputStream resource = getClass().getResourceAsStream("input.pdf");
         PDDocument doc = PDDocument.load(resource)) {
        myStripper stripper = new myStripper();
        stripper.setStartPage(1); // fix it to first page just to test it
        stripper.setEndPage(1);
        stripper.getText(doc);

        TextLine line = stripper.lines.get(1); // the line i want to paint on

        // Track the smallest and largest X translation over the line. Infinities are used
        // as start values so that a genuine coordinate of -1 is not mistaken for "unset".
        float minx = Float.POSITIVE_INFINITY;
        float maxx = Float.NEGATIVE_INFINITY;
        for (TextPosition pos : line.textPositions) {
            if (pos == null) {
                continue;
            }
            float translateX = pos.getTextMatrix().getTranslateX();
            if (translateX < minx) {
                minx = translateX;
            }
            if (translateX > maxx) {
                maxx = translateX;
            }
        }

        TextPosition firstPosition = line.textPositions.get(0);
        TextPosition lastPosition = line.textPositions.get(line.textPositions.size() - 1);

        float x = minx;
        float y = firstPosition.getTextMatrix().getTranslateY();
        float w = (maxx - minx) + lastPosition.getWidth();
        float h = lastPosition.getHeightDir();

        // The content stream must be closed before the document is saved.
        try (PDPageContentStream contentStream = new PDPageContentStream(doc, doc.getPage(0),
                PDPageContentStream.AppendMode.APPEND, false)) {
            contentStream.setNonStrokingColor(Color.RED);
            contentStream.addRect(x, y, w, h);
            contentStream.fill();
        }

        File fileout = new File(RESULT_FOLDER, "input-withRectangles.pdf");
        doc.save(fileout);
    }
}
示例13: writeString
import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Collects the written text into {@code TextLine} records before delegating to the
 * superclass.
 * <p>
 * When {@code startOfLine} is set, a new {@code TextLine} holding this text and its
 * positions is appended to {@code lines} and the flag is cleared. Otherwise the text and
 * positions are appended to the most recent entry of {@code lines}.
 * <p>
 * NOTE(review): a new line stores the passed-in list by reference, so later
 * {@code addAll} calls modify that same list instance. The append branch also assumes
 * {@code lines} is non-empty.
 *
 * @param text          the text being written
 * @param textPositions the positions belonging to {@code text}
 * @throws IOException propagated from the superclass implementation
 */
@Override
protected void writeString(String text, List<TextPosition> textPositions) throws IOException {
    if (startOfLine) {
        // First chunk of a line: open a fresh record.
        TextLine opened = new TextLine();
        opened.text = text;
        opened.textPositions = textPositions;
        lines.add(opened);
        startOfLine = false;
    } else {
        // Continuation: extend the record opened most recently.
        TextLine current = lines.get(lines.size() - 1);
        current.text += text;
        current.textPositions.addAll(textPositions);
    }
    super.writeString(text, textPositions);
}
示例14: printSubwords
import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Prints every occurrence of {@code searchTerm} found by {@code findSubwords} on each
 * page of the document.
 * <p>
 * For each hit the page number, the hit's x, y and width, and the text and adjusted
 * coordinates of its last text position are printed. A hit whose text differs from the
 * search term is additionally flagged as invalid.
 *
 * @param document   the document to search, page by page (1-based)
 * @param searchTerm the text to look for
 * @throws IOException propagated from {@code findSubwords}
 */
void printSubwords(PDDocument document, String searchTerm) throws IOException
{
    System.out.printf("* Looking for '%s'\n", searchTerm);
    for (int pageNumber = 1; pageNumber <= document.getNumberOfPages(); pageNumber++)
    {
        List<TextPositionSequence> matches = findSubwords(document, pageNumber, searchTerm);
        for (TextPositionSequence match : matches)
        {
            String matchedText = match.toString();
            if (!searchTerm.equals(matchedText))
            {
                System.out.printf("  Invalid (%s) ", matchedText);
            }

            TextPosition lastGlyph = match.textPositionAt(match.length() - 1);
            System.out.printf("  Page %s at %s, %s with width %s and last letter '%s' at %s, %s\n",
                    pageNumber, match.getX(), match.getY(), match.getWidth(),
                    lastGlyph.getUnicode(), lastGlyph.getXDirAdj(), lastGlyph.getYDirAdj());
        }
    }
}
示例15: TextMetrics
import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Captures the metrics of a single text position.
 * <p>
 * Position, extent and font data are copied from {@code tp}; {@code pointSize} comes from
 * {@code getFontSizeInPt()} and {@code fontSize} from {@code getYScale()}. The ascent and
 * descent are computed last via {@code getAscent()} / {@code getDescent()}, after the
 * other fields have been set.
 *
 * @param tp the text position to read the metrics from
 */
public TextMetrics(TextPosition tp)
{
    // Position and extent.
    this.x = tp.getX();
    this.baseline = tp.getY();
    this.width = tp.getWidth();
    this.height = tp.getHeight();

    // Font data.
    this.font = tp.getFont();
    this.pointSize = tp.getFontSizeInPt();
    this.fontSize = tp.getYScale();

    // Derived values; kept last so the fields above are already populated.
    this.ascent = getAscent();
    this.descent = getDescent();
}