当前位置: 首页>>代码示例>>Java>>正文


Java TextPosition类代码示例

本文整理汇总了Java中org.apache.pdfbox.text.TextPosition的典型用法代码示例。如果您正苦于以下问题：Java TextPosition类的具体用法？Java TextPosition怎么用？Java TextPosition使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。


TextPosition类属于org.apache.pdfbox.text包,在下文中一共展示了TextPosition类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: writeString

import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Emits one pipe-delimited row per text position to {@code writer}.
 *
 * <p>Each row holds, in order: X, Y, font size, X scale, height, width of
 * space and width (all passed through {@code round}), followed by the
 * position's Unicode text. Every row is preceded by the platform line
 * separator and trimmed before being written.
 *
 * @param string        the text of the run (not used by this implementation)
 * @param textPositions the positions to serialise
 * @throws IOException if the underlying writer fails
 */
@Override
protected void writeString(String string, List<TextPosition> textPositions) throws IOException {
    final String delimiter = "|";
    for (TextPosition position : textPositions) {
        StringBuilder row = new StringBuilder();
        row.append(round(position.getXDirAdj())).append(delimiter);
        row.append(round(position.getYDirAdj())).append(delimiter);
        row.append(round(position.getFontSize())).append(delimiter);
        row.append(round(position.getXScale())).append(delimiter);
        row.append(round(position.getHeightDir())).append(delimiter);
        row.append(round(position.getWidthOfSpace())).append(delimiter);
        row.append(round(position.getWidthDirAdj())).append(delimiter);
        row.append(position.getUnicode());

        writer.write(System.lineSeparator() + row.toString().trim());
    }
}
 
开发者ID:robinhowlett,项目名称:chart-parser,代码行数:17,代码来源:ChartStripper.java

示例2: newFor

import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Creates a {@code Text} populated from a PDFBox text position and the
 * graphics state that was active when it was shown.
 *
 * @param tp   source of geometry, font and Unicode content
 * @param gs   source of the stroking and non-stroking colours
 * @param text the enclosing string (currently not used)
 * @return a new {@code Text} carrying the copied values
 */
public static Text newFor(TextPosition tp, PDGraphicsState gs,String text) {
	Text result = new Text();

	// Content: tempRun starts out identical to run.
	String unicode = tp.getUnicode();
	result.run = unicode;
	result.tempRun = unicode;

	// Geometry, using the direction-adjusted coordinates.
	result.x = tp.getXDirAdj();
	result.baseline = tp.getYDirAdj();
	// Width is taken as reported; no per-space word-spacing adjustment is applied.
	result.width = tp.getWidth();
	result.height = tp.getHeight();

	// Font information.
	result.font = tp.getFont();
	result.pointSize = tp.getFontSizeInPt();
	result.fontSize = tp.getYScale();

	// Colours from the graphics state.
	result.strokeColor = gs.getStrokingColor();
	result.nonStrokeColor = gs.getNonStrokingColor();

	return result;
}
 
开发者ID:TekstoSense,项目名称:pdf-segmenter,代码行数:26,代码来源:Text.java

示例3: deleteCharsInPath

import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Drops every collected character whose start point lies inside
 * {@code linePath}, or, when {@code checkEndPointToo} is set, whose end
 * point does.
 *
 * <p>The start point is the text matrix applied to the origin; the end
 * point is the start shifted right by the glyph width. Both are offset by
 * {@code lowerLeftX}/{@code lowerLeftY} before the containment test. The
 * number of characters removed from each list is printed to standard out.
 */
void deleteCharsInPath() {
    for (List<TextPosition> articleChars : charactersByArticle) {
        List<TextPosition> covered = new ArrayList<>();
        for (TextPosition candidate : articleChars) {
            Vector origin = candidate.getTextMatrix().transform(new Vector(0, 0));
            Vector tail = new Vector(origin.getX() + candidate.getWidth(), origin.getY());

            boolean hit = linePath.contains(lowerLeftX + origin.getX(), lowerLeftY + origin.getY());
            if (!hit && checkEndPointToo) {
                // The end point is only consulted when the start point is outside the path.
                hit = linePath.contains(lowerLeftX + tail.getX(), lowerLeftY + tail.getY());
            }
            if (hit) {
                covered.add(candidate);
            }
        }

        if (covered.isEmpty()) {
            continue;
        }
        System.out.println(covered.size());
        articleChars.removeAll(covered);
    }
}
 
开发者ID:mkl-public,项目名称:testarea-pdfbox2,代码行数:19,代码来源:PDFVisibleTextStripper.java

示例4: testCoverTextByRectanglesMwbI201711

import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * <a href="https://stackoverflow.com/questions/46080131/text-coordinates-when-stripping-from-pdfbox">
 * Text coordinates when stripping from PDFBox
 * </a>
 * <br/>
 * <a href="https://download-a.akamaihd.net/files/media_mwb/b7/mwb_I_201711.pdf">
 * mwb_I_201711.pdf
 * </a>
 * <p>
 * This test applies the OP's code to his example PDF file and indeed, there is an offset!
 * This is due to the <code>LegacyPDFStreamEngine</code> method <code>showGlyph</code>
 * which manipulates the text rendering matrix to make the lower left corner of the
 * crop box the origin. In the current version of this test, that offset is corrected,
 * see below.
 * </p>
 * <p>
 * The document and the appended content stream are both managed by
 * try-with-resources so that they are released even if stripping, drawing
 * or saving throws.
 * </p>
 *
 * @throws IOException if the PDF cannot be loaded, modified or saved
 */
@Test
public void testCoverTextByRectanglesMwbI201711() throws IOException {
    try (   InputStream resource = getClass().getResourceAsStream("mwb_I_201711.pdf");
            PDDocument doc = PDDocument.load(resource)  ) {
        myStripper stripper = new myStripper();

        stripper.setStartPage(1); // fix it to first page just to test it
        stripper.setEndPage(1);
        stripper.getText(doc);

        TextLine line = stripper.lines.get(1); // the line i want to paint on

        // -1 marks "not yet set"; the loop finds the smallest and largest glyph origin x.
        float minx = -1;
        float maxx = -1;

        for (TextPosition pos: line.textPositions)
        {
            if (pos == null)
                continue;

            if (minx == -1 || pos.getTextMatrix().getTranslateX() < minx) {
                minx = pos.getTextMatrix().getTranslateX();
            }
            if (maxx == -1 || pos.getTextMatrix().getTranslateX() > maxx) {
                maxx = pos.getTextMatrix().getTranslateX();
            }
        }

        TextPosition firstPosition = line.textPositions.get(0);
        TextPosition lastPosition = line.textPositions.get(line.textPositions.size() - 1);

        // corrected x and y: shift by the crop box origin
        PDRectangle cropBox = doc.getPage(0).getCropBox();

        float x = minx + cropBox.getLowerLeftX();
        float y = firstPosition.getTextMatrix().getTranslateY() + cropBox.getLowerLeftY();
        float w = (maxx - minx) + lastPosition.getWidth();
        float h = lastPosition.getHeightDir();

        // The content stream must be closed before the document is saved.
        try (   PDPageContentStream contentStream = new PDPageContentStream(doc, doc.getPage(0), PDPageContentStream.AppendMode.APPEND, false, true)  ) {
            contentStream.setNonStrokingColor(Color.RED);
            contentStream.addRect(x, y, w, h);
            contentStream.fill();
        }

        File fileout = new File(RESULT_FOLDER, "mwb_I_201711-withRectangles.pdf");
        doc.save(fileout);
    }
}
 
开发者ID:mkl-public,项目名称:testarea-pdfbox2,代码行数:69,代码来源:RectanglesOverText.java

示例5: writeString_WithSampleTextPositions_WritesRoundedCsv

import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Verifies that a single sample text position is written as one
 * line-separator-prefixed, pipe-delimited row of rounded values.
 */
@Test
public void writeString_WithSampleTextPositions_WritesRoundedCsv() throws Exception {
    // Arrange: a mocked writer and a one-element position list.
    StringWriter mockedWriter = mock(StringWriter.class);
    ChartStripper stripperUnderTest = new ChartStripper(mockedWriter);
    List<TextPosition> positions = new ArrayList<>();
    positions.add(sampleTextPosition());

    // Act
    stripperUnderTest.writeString("A", positions);

    // Assert: exactly one write carrying the expected row.
    Mockito.verify(mockedWriter).write(captor.capture());
    String expectedRow = System.lineSeparator() +
            "7.000|-6.000|14.000|5.000|11.000|13.000|2.000|A";
    assertThat(captor.getValue(), equalTo(expectedRow));
}
 
开发者ID:robinhowlett,项目名称:chart-parser,代码行数:17,代码来源:ChartStripperTest.java

示例6: writeString

import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Converts each printable text position into a {@code TextElement} and
 * records it in both {@code spatialIndex} and {@code textElements}, while
 * tracking the smallest character width and height seen so far.
 *
 * <p>Null positions and positions whose text is not printable are skipped.
 * A non-breaking space is stored as a regular space.
 *
 * @param string        the text of the run (not used by this implementation)
 * @param textPositions the positions making up the run
 * @throws IOException declared by the overridden method
 */
@Override
protected void writeString(String string, List<TextPosition> textPositions) throws IOException
{
    for (TextPosition position : textPositions)
    {
        if (position == null) {
            continue;
        }

        String glyph = position.getUnicode();

        // skip anything that is not printable
        if (!isPrintable(glyph)) {
            continue;
        }

        float height = position.getHeightDir();

        // a non-breaking space is recorded as an ordinary space
        if (glyph.equals(NBSP)) {
            glyph = " ";
        }

        // workaround a possible bug in PDFBox:
        // https://issues.apache.org/jira/browse/PDFBOX-1755
        float widthOfSpace = position.getWidthOfSpace();

        TextElement element = new TextElement(
                Utils.round(position.getYDirAdj() - height, 2),
                Utils.round(position.getXDirAdj(), 2),
                Utils.round(position.getWidthDirAdj(), 2),
                Utils.round(height, 2),
                position.getFont(),
                position.getFontSize(),
                glyph,
                widthOfSpace,
                position.getDir());

        this.minCharWidth = (float) Math.min(this.minCharWidth, element.getWidth());
        this.minCharHeight = (float) Math.min(this.minCharHeight, element.getHeight());

        this.spatialIndex.add(element);
        this.textElements.add(element);
    }
}
 
开发者ID:redmyers,项目名称:484_P7_1-Java,代码行数:39,代码来源:TextStripper.java

示例7: writeString

import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Registers the current PDF page the first time it is seen, feeds every
 * text position of the run to {@code processTextPositionNew}, and then
 * coalesces rows and removes duplicates on the current page model.
 *
 * @param string the text of the run
 * @param pos    the positions making up the run
 * @throws IOException declared by the overridden method
 */
@Override
protected void writeString(String string, List<TextPosition> pos) throws IOException {
    PDPage pdfPage = this.getCurrentPage();

    // First encounter of this PDF page: start a new page model for it.
    boolean pageAlreadyKnown = pagesList.contains(pdfPage);
    if (!pageAlreadyKnown) {
        currentPage = new Page(pdfPage.getCropBox(), ++pageCount);
        pagesList.add(pdfPage);
    }

    boolean modelAlreadyRecorded = previousPages.contains(currentPage);
    if (!modelAlreadyRecorded) {
        previousPages.add(currentPage);
    }

    PDGraphicsState graphicsState = this.getGraphicsState();
    for (TextPosition position : pos) {
        this.processTextPositionNew(position, graphicsState, string);
    }

    coalesceRows(currentPage, string);
    removeDuplicates(currentPage);
}
 
开发者ID:TekstoSense,项目名称:pdf-segmenter,代码行数:23,代码来源:TextExtractor.java

示例8: checkForValue

import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Appends the text of every position lying in the field to {@code value}.
 *
 * <p>A single space is inserted first when the position starts more than
 * half a space width to the right of {@code lastX} and {@code value} is not
 * empty. {@code lastX} is then moved to the right edge of the position.
 *
 * @param textPositions the positions to inspect
 */
void checkForValue(List<TextPosition> textPositions)
{
    for (TextPosition candidate : textPositions)
    {
        if (!inField(candidate))
        {
            continue;
        }

        float startX = candidate.getTextMatrix().getTranslateX();
        boolean gapBefore = startX > lastX + candidate.getWidthOfSpace() / 2;
        if (gapBefore && value.length() > 0)
        {
            value += " ";
        }
        value += candidate.getUnicode();
        lastX = startX + candidate.getWidth();
    }
}
 
开发者ID:mkl-public,项目名称:testarea-pdfbox2,代码行数:15,代码来源:HelloSignAnalyzer.java

示例9: charAt

import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Returns the first character of the Unicode text of the text position at
 * the given index.
 *
 * @param index index passed on to {@code textPositionAt}
 * @return the first {@code char} of that position's Unicode string
 */
@Override
public char charAt(int index)
{
    return textPositionAt(index).getUnicode().charAt(0);
}
 
开发者ID:mkl-public,项目名称:testarea-pdfbox2,代码行数:8,代码来源:TextPositionSequence.java

示例10: toString

import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Joins the given words with single spaces and returns the NFKC-normalised
 * result. Each word is the concatenation of the Unicode text of its
 * positions.
 *
 * @param words the words, each given as a list of text positions
 * @return the normalised, space-separated text
 */
String toString(List<List<TextPosition>> words)
{
    StringBuilder joined = new StringBuilder();
    int wordsSeen = 0;
    for (List<TextPosition> glyphs : words)
    {
        // every word except the first is preceded by a separator
        if (wordsSeen > 0)
        {
            joined.append(' ');
        }
        wordsSeen++;

        for (TextPosition glyph : glyphs)
        {
            joined.append(glyph.getUnicode());
        }
    }
    // cf. http://stackoverflow.com/a/7171932/1729265
    return Normalizer.normalize(joined, Form.NFKC);
}
 
开发者ID:mkl-public,项目名称:testarea-pdfbox2,代码行数:19,代码来源:TextSection.java

示例11: processTextPosition

import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Forwards the text position to the superclass only if it is not clipped.
 *
 * <p>With no current clipping path the position is always forwarded.
 * Otherwise its start point (text matrix applied to the origin, offset by
 * {@code lowerLeftX}/{@code lowerLeftY}) must lie inside the clipping path,
 * and so must its end point when {@code checkEndPointToo} is set.
 *
 * @param text the text position to examine
 */
@Override
protected void processTextPosition(TextPosition text) {
    Vector origin = text.getTextMatrix().transform(new Vector(0, 0));
    Vector tail = new Vector(origin.getX() + text.getWidth(), origin.getY());

    Area clip = getGraphicsState().getCurrentClippingPath();
    if (clip != null) {
        if (!clip.contains(lowerLeftX + origin.getX(), lowerLeftY + origin.getY())) {
            return;
        }
        if (checkEndPointToo && !clip.contains(lowerLeftX + tail.getX(), lowerLeftY + tail.getY())) {
            return;
        }
    }
    super.processTextPosition(text);
}
 
开发者ID:mkl-public,项目名称:testarea-pdfbox2,代码行数:14,代码来源:PDFVisibleTextStripper.java

示例12: testCoverTextByRectanglesInput

import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * <a href="https://stackoverflow.com/questions/46080131/text-coordinates-when-stripping-from-pdfbox">
 * Text coordinates when stripping from PDFBox
 * </a>
 * <p>
 * This test applies the OP's code to an arbitrary PDF file and it did work properly
 * (well, it did only cover the text from the baseline upwards but that is to be expected).
 * </p>
 * <p>
 * The document and the appended content stream are both managed by
 * try-with-resources so that they are released even if stripping, drawing
 * or saving throws.
 * </p>
 *
 * @throws IOException if the PDF cannot be loaded, modified or saved
 */
@Test
public void testCoverTextByRectanglesInput() throws IOException {
    try (   InputStream resource = getClass().getResourceAsStream("input.pdf");
            PDDocument doc = PDDocument.load(resource)  ) {
        myStripper stripper = new myStripper();

        stripper.setStartPage(1); // fix it to first page just to test it
        stripper.setEndPage(1);
        stripper.getText(doc);

        TextLine line = stripper.lines.get(1); // the line i want to paint on

        // -1 marks "not yet set"; the loop finds the smallest and largest glyph origin x.
        float minx = -1;
        float maxx = -1;

        for (TextPosition pos: line.textPositions)
        {
            if (pos == null)
                continue;

            if (minx == -1 || pos.getTextMatrix().getTranslateX() < minx) {
                minx = pos.getTextMatrix().getTranslateX();
            }
            if (maxx == -1 || pos.getTextMatrix().getTranslateX() > maxx) {
                maxx = pos.getTextMatrix().getTranslateX();
            }
        }

        TextPosition firstPosition = line.textPositions.get(0);
        TextPosition lastPosition = line.textPositions.get(line.textPositions.size() - 1);

        float x = minx;
        float y = firstPosition.getTextMatrix().getTranslateY();
        float w = (maxx - minx) + lastPosition.getWidth();
        float h = lastPosition.getHeightDir();

        // The content stream must be closed before the document is saved.
        try (   PDPageContentStream contentStream = new PDPageContentStream(doc, doc.getPage(0), PDPageContentStream.AppendMode.APPEND, false)  ) {
            contentStream.setNonStrokingColor(Color.RED);
            contentStream.addRect(x, y, w, h);
            contentStream.fill();
        }

        File fileout = new File(RESULT_FOLDER, "input-withRectangles.pdf");
        doc.save(fileout);
    }
}
 
开发者ID:mkl-public,项目名称:testarea-pdfbox2,代码行数:59,代码来源:RectanglesOverText.java

示例13: writeString

import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Records the run in {@code lines} before delegating to the superclass.
 *
 * <p>When {@code startOfLine} is set, a new {@code TextLine} holding the
 * given text and position list is appended and the flag is cleared.
 * Otherwise the text and positions are appended to the most recent line.
 *
 * @param text          the text of the run
 * @param textPositions the positions making up the run
 * @throws IOException if the superclass fails to write the string
 */
@Override
protected void writeString(String text, List<TextPosition> textPositions) throws IOException {
    if (!startOfLine) {
        // continue the line that is currently being assembled
        TextLine current = lines.get(lines.size() - 1);
        current.text += text;
        current.textPositions.addAll(textPositions);
    } else {
        TextLine fresh = new TextLine();
        fresh.text = text;
        fresh.textPositions = textPositions;
        lines.add(fresh);
        startOfLine = false;
    }

    super.writeString(text, textPositions);
}
 
开发者ID:mkl-public,项目名称:testarea-pdfbox2,代码行数:21,代码来源:RectanglesOverText.java

示例14: printSubwords

import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Prints every occurrence of the search term found by {@code findSubwords}
 * on each page of the document.
 *
 * <p>For each hit the page number, the hit's position and width, and the
 * last letter with its direction-adjusted coordinates are printed. A hit
 * whose text differs from the search term is additionally flagged as
 * invalid.
 *
 * @param document   the document to search
 * @param searchTerm the text to look for
 * @throws IOException if searching a page fails
 */
void printSubwords(PDDocument document, String searchTerm) throws IOException
{
    System.out.printf("* Looking for '%s'\n", searchTerm);
    for (int pageNumber = 1; pageNumber <= document.getNumberOfPages(); pageNumber++)
    {
        for (TextPositionSequence match : findSubwords(document, pageNumber, searchTerm))
        {
            String matchedText = match.toString();
            if (!searchTerm.equals(matchedText))
            {
                System.out.printf("  Invalid (%s) ", matchedText);
            }

            TextPosition finalGlyph = match.textPositionAt(match.length() - 1);
            System.out.printf("  Page %s at %s, %s with width %s and last letter '%s' at %s, %s\n",
                    pageNumber, match.getX(), match.getY(), match.getWidth(),
                    finalGlyph.getUnicode(), finalGlyph.getXDirAdj(), finalGlyph.getYDirAdj());
        }
    }
}
 
开发者ID:mkl-public,项目名称:testarea-pdfbox2,代码行数:18,代码来源:SearchSubword.java

示例15: TextMetrics

import org.apache.pdfbox.text.TextPosition; //导入依赖的package包/类
/**
 * Initialises the metrics from a single text position.
 *
 * <p>Position, size and font values are copied from {@code tp}; ascent and
 * descent are then obtained from {@code getAscent()} and
 * {@code getDescent()}, which are called only after the other fields are
 * set.
 *
 * @param tp the text position to take the values from
 */
public TextMetrics(TextPosition tp)
{
    // font information
    this.font = tp.getFont();
    this.pointSize = tp.getFontSizeInPt();
    this.fontSize = tp.getYScale();

    // geometry
    this.x = tp.getX();
    this.baseline = tp.getY();
    this.width = tp.getWidth();
    this.height = tp.getHeight();

    // derived values, computed last
    this.ascent = getAscent();
    this.descent = getDescent();
}
 
开发者ID:radkovo,项目名称:Pdf2Dom,代码行数:13,代码来源:TextMetrics.java


注:本文中的org.apache.pdfbox.text.TextPosition类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。