Versionen im Vergleich

Schlüssel

  • Diese Zeile wurde hinzugefügt.
  • Diese Zeile wurde entfernt.
  • Formatierung wurde geändert.

...

Codeblock
themeEclipse
languagejava
titleOCRSelectedAreaCommand
linenumberstrue
/**
 * Command that runs OCR on the first selected area of a page and places the recognised text on
 * the system clipboard.
 *
 * <p>The selected area is rendered to an image via {@link #getRendering(Page, Rectangle2D)} and
 * handed to a {@link SimpleTesseractWrapper}. While a run is in progress {@link #canExecute()}
 * reports {@code false}.
 */
public class OCRSelectedAreaCommand extends AbstractSelectionCommand {
  private static final Logger log = LoggerFactory.getLogger(OCRSelectedAreaCommand.class);

  /** Cleared at the start of {@link #execute()} and restored when it finishes. */
  private boolean canExecute = true;

  /** Device resolution applied to the render settings in {@link #getRendering}. */
  private int exportResolution = 72;

  // NOTE(review): never assigned anywhere in this class, so the snapshot branch in
  // getRendering() is currently unreachable -- confirm whether a setter/injection is missing.
  private PageView pageView;

  /**
   * @return {@code false} while an OCR run started by {@link #execute()} is still in progress,
   *         {@code true} otherwise
   */
  @Override
  protected boolean canExecute() {
    return canExecute;
  }

  /**
   * Renders the first selection, runs OCR on it and copies the resulting text to the system
   * clipboard. If the OCR wrapper delivers no result the clipboard is left untouched.
   */
  @Override
  protected void execute() {
    canExecute = false;
    try {
      final Selection selection = getFirstSelection();
      final Page page = selection.getPage();
      final Rectangle2D bounds = selection.getBounds();

      // Resolution handed to the OCR wrapper (default value).
      // NOTE(review): the image itself is rendered at exportResolution (72 by default), not at
      // this value -- confirm which of the two is intended.
      Resolution imgResolution = new Resolution(200.0f);

      SimpleTesseractWrapper wrapper = new SimpleTesseractWrapper();

      BufferedImage image = getRendering(page, bounds);
      wrapper.doOcr(image, bounds, imgResolution);

      String text = wrapper.loadResult();
      if (text == null) {
        // loadResult() yields null when the result file could not be read.
        log.warn("OCR produced no result; clipboard left unchanged");
        return;
      }
      Toolkit.getDefaultToolkit().getSystemClipboard().setContents(new StringSelection(text), null);
    } finally {
      // Always re-enable the command, even if rendering or OCR threw.
      canExecute = true;
    }
  }

  /**
   * Renders the given area of a page into an image.
   *
   * @param page the page to render
   * @param area the area of interest; it is transformed with the render settings' affine
   *             transform and clipped to the rendered page size
   * @return the rendered image of the clipped area
   */
  protected BufferedImage getRendering(Page page, Rectangle2D area) {
    PageImageRenderer renderer = new PageImageRenderer(page);
    if (null != pageView) {
      // Reuse a snapshot of the view's current page controls for rendering.
      renderer.setRenderControls(pageView.getPageControls(page).createSnapshot());
    }

    RenderControls rc = renderer.getRenderControls();
    rc.getBaseRenderSettings().setDeviceResolution(getExportResolution());

    Rectangle roi = createTransformedBounds(rc.getBaseRenderSettings().getAffineTransform(page.getSize()), area);
    // Clip the region of interest to the rendered page.
    roi = roi.intersection(new Rectangle(Pages.getRenderedSize(page, rc)));
    renderer.setRenderArea(roi);

    renderer.setEnableCaching(true);

    return renderer.renderImage();
  }

  /** @return the device resolution used when rendering the selected area */
  public int getExportResolution() {
    return exportResolution;
  }

  /** @param exportResolution the device resolution to use when rendering the selected area */
  public void setExportResolution(int exportResolution) {
    this.exportResolution = exportResolution;
  }

}

...

  1. Ansteuerung des CLI der OCR-Engine (Tesseract-OCR).
  2. Einlesen des Ergebnisses der OCR-Verarbeitung als String.

...

 

Codeblock
themeEclipse
languagejava
titleSimpleTesseractWrapper
/**
 * Minimal wrapper around the Tesseract command line tool.
 *
 * <p>Usage: call {@link #doOcr(BufferedImage, Rectangle2D, Resolution)} to run the external
 * {@code tesseract.exe} on an image, then {@link #loadResult()} to read the recognised text.
 * Input image and result text are exchanged with the process through temporary files.
 */
public class SimpleTesseractWrapper {

  private static final Logger LOGGER = LoggerFactory.getLogger(SimpleTesseractWrapper.class);

  /** Directory containing {@code tesseract.exe}. */
  final String installPath = "C:/Program Files (x86)/Tesseract-OCR/";

  // Fallback locations, only used if the temp files cannot be created in the constructor.
  // NOTE(review): these look like directories rather than files -- confirm they are usable.
  private File outputFile = new File("c:/tmp/ocr-out/");

  private File tempFile = new File("c:/tmp/ocr-in/");

  /** Language passed to Tesseract via {@code -l}. */
  private final String language = "eng";

  /**
   * Creates the temporary input (PNG) and output (TXT) files. On failure the error is logged and
   * the fallback locations are kept.
   */
  public SimpleTesseractWrapper() {

    try {
      tempFile = File.createTempFile("temp_ocr", ".png");
      tempFile.deleteOnExit();
      outputFile = File.createTempFile("temp_ocr", ".txt");
      // The result file has to outlive doOcr() for loadResult(); clean it up at JVM exit.
      outputFile.deleteOnExit();
    } catch (IOException e) {
      LOGGER.error("Could not create temp file", e);
    }
  }

  /**
   * Writes the image to the temporary input file and runs Tesseract on it. Failures are logged,
   * not thrown. The temporary input file is deleted afterwards.
   *
   * @param image      the image to recognise
   * @param roi        currently unused
   * @param resolution currently unused
   */
  public void doOcr(BufferedImage image, Rectangle2D roi, Resolution resolution) {
    try {
      try {
        ImageIO.write(image, "PNG", tempFile);
      } catch (IOException e) {
        LOGGER.error("Error writing File", e);
        // Without an input image there is nothing for Tesseract to process.
        return;
      }
      runTesseract();
    } finally {
      if (tempFile.exists() && !tempFile.delete()) {
        LOGGER.warn("Could not delete temporary OCR input {}", tempFile);
      }
    }
  }

  /** Starts the Tesseract process, forwards its output to the log and waits for it to finish. */
  private void runTesseract() {
    String executable = new File(installPath, "tesseract.exe").getPath();
    Process proc = null;
    try {
      // "-l" and the language must be separate arguments; a single "-l eng" token is not
      // parsed as an option by the child process.
      ProcessBuilder builder = new ProcessBuilder(executable, tempFile.getAbsolutePath(),
          outputBaseName(), "-l", language);
      // Merge stderr into stdout so one reader drains both and the child cannot block on a
      // full pipe.
      builder.redirectErrorStream(true);

      proc = builder.start();
      logProcessOutput(proc);

      int exitVal = proc.waitFor();
      if (exitVal != 0) {
        LOGGER.warn("Process exitValue: {}", exitVal);
      }
    } catch (InterruptedException e) {
      // Preserve the interrupt for the caller's thread handling.
      Thread.currentThread().interrupt();
      LOGGER.error("Interrupted while waiting for Tesseract", e);
    } catch (Exception e) {
      // Boundary to an external tool: report, but keep doOcr() non-throwing as before.
      LOGGER.error("Tesseract OCR run failed", e);
    } finally {
      if (proc != null) {
        // No-op if the process already exited; otherwise stops it and releases its streams.
        proc.destroy();
      }
    }
  }

  /**
   * @return the path of the output file without its {@code .txt} suffix; presumably Tesseract
   *         appends the suffix itself -- verify against the Tesseract CLI documentation
   */
  private String outputBaseName() {
    String name = outputFile.getName();
    if (name.endsWith(".txt")) {
      name = name.substring(0, name.length() - ".txt".length());
    }
    return new File(outputFile.getParentFile(), name).getPath();
  }

  /** Reads the (merged) process output line by line and writes it to the debug log. */
  private void logProcessOutput(Process proc) throws IOException {
    BufferedReader reader = new BufferedReader(new InputStreamReader(proc.getInputStream()));
    try {
      String line;
      while ((line = reader.readLine()) != null) {
        LOGGER.debug("tesseract: {}", line);
      }
    } finally {
      reader.close();
    }
  }

  /**
   * Reads the text Tesseract wrote to the output file.
   *
   * @return the recognised text, or {@code null} if the output file could not be read
   */
  public String loadResult() {

    InputStreamReader reader = null;
    try {
      // Decode explicitly as UTF-8 (Tesseract's text output encoding) instead of relying on the
      // platform default charset.
      reader = new InputStreamReader(new java.io.FileInputStream(outputFile), "UTF-8");
      StringBuilder content = new StringBuilder();
      char[] buffer = new char[4096];
      int read;
      // Loop until EOF: a single read() may return fewer chars than requested, and the char
      // count differs from the file's byte length for multi-byte text.
      while ((read = reader.read(buffer)) != -1) {
        content.append(buffer, 0, read);
      }
      return content.toString();
    } catch (IOException e) {
      LOGGER.error("Could not read OCR result from " + outputFile, e);
      return null;
    } finally {
      if (reader != null) {
        try {
          reader.close();
        } catch (IOException e) {
          LOGGER.error("Couldn't close Streams", e);
        }
      }
    }

  }
}
{code}

 

...

 

Info

Nach Stichwort filtern (Inhalt nach Stichwort)
showLabelsfalse
max5
spacesJKB
sortmodified
showSpacefalse
reversetrue
typepage
labelsjadice documentplatform