...
Codeblock | ||||||||
---|---|---|---|---|---|---|---|---|
| ||||||||
/**
 * Command that renders the area of the first selection to an image, runs it
 * through {@link SimpleTesseractWrapper} and places the recognized text on
 * the system clipboard.
 *
 * <p>While {@link #execute()} is running, {@link #canExecute()} returns
 * {@code false}; the flag is restored when execution ends, whether it
 * succeeded or failed.
 */
public class OCRSelectedAreaCommand extends AbstractSelectionCommand {

    static final Logger log = LoggerFactory.getLogger(OCRSelectedAreaCommand.class);

    /** Guard flag: false while an OCR run is in progress. */
    private boolean canExecute = true;

    /** Device resolution used when rendering the selected area (see {@link #getRendering}). */
    private int exportResolution = 72;

    // NOTE(review): never assigned inside this class, so the null check in
    // getRendering() always skips the snapshot unless it is set elsewhere - confirm.
    private PageView pageView;

    /**
     * @return {@code true} unless an OCR run is currently in progress
     */
    @Override
    protected boolean canExecute() {
        return canExecute;
    }

    /**
     * Renders the first selection, runs OCR on it and copies the recognized
     * text to the system clipboard. If no OCR result could be loaded, the
     * clipboard is left untouched.
     */
    @Override
    protected void execute() {
        canExecute = false;
        try {
            final Selection selection = getFirstSelection();
            final Page page = selection.getPage();
            final Rectangle2D bounds = selection.getBounds();

            // Resolution handed to the OCR wrapper (default value).
            // NOTE(review): the image itself is rendered at getExportResolution()
            // (72 by default), not at 200 - confirm which value is intended.
            Resolution imgResolution = new Resolution(200.0f);

            SimpleTesseractWrapper wrapper = new SimpleTesseractWrapper();
            // TODO getSelection as BufferedImage
            BufferedImage image = getRendering(page, bounds);
            wrapper.doOcr(image, bounds, imgResolution);

            String text = wrapper.loadResult();
            if (text == null) {
                // loadResult() returns null when the result file could not be read.
                log.warn("OCR produced no readable result; clipboard left unchanged");
                return;
            }
            Toolkit.getDefaultToolkit().getSystemClipboard().setContents(new StringSelection(text), null);
        } finally {
            // Always re-enable the command, even if rendering or OCR threw.
            canExecute = true;
        }
    }

    /**
     * Renders the given area of a page into an image.
     *
     * @param page the page to render
     * @param area the area of interest, transformed with the render settings'
     *             affine transform and clipped to the rendered page size
     * @return the rendered image of the clipped area
     */
    protected BufferedImage getRendering(Page page, Rectangle2D area) {
        PageImageRenderer renderer = new PageImageRenderer(page);
        if (null != pageView) {
            renderer.setRenderControls(pageView.getPageControls(page).createSnapshot());
        }
        RenderControls rc = renderer.getRenderControls();
        rc.getBaseRenderSettings().setDeviceResolution(getExportResolution());

        Rectangle roi = createTransformedBounds(rc.getBaseRenderSettings().getAffineTransform(page.getSize()), area);
        // Clip the region of interest to the rendered page size.
        roi = roi.intersection(new Rectangle(Pages.getRenderedSize(page, rc)));

        renderer.setRenderArea(roi);
        renderer.setEnableCaching(true);
        return renderer.renderImage();
    }

    /**
     * @return the device resolution used for rendering the selected area
     */
    public int getExportResolution() {
        return exportResolution;
    }

    /**
     * @param exportResolution the device resolution to use for rendering the selected area
     */
    public void setExportResolution(int exportResolution) {
        this.exportResolution = exportResolution;
    }
}
|
...
- Ansteuerung des CLI der OCR-Engine (Tesseract-OCR).
- Einlesen des Ergebnisses der OCR-Verarbeitung als String.
...
Codeblock | ||||||
---|---|---|---|---|---|---|
| ||||||
/**
 * Minimal wrapper around the Tesseract command line executable.
 *
 * <p>Usage: call {@link #doOcr} with an image, then {@link #loadResult()} to
 * read the recognized text. The image is handed to Tesseract through a
 * temporary PNG file and the result is read back from a temporary text file.
 */
public class SimpleTesseractWrapper {

    static final Logger LOGGER = LoggerFactory.getLogger(SimpleTesseractWrapper.class);

    /** Directory containing {@code tesseract.exe}. */
    final String installPath = "C:/Program Files (x86)/Tesseract-OCR/";

    /** Text file Tesseract writes to; the path below is only the fallback if no temp file could be created. */
    private File outputFile = new File("c:/tmp/ocr-out/");

    /** Image file handed to Tesseract; the path below is only the fallback if no temp file could be created. */
    private File tempFile = new File("c:/tmp/ocr-in/");

    /** Language code passed to Tesseract via {@code -l}. */
    private final String language = "eng";

    /**
     * Creates the temporary input (PNG) and output (TXT) files. On failure the
     * error is logged and the hard-coded fallback paths remain in place.
     */
    public SimpleTesseractWrapper() {
        try {
            tempFile = File.createTempFile("temp_ocr", ".png");
            tempFile.deleteOnExit();
            outputFile = File.createTempFile("temp_ocr", ".txt");
            // The result file has to outlive doOcr() so loadResult() can read it;
            // clean it up when the JVM exits instead.
            outputFile.deleteOnExit();
        } catch (IOException e) {
            LOGGER.error("Could not create temp file", e);
        }
    }

    /**
     * Writes the image to the temporary PNG file and runs Tesseract on it.
     * I/O failures and a non-zero exit code are logged, not thrown.
     *
     * @param image      the image to recognize
     * @param roi        currently unused
     * @param resolution currently unused
     */
    public void doOcr(BufferedImage image, Rectangle2D roi, Resolution resolution) {
        // Tesseract appends ".txt" itself, so pass the output path without the suffix.
        String outputPath = outputFile.getParent() + File.separator + outputFile.getName();
        String outputBase = outputPath.endsWith(".txt")
                ? outputPath.substring(0, outputPath.length() - ".txt".length())
                : outputPath;

        Process proc = null;
        BufferedReader reader = null;
        try {
            if (!ImageIO.write(image, "PNG", tempFile)) {
                LOGGER.error("No image writer available for format PNG");
                return;
            }

            // Each option and its value must be a separate argument; "-l eng" as a
            // single argument is not recognized as the language option.
            ProcessBuilder builder = new ProcessBuilder(
                    new File(installPath, "tesseract.exe").getPath(),
                    tempFile.getAbsolutePath(),
                    outputBase,
                    "-l", language);
            // Merge stderr into stdout so a single reader drains everything the
            // process prints and it cannot block on a full pipe.
            builder.redirectErrorStream(true);
            proc = builder.start();

            reader = new BufferedReader(new InputStreamReader(proc.getInputStream()));
            String line;
            while ((line = reader.readLine()) != null) {
                LOGGER.debug("tesseract: {}", line);
            }

            int exitVal = proc.waitFor();
            if (exitVal != 0) {
                LOGGER.error("Process exitValue: " + exitVal);
            }
        } catch (IOException e) {
            LOGGER.error("Error running Tesseract on " + tempFile, e);
        } catch (InterruptedException e) {
            // Restore the interrupt flag for callers and stop the child process.
            Thread.currentThread().interrupt();
            if (proc != null) {
                proc.destroy();
            }
            LOGGER.error("Interrupted while waiting for Tesseract", e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    LOGGER.error("Couldn't close Streams", e);
                }
            }
            tempFile.delete();
        }
    }

    /**
     * Reads the text file produced by the last {@link #doOcr} call.
     *
     * @return the file content decoded as UTF-8, or {@code null} if the file
     *         could not be read
     */
    public String loadResult() {
        InputStreamReader reader = null;
        try {
            reader = new InputStreamReader(new java.io.FileInputStream(outputFile), "UTF-8");
            StringBuilder content = new StringBuilder();
            char[] buffer = new char[4096];
            int count;
            // A single read() may return fewer chars than requested; loop until EOF.
            while ((count = reader.read(buffer)) != -1) {
                content.append(buffer, 0, count);
            }
            return content.toString();
        } catch (IOException e) {
            LOGGER.error("Could not read OCR result from " + outputFile, e);
            return null;
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    LOGGER.error("Couldn't close Streams", e);
                }
            }
        }
    }
}
|
...
Info |
---|
Related articles
Nach Stichwort filtern (Inhalt nach Stichwort) | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|