Java PDF to Image Conversion using PDFBox

Here are a few code snippets that you can use to convert a PDF file to images, e.g. PNG, TIFF, or multi-page TIFF. They use the open-source library PDFBox, version 3.

The Apache PDFBox® library is an open source Java tool for working with PDF documents. This project allows creation of new PDF documents, manipulation of existing documents and the ability to extract content from documents.

PDFBox comes with the following features:

  • Extract Unicode text from PDF files.
  • Split a single PDF into many files or merge multiple PDF files.
  • Extract data from PDF forms or fill a PDF form.
  • Validate PDF files against the PDF/A-1b standard.
  • Print a PDF file using the standard Java printing API.
  • Save PDFs as image files, such as PNG or JPEG.
  • Create a PDF from scratch, with embedded fonts and images.

Convert PDF to PNG/JPG/BMP etc:

/**
 * Renders every page of a PDF to its own image file, written next to the source PDF.
 *
 * <p>Each file is named {@code <pdf name>-P-<zero-based page index>.<type>}.
 *
 * @param pdf  the PDF document to render
 * @param type an ImageIO format name, e.g. {@code "png"}, {@code "jpg"} or {@code "bmp"}
 * @throws IllegalArgumentException if ImageIO has no writer able to encode the page as {@code type}
 * @throws Exception if the PDF cannot be loaded or rendered, or an image cannot be written
 */
public static void convertToSeparateImageFiles(File pdf, String type) throws Exception {
    final int dpi = 300;
    final ImageType imageType = ImageType.RGB; // This can be GRAY, ARGB, BINARY, BGR

    try (PDDocument document = Loader.loadPDF(pdf)) {
        PDFRenderer pdfRenderer = new PDFRenderer(document);
        int pageCount = document.getNumberOfPages();
        for (int page = 0; page < pageCount; page++) {
            BufferedImage bim = pdfRenderer.renderImageWithDPI(page, dpi, imageType);

            File outputFile = new File(pdf.getAbsoluteFile().getParent() +
                    File.separator + pdf.getName() + "-P-" + page + "." + type);
            // ImageIO.write signals "no suitable writer" by returning false rather than
            // throwing, so an unchecked call would silently produce no file at all.
            if (!ImageIO.write(bim, type, outputFile)) {
                throw new IllegalArgumentException("No ImageIO writer available for format: " + type);
            }
        }
    }
}


Convert PDF to PNG/JPG/BMP etc with compression

/**
 * Renders every page of a PDF to its own image file using an explicit compression quality.
 *
 * <p>Each file is written next to the source PDF and named
 * {@code <pdf name>-compressed-P-<zero-based page index>.<type>}.
 *
 * @param pdf  the PDF document to render
 * @param type an ImageIO format name whose writer supports a compression quality, e.g. {@code "jpg"}
 * @throws IllegalArgumentException if ImageIO has no writer registered for {@code type}
 * @throws Exception if the PDF cannot be loaded or rendered, or an image cannot be written
 */
public static void convertToSeparateImageFilesWithCompression(File pdf, String type) throws Exception {
    final int dpi = 300;
    final ImageType imageType = ImageType.RGB; // This can be GRAY, ARGB, BINARY, BGR

    // Fail with a clear message instead of a bare NoSuchElementException from next().
    java.util.Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName(type);
    if (!writers.hasNext()) {
        throw new IllegalArgumentException("No ImageIO writer available for format: " + type);
    }
    ImageWriter writer = writers.next();

    try (PDDocument document = Loader.loadPDF(pdf)) {
        ImageWriteParam params = writer.getDefaultWriteParam();
        params.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
        params.setCompressionQuality(0.6f);

        PDFRenderer pdfRenderer = new PDFRenderer(document);
        int pageCount = document.getNumberOfPages();

        for (int page = 0; page < pageCount; page++) {
            BufferedImage bim = pdfRenderer.renderImageWithDPI(page, dpi, imageType);

            File outputFile = new File(pdf.getAbsoluteFile().getParent() +
                    File.separator + pdf.getName() + "-compressed-P-" + page + "." + type);
            // FileImageOutputStream opens the file for read/write without truncating it, so a
            // previous, larger output would leave stale bytes after the new image data.
            java.nio.file.Files.deleteIfExists(outputFile.toPath());

            // The writer never closes the stream it is given; close it per page so file
            // handles are not leaked for every rendered page.
            try (ImageOutputStream outputStream = new FileImageOutputStream(outputFile)) {
                writer.setOutput(outputStream);
                writer.write(null, new IIOImage(bim, null, null), params);
            }
        }
    } finally {
        writer.dispose();
    }
}

 

Convert PDF to single-page TIFF files

/**
 * Renders every page of a PDF to its own LZW-compressed, bilevel TIFF file.
 *
 * <p>Each file is written next to the source PDF and named
 * {@code <pdf name>-compressed-P-<zero-based page index>.tif}.
 *
 * @param pdf the PDF document to render
 * @throws IllegalStateException if ImageIO has no TIFF writer registered
 * @throws Exception if the PDF cannot be loaded or rendered, or an image cannot be written
 */
public static void convertToSinglePageTiffs(File pdf) throws Exception {
    final int dpi = 300;
    final ImageType imageType = ImageType.BINARY; // This can be GRAY, ARGB, BINARY, BGR

    // Fail with a clear message instead of a bare NoSuchElementException from next().
    java.util.Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName("tiff");
    if (!writers.hasNext()) {
        throw new IllegalStateException("No ImageIO writer available for format: tiff");
    }
    ImageWriter writer = writers.next();

    try (PDDocument document = Loader.loadPDF(pdf)) {
        ImageWriteParam params = writer.getDefaultWriteParam();
        params.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
        // Compression Types: None, PackBits, ZLib, Deflate, LZW, JPEG and CCITT
        params.setCompressionType("LZW");
        params.setCompressionQuality(0.8f);

        PDFRenderer pdfRenderer = new PDFRenderer(document);
        int pageCount = document.getNumberOfPages();

        for (int page = 0; page < pageCount; page++) {
            BufferedImage bim = pdfRenderer.renderImageWithDPI(page, dpi, imageType);

            File outputFile = new File(pdf.getAbsoluteFile().getParent() +
                    File.separator + pdf.getName() + "-compressed-P-" + page + ".tif");
            // FileImageOutputStream opens the file for read/write without truncating it, so a
            // previous, larger output would leave stale bytes after the new image data.
            java.nio.file.Files.deleteIfExists(outputFile.toPath());

            // The writer never closes the stream it is given; close it per page so file
            // handles are not leaked for every rendered page.
            try (ImageOutputStream outputStream = new FileImageOutputStream(outputFile)) {
                writer.setOutput(outputStream);
                writer.write(null, new IIOImage(bim, null, null), params);
            }
        }
    } finally {
        writer.dispose();
    }
}

 

Convert PDF to a multi-page tiff file

/**
 * Renders every page of a PDF into a single multi-page, LZW-compressed grayscale TIFF.
 *
 * @param pdf        the PDF document to render
 * @param outputTiff the TIFF file to create; an existing file is replaced
 * @throws IllegalStateException if ImageIO has no TIFF writer registered or cannot open the output
 * @throws Exception if the PDF cannot be loaded or rendered, or the TIFF cannot be written
 */
public static void convertToMultipageTiff(File pdf, File outputTiff) throws Exception {
    final int dpi = 300;
    final ImageType imageType = ImageType.GRAY; // This can be GRAY, ARGB, BINARY, BGR

    // Fail with a clear message instead of a bare NoSuchElementException from next().
    java.util.Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName("tiff");
    if (!writers.hasNext()) {
        throw new IllegalStateException("No ImageIO writer available for format: tiff");
    }
    ImageWriter writer = writers.next();

    try (PDDocument document = Loader.loadPDF(pdf)) {
        // The file-backed image stream does not truncate an existing file, so remove any
        // previous output to avoid stale trailing bytes.
        java.nio.file.Files.deleteIfExists(outputTiff.toPath());

        try (ImageOutputStream ios = ImageIO.createImageOutputStream(outputTiff)) {
            if (ios == null) {
                throw new IllegalStateException("Cannot create an image output stream for " + outputTiff);
            }
            writer.setOutput(ios);
            writer.prepareWriteSequence(null);

            ImageWriteParam params = writer.getDefaultWriteParam();
            params.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
            // Compression Types: None, PackBits, ZLib, Deflate, LZW, JPEG and CCITT
            params.setCompressionType("LZW");
            params.setCompressionQuality(0.8f);

            PDFRenderer pdfRenderer = new PDFRenderer(document);
            int pageCount = document.getNumberOfPages();

            for (int page = 0; page < pageCount; page++) {
                BufferedImage bim = pdfRenderer.renderImageWithDPI(page, dpi, imageType);

                IIOMetadata metadata = writer.getDefaultImageMetadata(new ImageTypeSpecifier(bim), params);
                writer.writeToSequence(new IIOImage(bim, null, metadata), params);
            }

            // Every prepareWriteSequence must be completed with endWriteSequence before the
            // stream is closed.
            writer.endWriteSequence();
        }
    } finally {
        writer.dispose();
    }
}

 

Full code:

Uses dependency:

<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>3.0.0-RC1</version>
</dependency>


package pdf;

import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;

import javax.imageio.*;
import javax.imageio.metadata.IIOMetadata;
import javax.imageio.stream.FileImageOutputStream;
import javax.imageio.stream.ImageOutputStream;
import java.awt.image.BufferedImage;
import java.io.File;
import java.nio.file.Files;
import java.util.Iterator;

/**
 * Utility methods that rasterize the pages of a PDF with PDFBox and save them through ImageIO,
 * either as one image file per page or as a single multi-page TIFF.
 */
public class PDFUtils {
    /** Resolution, in dots per inch, used when rendering pages. */
    private static final int DPI = 300;

    /**
     * Demo entry point: converts {@code test.pdf} from the working directory with every method
     * of this class.
     *
     * @param args ignored
     * @throws Exception if any conversion fails
     */
    public static void main(String[] args) throws Exception {
        File pdf = new File("test.pdf");
        convertToSeparateImageFiles(pdf, "png");
        convertToSeparateImageFilesWithCompression(pdf, "jpg");

        // tiff
        convertToSinglePageTiffs(pdf);
        convertToMultipageTiff(pdf, siblingFile(pdf, "multi-page.tif"));
    }

    /**
     * Renders every page of a PDF to its own image file, written next to the source PDF.
     *
     * <p>Each file is named {@code <pdf name>-P-<zero-based page index>.<type>}.
     *
     * @param pdf  the PDF document to render
     * @param type an ImageIO format name, e.g. {@code "png"}, {@code "jpg"} or {@code "bmp"}
     * @throws IllegalArgumentException if ImageIO has no writer able to encode the page as {@code type}
     * @throws Exception if the PDF cannot be loaded or rendered, or an image cannot be written
     */
    public static void convertToSeparateImageFiles(File pdf, String type) throws Exception {
        final ImageType imageType = ImageType.RGB; // This can be GRAY, ARGB, BINARY, BGR

        try (PDDocument document = Loader.loadPDF(pdf)) {
            PDFRenderer pdfRenderer = new PDFRenderer(document);
            int pageCount = document.getNumberOfPages();
            for (int page = 0; page < pageCount; page++) {
                BufferedImage bim = pdfRenderer.renderImageWithDPI(page, DPI, imageType);

                File outputFile = siblingFile(pdf, "-P-" + page + "." + type);
                // ImageIO.write signals "no suitable writer" by returning false rather than
                // throwing, so an unchecked call would silently produce no file at all.
                if (!ImageIO.write(bim, type, outputFile)) {
                    throw new IllegalArgumentException("No ImageIO writer available for format: " + type);
                }
            }
        }
    }

    /**
     * Renders every page of a PDF to its own image file using an explicit compression quality.
     *
     * <p>Each file is written next to the source PDF and named
     * {@code <pdf name>-compressed-P-<zero-based page index>.<type>}.
     *
     * @param pdf  the PDF document to render
     * @param type an ImageIO format name whose writer supports a compression quality, e.g. {@code "jpg"}
     * @throws IllegalArgumentException if ImageIO has no writer registered for {@code type}
     * @throws Exception if the PDF cannot be loaded or rendered, or an image cannot be written
     */
    public static void convertToSeparateImageFilesWithCompression(File pdf, String type) throws Exception {
        ImageWriter writer = writerFor(type);
        try {
            ImageWriteParam params = writer.getDefaultWriteParam();
            params.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
            params.setCompressionQuality(0.6f);

            writePagesToSeparateFiles(pdf, ImageType.RGB, writer, params, type);
        } finally {
            writer.dispose();
        }
    }

    /**
     * Renders every page of a PDF to its own LZW-compressed, bilevel TIFF file.
     *
     * <p>Each file is written next to the source PDF and named
     * {@code <pdf name>-compressed-P-<zero-based page index>.tif}.
     *
     * @param pdf the PDF document to render
     * @throws IllegalArgumentException if ImageIO has no TIFF writer registered
     * @throws Exception if the PDF cannot be loaded or rendered, or an image cannot be written
     */
    public static void convertToSinglePageTiffs(File pdf) throws Exception {
        ImageWriter writer = writerFor("tiff");
        try {
            writePagesToSeparateFiles(pdf, ImageType.BINARY, writer, lzwParams(writer), "tif");
        } finally {
            writer.dispose();
        }
    }

    /**
     * Renders every page of a PDF into a single multi-page, LZW-compressed grayscale TIFF.
     *
     * @param pdf        the PDF document to render
     * @param outputTiff the TIFF file to create; an existing file is replaced
     * @throws IllegalArgumentException if ImageIO has no TIFF writer registered
     * @throws IllegalStateException if ImageIO cannot open a stream on {@code outputTiff}
     * @throws Exception if the PDF cannot be loaded or rendered, or the TIFF cannot be written
     */
    public static void convertToMultipageTiff(File pdf, File outputTiff) throws Exception {
        final ImageType imageType = ImageType.GRAY; // This can be GRAY, ARGB, BINARY, BGR

        ImageWriter writer = writerFor("tiff");
        try (PDDocument document = Loader.loadPDF(pdf)) {
            // The file-backed image stream does not truncate an existing file, so remove any
            // previous output to avoid stale trailing bytes.
            Files.deleteIfExists(outputTiff.toPath());

            try (ImageOutputStream ios = ImageIO.createImageOutputStream(outputTiff)) {
                if (ios == null) {
                    throw new IllegalStateException("Cannot create an image output stream for " + outputTiff);
                }
                writer.setOutput(ios);
                writer.prepareWriteSequence(null);

                ImageWriteParam params = lzwParams(writer);
                PDFRenderer pdfRenderer = new PDFRenderer(document);
                int pageCount = document.getNumberOfPages();

                for (int page = 0; page < pageCount; page++) {
                    BufferedImage bim = pdfRenderer.renderImageWithDPI(page, DPI, imageType);

                    IIOMetadata metadata = writer.getDefaultImageMetadata(new ImageTypeSpecifier(bim), params);
                    writer.writeToSequence(new IIOImage(bim, null, metadata), params);
                }

                // Every prepareWriteSequence must be completed with endWriteSequence before
                // the stream is closed.
                writer.endWriteSequence();
            }
        } finally {
            writer.dispose();
        }
    }

    /** Builds the path of a file in the PDF's directory named {@code <pdf name><suffix>}. */
    private static File siblingFile(File pdf, String suffix) {
        return new File(pdf.getAbsoluteFile().getParent() + File.separator + pdf.getName() + suffix);
    }

    /** Returns the first ImageIO writer for a format, failing clearly when none is registered. */
    private static ImageWriter writerFor(String formatName) {
        Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName(formatName);
        if (!writers.hasNext()) {
            throw new IllegalArgumentException("No ImageIO writer available for format: " + formatName);
        }
        return writers.next();
    }

    /** Creates write parameters selecting explicit LZW compression for the given TIFF writer. */
    private static ImageWriteParam lzwParams(ImageWriter writer) {
        ImageWriteParam params = writer.getDefaultWriteParam();
        params.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
        // Compression Types: None, PackBits, ZLib, Deflate, LZW, JPEG and CCITT
        params.setCompressionType("LZW");
        params.setCompressionQuality(0.8f);
        return params;
    }

    /** Renders each page and writes it to {@code <pdf name>-compressed-P-<page>.<extension>}. */
    private static void writePagesToSeparateFiles(File pdf, ImageType imageType, ImageWriter writer,
            ImageWriteParam params, String extension) throws Exception {
        try (PDDocument document = Loader.loadPDF(pdf)) {
            PDFRenderer pdfRenderer = new PDFRenderer(document);
            int pageCount = document.getNumberOfPages();

            for (int page = 0; page < pageCount; page++) {
                BufferedImage bim = pdfRenderer.renderImageWithDPI(page, DPI, imageType);

                File outputFile = siblingFile(pdf, "-compressed-P-" + page + "." + extension);
                // FileImageOutputStream opens the file for read/write without truncating it, so
                // a previous, larger output would leave stale bytes after the new image data.
                Files.deleteIfExists(outputFile.toPath());

                // The writer never closes the stream it is given; close it per page so file
                // handles are not leaked for every rendered page.
                try (ImageOutputStream outputStream = new FileImageOutputStream(outputFile)) {
                    writer.setOutput(outputStream);
                    writer.write(null, new IIOImage(bim, null, null), params);
                }
            }
        }
    }
}


 

 

No comments :

Post a Comment

Your Comment and Question will help to make this blog better...