Read a PDF file using Selenium WebDriver

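Download the PDFBox and FontBox jars from https://pdfbox.apache.org/download.cgi and add them to the project's classpath.
The example below uses them to read a PDF file and verify that the expected content is present in it. Note that it is written against the PDFBox 1.8.x API (org.apache.pdfbox.util.PDFTextStripper).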


import java.io.BufferedInputStream;
import java.io.IOException;
import java.net.URL;

import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.testng.annotations.AfterClass;
import org.testng.annotations.Test;

/**
 * TestNG example that opens a PDF in Firefox through Selenium WebDriver,
 * downloads the same URL, extracts its text with PDFBox and reports whether
 * a set of expected strings occurs in that text.
 */
public class PdfRead {
    WebDriver driver = new FirefoxDriver();

    /**
     * Downloads the PDF at the browser's current URL and prints, for each
     * entry of {@code values}, whether it occurs in the extracted text.
     *
     * <p>The result is only reported on standard output; a missing value does
     * not fail the calling test.
     *
     * @param values the strings to look for in the PDF text
     * @throws IOException if the PDF cannot be downloaded or parsed
     */
    public void verifyPdfContent(String[] values) throws IOException {
        URL url = new URL(driver.getCurrentUrl());
        String pdfContent = extractPdfText(url);
        System.out.println("printing PDF content fully :" + pdfContent);
        for (String value : values) {
            if (pdfContent.contains(value)) {
                System.out.println(value + " is present in the PDF file");
            } else {
                System.out.println(value + " is not present in the PDF file");
            }
        }
    }

    /**
     * Reads the PDF behind {@code url} and returns its text content.
     *
     * @param url location of the PDF to download
     * @return the text extracted by {@link PDFTextStripper}
     * @throws IOException if the stream cannot be opened or the PDF cannot be parsed
     */
    private String extractPdfText(URL url) throws IOException {
        // try-with-resources closes the network stream even when parsing fails.
        try (BufferedInputStream pdfStream = new BufferedInputStream(url.openStream())) {
            PDFParser pdfParser = new PDFParser(pdfStream);
            pdfParser.parse();
            PDDocument document = pdfParser.getPDDocument();
            try {
                return new PDFTextStripper().getText(document);
            } finally {
                // Release the parsed document; it was previously left open.
                document.close();
            }
        }
    }

    /**
     * Opens a sample PDF in the browser and checks it for two sample strings.
     *
     * @throws IOException if the PDF cannot be downloaded or parsed
     */
    @Test
    public void myTest() throws IOException {
        driver.get("http://partners.adobe.com/public/developer/en/acrobat/PDFOpenParameters.pdf");
        // Values to verify
        String[] values = {"xxxcsds", "hey"};
        // verifying the pdf with given values
        verifyPdfContent(values);
    }

    /**
     * Shuts the browser down once all tests in this class have run, so the
     * Firefox instance started for this class does not stay open.
     */
    @AfterClass(alwaysRun = true)
    public void closeBrowser() {
        driver.quit();
    }
}

No images available.