I am trying to read some pdf files page by page and add the pages to an existing pdf using itextsharp. Here is my solution:
// Path of the output PDF that every PdfCopy created below writes to.
string path2 = Server.MapPath("~/2.pdf");
PdfReader reader = null;
iTextSharp.text.Document document = null;
PdfCopy pdfCopyProvider = null;
PdfImportedPage importedPage = null;
// Walk every input file name and copy its pages one at a time.
foreach (var pdfName in pdfNames)
{
path1 = Path.Combine(Server.MapPath("~/Files/Pdf/temp/"), pdfName);
reader = new PdfReader(path1);
for (int pageIndex = 1; pageIndex <= reader.NumberOfPages; pageIndex++)
{
// NOTE(review): a new Document, a new PdfCopy and a new FileStream (FileMode.Append) are
// created for every single page, overwriting the previous ones held in these variables.
// Presumably one Document/PdfCopy opened once before both loops was intended - confirm.
document = new iTextSharp.text.Document(reader.GetPageSizeWithRotation(pageIndex));
pdfCopyProvider = new PdfCopy(document, new System.IO.FileStream(path2, System.IO.FileMode.Append));
document.Open();
importedPage = pdfCopyProvider.GetImportedPage(reader, pageIndex);
// NOTE(review): this Image is not used by any later statement in this snippet.
iTextSharp.text.Image image = iTextSharp.text.Image.GetInstance(importedPage);
pdfCopyProvider.AddPage(importedPage);
}
reader.Close();
// Only the Document created for the last page of this file is closed here;
// the Documents created for earlier pages are never closed.
document.Close();
}
But the output pdf just contains the pages of last pdf in the foreach loop.
Related
I have this code that will add a watermark on each page:
// Stamps the image stored next to the application ("Watermark.png") onto the over-content
// of every page of fileLocation and writes the result to a "_marked.pdf" file.
string watermarkLocation = AppDomain.CurrentDomain.BaseDirectory + "Watermark.png";
// This Document is only used below as the source of a page size for scaling the image.
Document document = new Document();
PdfReader pdfReader = new PdfReader(fileLocation);
PdfStamper stamp = new PdfStamper(pdfReader, new FileStream(fileLocation.Replace(".pdf", "_marked.pdf"), FileMode.Create));
iTextSharp.text.Image img = iTextSharp.text.Image.GetInstance(watermarkLocation);
img.ScaleToFit(document.PageSize);
img.SetAbsolutePosition(0, 100);
PdfContentByte waterMark;
// Page numbers run from 1 to NumberOfPages inclusive.
for (int page = 1; page <= pdfReader.NumberOfPages; page++)
{
waterMark = stamp.GetOverContent(page);
waterMark.AddImage(img);
}
// NOTE(review): flattening is requested on the same stamper that has already added the
// watermark to each page's over-content.
stamp.FormFlattening = true;
stamp.Close();
// pdfReader is not closed anywhere in this snippet.
return fileLocation.Replace(".pdf", "_marked.pdf");
But on PDFs that have textboxes, the image will go behind the textbox/form. I thought flattening the file will fix this, but it does not work.
I used a full image as a test but the watermark in the end will have transparency.
Here's the final code I'm using. As my comment mentioned, there's basically 2 readers/stamps, one to flatten the file and another to add the watermark.
Flatten file:
/// <summary>
/// Runs the document held by <paramref name="reader"/> through a stamper with form
/// flattening switched on and returns the resulting PDF as a byte array.
/// </summary>
/// <param name="reader">Reader positioned on the PDF whose form should be flattened.</param>
/// <returns>The bytes written by the stamper into an in-memory stream.</returns>
private byte[] FlattenPdfFormToBytes(PdfReader reader)
{
    MemoryStream buffer = new MemoryStream();

    PdfStamper flattener = new PdfStamper(reader, buffer);
    flattener.FormFlattening = true;

    // Closing the stamper is what writes the output into the buffer.
    flattener.Close();

    return buffer.ToArray();
}
Add Watermark (which will call FlattenPdfFormToBytes):
/// <summary>
/// Flattens the form of the PDF at <paramref name="fileLocation"/>, stamps "Watermark.png"
/// (looked up in the application base directory) over every page, and writes the result
/// next to the original with "_marked.pdf" in place of ".pdf".
/// </summary>
/// <param name="fileLocation">Path of the PDF to watermark.</param>
/// <returns>Path of the watermarked file that was written.</returns>
public string AddWatermark(string fileLocation)
{
    string watermarkLocation = AppDomain.CurrentDomain.BaseDirectory + "Watermark.png";
    // Computed once so the file that is written and the path that is returned cannot diverge.
    string markedLocation = fileLocation.Replace(".pdf", "_marked.pdf");

    PdfReader pdfReader = new PdfReader(fileLocation);
    PdfReader pdfFlatten = null;
    try
    {
        // Flatten first with a separate reader/stamper pair, then watermark the flattened
        // bytes, so the image is drawn over the former form fields instead of behind them.
        pdfFlatten = new PdfReader(FlattenPdfFormToBytes(pdfReader));

        // Load the image before creating the output file so a missing watermark does not
        // leave an empty "_marked.pdf" behind.
        iTextSharp.text.Image img = iTextSharp.text.Image.GetInstance(watermarkLocation);
        // A default Document is created only to obtain its page size for scaling.
        Document document = new Document();
        img.ScaleToFit(document.PageSize);
        img.SetAbsolutePosition(0, 100);

        using (FileStream output = new FileStream(markedLocation, FileMode.Create))
        {
            PdfStamper stamp = new PdfStamper(pdfFlatten, output);
            for (int page = 1; page <= pdfFlatten.NumberOfPages; page++)
            {
                PdfContentByte waterMark = stamp.GetOverContent(page);
                waterMark.AddImage(img);
            }
            stamp.Close();
        }
    }
    finally
    {
        // Release both readers whether or not stamping succeeded.
        pdfReader.Close();
        if (pdfFlatten != null)
        {
            pdfFlatten.Close();
        }
    }

    return markedLocation;
}
I want to extract all pages of a PDF file that contain a special string (splittag). So far I have this code, but it outputs every page of the source PDF. What's wrong with it? I iterate through the pages of the source PDF, check whether the current page contains the splittag, and then create a new PDF named after the page number. It would be great if someone could help. Thank you!
// Intended to write each page whose text contains splittag to its own "<page number>.pdf"
// file in the folder named by textBox5.
iTextSharp.text.PdfReader reader = new iTextSharp.text.PdfReader(textBox3.Text);
string splittag = textBox2.Text;
// NOTE(review): this StringBuilder is not used by any later statement in this snippet.
StringBuilder text = new StringBuilder();
for (int i = 1; i <= reader.NumberOfPages; i++)
{
// NOTE(review): the ';' at the end of the next line is an empty statement that forms the
// entire body of the if. The braced block that follows is therefore not conditional and
// runs for every page, whether or not the page text contains splittag.
if(PdfTextExtractor.GetTextFromPage(reader, i, new SimpleTextExtractionStrategy()).ToString().Contains(splittag)) ;
{
richTextBox1.Text = PdfTextExtractor.GetTextFromPage(reader, i, new SimpleTextExtractionStrategy());
Document document = new Document();
// One output file per page, named after the page number.
PdfCopy copy = new PdfCopy(document, new FileStream(textBox5.Text + "\\" + i + ".pdf", FileMode.Create));
document.Open();
copy.AddPage(copy.GetImportedPage(reader, i));
document.Close();
}
}
I would use the following code:
/**
 * Scans a PDF and collects the numbers of the pages whose extracted text contains
 * the marker string "LoremIpsum".
 *
 * @param fileName path of the PDF file to scan
 * @return the 1-based page numbers of all pages containing the marker, in ascending order;
 *         empty when no page matches
 * @throws FileNotFoundException declared by the original signature for a missing file
 * @throws IOException if the file cannot be read
 */
public static List<Integer> determineSplits(String fileName) throws FileNotFoundException, IOException
{
    List<Integer> splitPages = new ArrayList<>();
    PdfDocument pdfDocument = new PdfDocument(new PdfReader(fileName));
    try {
        for (int i = 1; i <= pdfDocument.getNumberOfPages(); i++) {
            String pageTxt = PdfTextExtractor.getTextFromPage(pdfDocument.getPage(i));
            if (pageTxt.contains("LoremIpsum")) {
                // Record the number of the matching page (the original always added 1).
                splitPages.add(i);
            }
        }
    } finally {
        // Close the document even when text extraction fails.
        pdfDocument.close();
    }
    return splitPages;
}
This generates a list of pages that need to be included.
Then you can use iText code to separate out the pages you want using
public List<PdfPage> PdfDocument::copyPagesTo(int pageFrom,
int pageTo,
PdfDocument toDocument,
IPdfPageExtraCopier copier)
I am using this code now. It works fine and is simpler.
// Writes every page of the PDF named in textBox2 whose text contains "LoremIpsum" to its own
// file "<source name><page number>.pdf" in the folder named in textBox3.
FileInfo file = new FileInfo(textBox2.Text);
// The base name does not depend on the page, so it is computed once outside the loop.
string filename = System.IO.Path.GetFileNameWithoutExtension(file.Name);
using (PdfReader reader = new PdfReader(textBox2.Text))
{
    for (int pagenumber = 1; pagenumber <= reader.NumberOfPages; pagenumber++)
    {
        string pageText = PdfTextExtractor.GetTextFromPage(reader, pagenumber, new SimpleTextExtractionStrategy());
        if (!pageText.Contains("LoremIpsum"))
        {
            continue;
        }

        // Output objects are only created for pages that are actually written.
        string outputPath = textBox3.Text + "\\" + filename + pagenumber + ".pdf";
        using (FileStream output = new FileStream(outputPath, FileMode.Create))
        {
            Document document = new Document();
            PdfCopy copy = new PdfCopy(document, output);
            document.Open();
            copy.AddPage(copy.GetImportedPage(reader, pagenumber));
            document.Close();
        }
    }
}
Using the code below, I get a merged PDF, but the file is corrupted and I can't open it. 'targetPDF' is the final merged PDF file and 'fileNames' holds all the individual PDFs. Please help. Thanks in advance.
// Copies every page of every file in fileNames into one PdfCopy that writes to targetPDF.
// NOTE(review): C# keywords are case-sensitive; "Using" with a capital U is not the using keyword.
Using (FileStream stream = new FileStream(targetPDF, FileMode.Create, FileAccess.Write))
{
Document document = new Document();
PdfCopy pdf = new PdfCopy(document, stream);
// NOTE(review): pdf was assigned from a constructor call on the previous line, so this
// null check can never be true.
if (pdf == null)
{
return;
}
document.Open();
foreach (string file in fileNames)
{
PdfReader reader = new PdfReader(file);
reader.ConsolidateNamedDestinations();
for (int i = 1; i <= reader.NumberOfPages; i++)
{
PdfImportedPage page = pdf.GetImportedPage(reader, i);
pdf.AddPage(page);
//pdf.AddDocument(new PdfReader(file));
// pdf.AddPage(pdf.GetImportedPage(reader, 1));
}
reader.Close();
}
// NOTE(review): neither document nor pdf is closed anywhere in this snippet before the
// stream's block ends.
}
Change these lines:
Document document = new Document();
PdfCopy pdf = new PdfCopy(document, stream);
To :
// Both objects are disposed automatically when their using blocks end.
using(Document document = new Document())
{
using(PdfCopy pdf = new PdfCopy(document, stream))
{
//do stuff here...
}
}
That way, once the work is done, all streams are closed and the files are no longer locked.
I merged two PDF files into one PDF file using iTextSharp. But is it also possible to merge an Excel file into a PDF file using iTextSharp? I tried many times, but it doesn't work for me. Here is my PDF merge code:
/// <summary>
/// Click handler: when both upload controls hold a file, merges the two uploaded PDFs and
/// streams the merged document back in the HTTP response as "OutPut.pdf".
/// </summary>
/// <param name="sender">Control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
protected void btnMerge_Click(object sender, EventArgs e)
{
    if (!file1.HasFile || !file2.HasFile)
    {
        return;
    }

    List<PdfReader> readerList = new List<PdfReader>();
    try
    {
        readerList.Add(new PdfReader(file1.PostedFile.InputStream));
        readerList.Add(new PdfReader(file2.PostedFile.InputStream));

        // Describe the response before any body bytes are written to the output stream.
        Response.ContentType = "application/pdf";
        Response.AppendHeader("content-disposition", "inline; filename=OutPut.pdf");

        //Define a new output document and its size, type
        Document document = new Document(PageSize.A4, 0, 0, 0, 0);
        //Get instance response output stream to write output file.
        PdfWriter writer = PdfWriter.GetInstance(document, Response.OutputStream);
        document.Open();
        foreach (PdfReader reader in readerList)
        {
            for (int i = 1; i <= reader.NumberOfPages; i++)
            {
                // Each source page is imported and added to the document as an image.
                PdfImportedPage page = writer.GetImportedPage(reader, i);
                document.Add(iTextSharp.text.Image.GetInstance(page));
            }
        }
        document.Close();
    }
    finally
    {
        // Readers are closed only after the document has been closed, because the writer
        // still refers to them for the imported pages until then.
        foreach (PdfReader reader in readerList)
        {
            reader.Close();
        }
    }
}
/// <summary>
/// Merges the PDF files at <paramref name="filesPath"/>, in order, into a single A4 document
/// with zero margins written to <paramref name="outPutFilePath"/>.
/// </summary>
/// <param name="outPutFilePath">Path of the merged file; created or overwritten.</param>
/// <param name="filesPath">Paths of the source PDF files.</param>
private void MergePDFs(string outPutFilePath, params string[] filesPath)
{
    List<PdfReader> readerList = new List<PdfReader>();
    try
    {
        // All sources are opened before the output file is created, so an unreadable
        // source fails without touching the output path.
        foreach (string filePath in filesPath)
        {
            readerList.Add(new PdfReader(filePath));
        }

        //Define a new output document and its size, type
        Document document = new Document(PageSize.A4, 0, 0, 0, 0);
        //Create blank output pdf file and get the stream to write on it.
        using (FileStream output = new FileStream(outPutFilePath, FileMode.Create))
        {
            PdfWriter writer = PdfWriter.GetInstance(document, output);
            document.Open();
            foreach (PdfReader reader in readerList)
            {
                for (int i = 1; i <= reader.NumberOfPages; i++)
                {
                    // Each source page is imported and added to the document as an image.
                    PdfImportedPage page = writer.GetImportedPage(reader, i);
                    document.Add(iTextSharp.text.Image.GetInstance(page));
                }
            }
            document.Close();
        }
    }
    finally
    {
        // Close the readers last; the writer refers to them until the document is closed.
        foreach (PdfReader reader in readerList)
        {
            reader.Close();
        }
    }
}
}
}
We are using itextsharp to create a single PDF from multiple PDF files. How do I insert a new page into a PDF file that has multiple pages already in the file? When I use add page it is overwriting the existing pages and only saves the 1 page that was selected.
Here is the code that I am using to add the page to the existing PDF:
// Copies the pages of sourcePdfPath whose numbers appear in pagesToExtract.
PdfReader reader = new PdfReader(sourcePdfPath);
Document document = new Document(reader.GetPageSizeWithRotation(1));
// outputPdfPath is opened with FileMode.Create, so an existing file at that path is replaced.
PdfCopy pdfCopy = new PdfCopy(document, new System.IO.FileStream(outputPdfPath, System.IO.FileMode.Create));
// NOTE(review): a second writer, backed by a MemoryStream, is attached to the same Document
// as pdfCopy. The MemoryStream's contents are not read anywhere in this snippet.
MemoryStream memoryStream = new MemoryStream();
PdfWriter writer = PdfWriter.GetInstance(document, memoryStream);
document.AddDocListener(writer);
document.Open();
for (int p = 1; p <= reader.NumberOfPages; p++)
{
// Skip pages that are not listed in pagesToExtract.
if (pagesToExtract.FindIndex(s => s == p) == -1) continue;
document.SetPageSize(reader.GetPageSize(p));
document.NewPage();
PdfContentByte cb = writer.DirectContent;
PdfImportedPage pageImport = writer.GetImportedPage(reader, p);
int rot = reader.GetPageRotation(p);
// Pages rotated by 90 or 270 degrees are drawn with a rotating transform; all others
// are drawn with the identity transform.
if (rot == 90 || rot == 270)
{
cb.AddTemplate(pageImport, 0, -1.0F, 1.0F, 0, 0, reader.GetPageSizeWithRotation(p).Height);
}
else
{
cb.AddTemplate(pageImport, 1.0F, 0, 0, 1.0F, 0, 0);
}
// NOTE(review): pageImport was obtained from writer, yet it is added to pdfCopy here.
pdfCopy.AddPage(pageImport);
}
// Only pdfCopy is closed; document, writer and reader are not closed in this snippet.
pdfCopy.Close();
This code works. You need to have a different file to output the results.
/// <summary>
/// Concatenates all pages of two PDF files, first then second, into a new output file.
/// </summary>
/// <param name="sourcePdfPath1">Path of the PDF whose pages come first.</param>
/// <param name="sourcePdfPath2">Path of the PDF whose pages are appended.</param>
/// <param name="outputPdfPath">Path of the result; must differ from both sources because
/// it is created (and truncated) while the sources are open.</param>
private static void AppendToDocument(string sourcePdfPath1, string sourcePdfPath2, string outputPdfPath)
{
    using (var sourceDocumentStream1 = new FileStream(sourcePdfPath1, FileMode.Open))
    using (var sourceDocumentStream2 = new FileStream(sourcePdfPath2, FileMode.Open))
    using (var destinationDocumentStream = new FileStream(outputPdfPath, FileMode.Create))
    {
        var pdfConcat = new PdfConcatenate(destinationDocumentStream);

        // Both sources get identical treatment, so they share one loop body.
        foreach (var sourceStream in new[] { sourceDocumentStream1, sourceDocumentStream2 })
        {
            var pdfReader = new PdfReader(sourceStream);

            // Page numbers are 1-based. The original loop ran 0..N-1, which listed a
            // non-existent page 0 and left out the last page of each source.
            var pages = new List<int>();
            for (int page = 1; page <= pdfReader.NumberOfPages; page++)
            {
                pages.Add(page);
            }

            pdfReader.SelectPages(pages);
            pdfConcat.AddPages(pdfReader);
            pdfReader.Close();
        }

        pdfConcat.Close();
    }
}
I've tried this code, and it works for me, but don't forget to do some validations of the number of pages and existence of the paths you use
here is the code:
/// <summary>
/// Writes the pages listed in <paramref name="neededPages"/> from the source PDF into a
/// new output PDF.
/// </summary>
/// <param name="sourcePdfPath">Path of the PDF to read pages from.</param>
/// <param name="outputPdfPath">Path of the PDF to create; an existing file is overwritten.</param>
/// <param name="neededPages">Page numbers to keep, passed to the reader's SelectPages.</param>
private static void AppendToDocument(string sourcePdfPath, string outputPdfPath, List<int> neededPages)
{
    // The using blocks release both files even if reading or writing throws.
    using (var sourceDocumentStream = new FileStream(sourcePdfPath, FileMode.Open))
    using (var destinationDocumentStream = new FileStream(outputPdfPath, FileMode.Create))
    {
        var pdfConcat = new PdfConcatenate(destinationDocumentStream);
        var pdfReader = new PdfReader(sourceDocumentStream);
        try
        {
            pdfReader.SelectPages(neededPages);
            pdfConcat.AddPages(pdfReader);
        }
        finally
        {
            pdfReader.Close();
        }
        pdfConcat.Close();
    }
}
You could use something like this, where src is the IEnumerable<string> of input pdf filenames. Just make sure that your existing pdf file is one of those sources.
The PdfConcatenate class is in the latest iTextSharp release.
// Concatenates every PDF named in src, in enumeration order, into "combined.pdf".
var result = "combined.pdf";
// The using block releases the output file even if one of the sources fails to load.
using (var fs = new FileStream(result, FileMode.Create))
{
    // NOTE(review): the second argument presumably selects the "smart" copy mode - confirm
    // against the PdfConcatenate documentation.
    var conc = new PdfConcatenate(fs, true);
    foreach (var s in src)
    {
        var r = new PdfReader(s);
        conc.AddPages(r);
    }
    conc.Close();
}
PdfCopy is intended for use with an empty Document. You should add everything you want, one page at a time.
The alternative is to use PdfStamper.InsertPage(pageNum, rectangle) and then draw a PdfImportedPage onto that new page.
Note that PdfImportedPage only includes the page contents, not the annotations or doc-level information ("document structure", doc-level javascripts, etc) that page may have originally used... unless you use one with PdfCopy.
A Stamper would probably be more efficient and use less code, but PdfCopy will import all the page-level info, not just the page's contents.
This might be important, it might not. It depends on what page you're trying to import.
Had to even out the page count with a multiple of 4:
/// <summary>
/// Pads the PDF at <paramref name="sourcePdfPath"/> with blank pages at the end so that its
/// page count becomes a multiple of 4, replacing the file in place. Does nothing when the
/// page count is already a multiple of 4.
/// </summary>
/// <param name="sourcePdfPath">Path of the PDF to pad; the file is overwritten.</param>
private static void AppendToDocument(string sourcePdfPath)
{
    string tempFileLocation;
    // The source is read fully into memory, so the file itself is not held open while
    // it is being replaced below.
    var bytes = File.ReadAllBytes(sourcePdfPath);
    using (var reader = new PdfReader(bytes))
    {
        var numberofPages = reader.NumberOfPages;
        var modPages = (numberofPages % 4);
        var pages = modPages == 0 ? 0 : 4 - modPages;
        if (pages == 0)
            return;

        // Path.GetTempFileName creates the file on disk, so it is only called once we know
        // padding is needed; otherwise the early return would leave an orphan temp file.
        tempFileLocation = Path.GetTempFileName();
        try
        {
            using (var fileStream = new FileStream(tempFileLocation, FileMode.Create, FileAccess.Write))
            {
                using (var stamper = new PdfStamper(reader, fileStream))
                {
                    // New pages reuse the size of the first page.
                    var rectangle = reader.GetPageSize(1);
                    for (var i = 1; i <= pages; i++)
                        stamper.InsertPage(numberofPages + i, rectangle);
                }
            }
        }
        catch
        {
            // Stamping failed: remove the temp file and leave the source untouched.
            File.Delete(tempFileLocation);
            throw;
        }
    }
    File.Delete(sourcePdfPath);
    File.Move(tempFileLocation, sourcePdfPath);
}
I know I'm really late to the party here, but I combined the two best answers into a method, in case anyone needs it, that appends a list of source PDF documents to a single document using iTextSharp.
/// <summary>
/// Concatenates the PDF files named in <paramref name="sourcePDFList"/>, in list order,
/// into one PDF at <paramref name="outputPdfPath"/>. Paths that do not exist are skipped.
/// </summary>
/// <param name="sourcePDFList">Paths of the source PDF files.</param>
/// <param name="outputPdfPath">Path of the combined PDF; created or overwritten.</param>
private static void appendToDocument(List<string> sourcePDFList, string outputPdfPath)
{
    // Destination file and the concatenator that writes into it.
    var output = new FileStream(outputPdfPath, FileMode.Create);
    var concatenator = new PdfConcatenate(output);

    foreach (var candidate in sourcePDFList)
    {
        // Silently ignore entries that do not point at an existing file.
        if (!File.Exists(candidate))
        {
            continue;
        }

        var input = new FileStream(candidate, FileMode.Open);
        var source = new PdfReader(input);

        // Build the full 1..NumberOfPages selection for this source.
        var everyPage = new List<int>();
        for (int number = 1; number <= source.NumberOfPages; number++)
        {
            everyPage.Add(number);
        }

        source.SelectPages(everyPage);
        concatenator.AddPages(source);
        source.Close();
    }

    // Finish the combined document.
    concatenator.Close();
}