How would I merge several pdf pages into one with iTextSharp which also supports merging pages having form elements like textboxes, checkboxes, etc.
I have tried so many by googling, but nothing has worked well.
See my answer here Merging Memory Streams. I give an example of how to merge PDFs with itextsharp.
For updating form field names add this code that uses the stamper to change the form field names.
/// <summary>
/// Merges pdf files from a byte list
/// </summary>
/// <param name="files">list of files to merge</param>
/// <returns>memory stream containing combined pdf</returns>
public MemoryStream MergePdfForms(List<byte[]> files)
{
if (files.Count > 1)
{
string[] names;
PdfStamper stamper;
MemoryStream msTemp = null;
PdfReader pdfTemplate = null;
PdfReader pdfFile;
Document doc;
PdfWriter pCopy;
MemoryStream msOutput = new MemoryStream();
pdfFile = new PdfReader(files[0]);
doc = new Document();
pCopy = new PdfSmartCopy(doc, msOutput);
pCopy.PdfVersion = PdfWriter.VERSION_1_7;
doc.Open();
for (int k = 0; k < files.Count; k++)
{
for (int i = 1; i < pdfFile.NumberOfPages + 1; i++)
{
msTemp = new MemoryStream();
pdfTemplate = new PdfReader(files[k]);
stamper = new PdfStamper(pdfTemplate, msTemp);
names = new string[stamper.AcroFields.Fields.Keys.Count];
stamper.AcroFields.Fields.Keys.CopyTo(names, 0);
foreach (string name in names)
{
stamper.AcroFields.RenameField(name, name + "_file" + k.ToString());
}
stamper.Close();
pdfFile = new PdfReader(msTemp.ToArray());
((PdfSmartCopy)pCopy).AddPage(pCopy.GetImportedPage(pdfFile, i));
pCopy.FreeReader(pdfFile);
}
}
pdfFile.Close();
pCopy.Close();
doc.Close();
return msOutput;
}
else if (files.Count == 1)
{
return new MemoryStream(files[0]);
}
return null;
}
Here is my simplified version of Jonathan's Merge code with namespaces added, and stamping removed.
public IO.MemoryStream MergePdfForms(System.Collections.Generic.List<byte[]> files)
{
if (files.Count > 1) {
using (System.IO.MemoryStream msOutput = new System.IO.MemoryStream()) {
using (iTextSharp.text.Document doc = new iTextSharp.text.Document()) {
using (iTextSharp.text.pdf.PdfSmartCopy pCopy = new iTextSharp.text.pdf.PdfSmartCopy(doc, msOutput) { PdfVersion = iTextSharp.text.pdf.PdfWriter.VERSION_1_7 }) {
doc.Open();
foreach (byte[] oFile in files) {
using (iTextSharp.text.pdf.PdfReader pdfFile = new iTextSharp.text.pdf.PdfReader(oFile)) {
for (i = 1; i <= pdfFile.NumberOfPages; i++) {
pCopy.AddPage(pCopy.GetImportedPage(pdfFile, i));
pCopy.FreeReader(pdfFile);
}
}
}
}
}
return msOutput;
}
} else if (files.Count == 1) {
return new System.IO.MemoryStream(files[0]);
}
return null;
}
to merge PDF see "Merging two pdf pages into one using itextsharp"
Below is my code for pdf merging.Thanks Jonathan for giving suggestion abt renaming fields,which resolved the issues while merging pdf pages with form fields.
private static void CombineAndSavePdf(string savePath, List<string> lstPdfFiles)
{
using (Stream outputPdfStream = new FileStream(savePath, FileMode.Create, FileAccess.Write, FileShare.None))
{
Document document = new Document();
PdfSmartCopy copy = new PdfSmartCopy(document, outputPdfStream);
document.Open();
PdfReader reader;
int totalPageCnt;
PdfStamper stamper;
string[] fieldNames;
foreach (string file in lstPdfFiles)
{
reader = new PdfReader(file);
totalPageCnt = reader.NumberOfPages;
for (int pageCnt = 0; pageCnt < totalPageCnt; )
{
//have to create a new reader for each page or PdfStamper will throw error
reader = new PdfReader(file);
stamper = new PdfStamper(reader, outputPdfStream);
fieldNames = new string[stamper.AcroFields.Fields.Keys.Count];
stamper.AcroFields.Fields.Keys.CopyTo(fieldNames, 0);
foreach (string name in fieldNames)
{
stamper.AcroFields.RenameField(name, name + "_file" + pageCnt.ToString());
}
copy.AddPage(copy.GetImportedPage(reader, ++pageCnt));
}
copy.FreeReader(reader);
}
document.Close();
}
}
Related
I am using iTextSharp in asp.net (C#) in order to merge more than one PDf template. There is a functionality of print which prints only data of the template. While merging the templates the fields/Controls which are available in the template is renamed using RenameField Method of iTextSharp.
This implementation has broken the Print functionality. As the Print functionality has been written according to the fields.
For merging Templates, I am using a PDFCopy.
Document document = new Document();
bool flag = true;
using (FileStream fileStream = File.Create(newFile))
{
PdfSmartCopy copy = new PdfSmartCopy(document, fileStream);
PdfReader reader;
MemoryStream baos;
for (int i = 0; i < loopCount; i++)
{
reader = new PdfReader(pdfTemplate);
baos = new MemoryStream();
stamper = new PdfStamper(reader, baos);
AcroFields pdfDoc = stamper.AcroFields;
BuildData(datarow,pdfDoc, obj)
renameFields(reader);
stamper.FormFlattening = false;
stamper.Close();
reader = new PdfReader(baos.ToArray());
copy.AddPage(copy.GetImportedPage(reader, 1));
}
document.Close();
strFileName = newFile;
}
private static void renameFields(PdfReader pdfReader)
{
string prepend = String.Format("_{0}", counter++);
foreach (KeyValuePair<string, AcroFields.Item> de in pdfReader.AcroFields.Fields)
{
pdfReader.AcroFields.RenameField(de.Key.ToString(), prepend + de.Key.ToString());
}
}
Edit1: This is the solution i found in itextsharp doc,BUT it's not working
"Using PdfCopy with documents
that have named destinations is one of these exceptions. All annotations, such
as link annotations, are kept with PdfCopy, but they no longer work for links to local
named destinations. There is a workaround for this problem."
PdfReader[] readers = {
new PdfReader(LinkActions.RESULT2),
new PdfReader(LinkActions.RESULT1) };
Document document = new Document();
PdfCopy copy =
new PdfCopy(document, new FileOutputStream(RESULT1));
document.open();
int n;
for (int i = 0; i < readers.length; i++) {
readers[i].consolidateNamedDestinations();
n = readers[i].getNumberOfPages();
for (int page = 0; page < n; ) {
copy.addPage(copy.getImportedPage(readers[i], ++page));
}
}
The forms no longer work because you have forgotten to add a single line: copy.setMergeFields();
See the MergeForms2 example:
public void manipulatePdf(String src, String dest) throws IOException, DocumentException {
Document document = new Document();
PdfCopy copy = new PdfSmartCopy(document, new FileOutputStream(dest));
copy.setMergeFields();
document.open();
List<PdfReader> readers = new ArrayList<PdfReader>();
for (int i = 0; i < 3; ) {
PdfReader reader = new PdfReader(renameFields(src, ++i));
readers.add(reader);
copy.addDocument(reader);
}
document.close();
for (PdfReader reader : readers) {
reader.close();
}
}
public byte[] renameFields(String src, int i) throws IOException, DocumentException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
PdfReader reader = new PdfReader(src);
PdfStamper stamper = new PdfStamper(reader, baos);
AcroFields form = stamper.getAcroFields();
Set<String> keys = new HashSet<String>(form.getFields().keySet());
for (String key : keys) {
form.renameField(key, String.format("%s_%d", key, i));
}
stamper.close();
reader.close();
return baos.toByteArray();
}
It seems that you're also making the assumption that your template consists of only one page:
copy.addPage(copy.getImportedPage(reader, 1));
It is safer to add the document all at once:
copy.addDocument(reader);
Important:
My examples are written in Java. You are working with iTextSharp in C#. You will have to adapt the methods by changing the Java-specific methods into C#-specific properties or methods.
How can I change the font of a existing PDF file with iTextSharp in C#?
I want to change the whole documents font into one e.g. Arial
Finally I solved the problem.
The following code will open an existing Pdf file and will change all its font to 'Braille' as per my expectation.
private static void ChangeFont()
{
string strFile = #"E:\\xyz.pdf";
string OutputFile = #"E:\\xyz1.pdf";
PdfReader pdfReader = new PdfReader(strFile);
//Get first page,Generally we get font information on first page,however we can loop throw pages e.g for(int i=0;i<=pdfReader.NumberOfPages;i++)
PdfDictionary cpage = pdfReader.GetPageN(1);
if (cpage == null)
return;
PdfDictionary dictFonts = cpage.GetAsDict(PdfName.RESOURCES).GetAsDict(PdfName.FONT);
if (dictFonts != null)
{
foreach (var font in dictFonts)
{
var dictFontInfo = dictFonts.GetAsDict(font.Key);
if (dictFontInfo != null)
{
foreach (var f in dictFontInfo)
{
//Get the font name-optional code
var baseFont = dictFontInfo.Get(PdfName.BASEFONT);
string strFontName = System.Text.Encoding.ASCII.GetString(baseFont.GetBytes(), 0,
baseFont.Length);
//
//Remove the current font
dictFontInfo.Remove(PdfName.BASEFONT);
//Set new font eg. Braille, Areal etc
dictFontInfo.Put(PdfName.BASEFONT, new PdfString("Braille"));
break;
}
}
}
}
//Now create a new document with updated font
using (FileStream FS = new FileStream(OutputFile, FileMode.Create, FileAccess.Write, FileShare.None))
{
using (Document Doc = new Document())
{
using (PdfCopy writer = new PdfCopy(Doc, FS))
{
Doc.Open();
for (int j = 1; j <= pdfReader.NumberOfPages; j++)
{
writer.AddPage(writer.GetImportedPage(pdfReader, j));
}
Doc.Close();
}
}
}
pdfReader.Close();
}
I'm generating a PDF document based on template. The document has multiple pages. The document can have about 5000 pages. When creating the 500-th page I get an overflow RAM (memory). Any idea?
public static void CreateBankBlank2012Year(string pdfTemplatePath, string directoryOutPdf, string nameOutPdf, AnnualReportsFilterParameters filterParametrs, string serverPath)
{
// Get details salary
IEnumerable<SalayDetailsForPdf> dataSalaryDetails = (IEnumerable<SalayDetailsForPdf>) GetSalaryData(filterParametrs);
String fontPath = Path.Combine(serverPath + "\\Fonts", "STSONG.ttf");
Font font = FontFactory.GetFont(fontPath, BaseFont.IDENTITY_H, 8);
using (Document document = new Document())
{
using (PdfSmartCopy copy = new PdfSmartCopy(
document, new FileStream(directoryOutPdf + nameOutPdf, FileMode.Create))
)
{
document.Open();
foreach (var data in dataSalaryDetails)
{
PdfReader reader = new PdfReader(pdfTemplatePath + #"\EmptyTemplateBankBlank_2012.pdf");
using (var ms = new MemoryStream())
{
using (PdfStamper stamper = new PdfStamper(reader, ms))
{
stamper.AcroFields.AddSubstitutionFont(font.BaseFont);
AcroFields form = stamper.AcroFields;
form.SetField("t1_address1", data.Address1);
form.SetField("t1_name", data.NameHieroglyphic);
// Other field ...
stamper.FormFlattening = true;
}
reader = new PdfReader(ms.ToArray());
copy.AddPage(copy.GetImportedPage(reader, 1));
}
}
}
}
}
p.s
I'm trying resolve my problem as follow:
generating empty pages based on template
private static void GeneratePdfFromTemplate(string directoryOutPdf, string nameOutPdf, string pdfTemplatePath, int countPages)
{
using (Document document = new Document())
{
using (PdfSmartCopy copy = new PdfSmartCopy(
document, new FileStream(directoryOutPdf + nameOutPdf, FileMode.Create))
)
{
document.Open();
PdfReader reader = new PdfReader(pdfTemplatePath + #"\EmptyTemplateBankBlank_2012.pdf");
for (int i = 0; i < countPages; i++)
{
copy.AddPage(copy.GetImportedPage(reader, 1));
}
reader.Close();
copy.Close();
}
document.Close();
}
GC.Collect();
}
But after a generating I can't set values to the fields.
I'm found solution for my problem, if anyone else would be interested :
private static void SettingFieltValue(Font font, IEnumerable<SalayDetailsForPdf> dataSalaryDetails, int selectedYear, string directoryOutPdf, string nameOutPdf, string pdfTemplatePath)
{
string pdfTemplate = pdfTemplatePath + #"\EmptyTemplateBankBlank_2012.pdf";
string newFile = directoryOutPdf + nameOutPdf;
var fs = new FileStream(newFile, FileMode.Create);
var conc = new PdfConcatenate(fs, true);
foreach (var data in dataSalaryDetails)
{
var reader = new PdfReader(pdfTemplate);
using (var ms = new MemoryStream())
{
using (PdfStamper stamper = new PdfStamper(reader, ms))
{
stamper.AcroFields.AddSubstitutionFont(font.BaseFont);
AcroFields form = stamper.AcroFields;
form.SetField("t1_name", data.NameHieroglyphic);
//Other field
stamper.FormFlattening = true;
stamper.Close();
}
reader = new PdfReader(ms.ToArray());
ms.Close();
}
conc.AddPages(reader);
reader.Close();
}
conc.Close();
}
We are using itextsharp to create a single PDF from multiple PDF files. How do I insert a new page into a PDF file that has multiple pages already in the file? When I use add page it is overwriting the existing pages and only saves the 1 page that was selected.
Here is the code that I am using to add the page to the existing PDF:
PdfReader reader = new PdfReader(sourcePdfPath);
Document document = new Document(reader.GetPageSizeWithRotation(1));
PdfCopy pdfCopy = new PdfCopy(document, new System.IO.FileStream(outputPdfPath, System.IO.FileMode.Create));
MemoryStream memoryStream = new MemoryStream();
PdfWriter writer = PdfWriter.GetInstance(document, memoryStream);
document.AddDocListener(writer);
document.Open();
for (int p = 1; p <= reader.NumberOfPages; p++)
{
if (pagesToExtract.FindIndex(s => s == p) == -1) continue;
document.SetPageSize(reader.GetPageSize(p));
document.NewPage();
PdfContentByte cb = writer.DirectContent;
PdfImportedPage pageImport = writer.GetImportedPage(reader, p);
int rot = reader.GetPageRotation(p);
if (rot == 90 || rot == 270)
{
cb.AddTemplate(pageImport, 0, -1.0F, 1.0F, 0, 0, reader.GetPageSizeWithRotation(p).Height);
}
else
{
cb.AddTemplate(pageImport, 1.0F, 0, 0, 1.0F, 0, 0);
}
pdfCopy.AddPage(pageImport);
}
pdfCopy.Close();
This code works. You need to have a different file to output the results.
private static void AppendToDocument(string sourcePdfPath1, string sourcePdfPath2, string outputPdfPath)
{
using (var sourceDocumentStream1 = new FileStream(sourcePdfPath1, FileMode.Open))
{
using (var sourceDocumentStream2 = new FileStream(sourcePdfPath2, FileMode.Open))
{
using (var destinationDocumentStream = new FileStream(outputPdfPath, FileMode.Create))
{
var pdfConcat = new PdfConcatenate(destinationDocumentStream);
var pdfReader = new PdfReader(sourceDocumentStream1);
var pages = new List<int>();
for (int i = 0; i < pdfReader.NumberOfPages; i++)
{
pages.Add(i);
}
pdfReader.SelectPages(pages);
pdfConcat.AddPages(pdfReader);
pdfReader = new PdfReader(sourceDocumentStream2);
pages = new List<int>();
for (int i = 0; i < pdfReader.NumberOfPages; i++)
{
pages.Add(i);
}
pdfReader.SelectPages(pages);
pdfConcat.AddPages(pdfReader);
pdfReader.Close();
pdfConcat.Close();
}
}
}
}
I've tried this code, and it works for me, but don't forget to do some validations of the number of pages and existence of the paths you use
here is the code:
private static void AppendToDocument(string sourcePdfPath, string outputPdfPath, List<int> neededPages)
{
var sourceDocumentStream = new FileStream(sourcePdfPath, FileMode.Open);
var destinationDocumentStream = new FileStream(outputPdfPath, FileMode.Create);
var pdfConcat = new PdfConcatenate(destinationDocumentStream);
var pdfReader = new PdfReader(sourceDocumentStream);
pdfReader.SelectPages(neededPages);
pdfConcat.AddPages(pdfReader);
pdfReader.Close();
pdfConcat.Close();
}
You could use something like this, where src is the IEnumerable<string> of input pdf filenames. Just make sure that your existing pdf file is one of those sources.
The PdfConcatenate class is in the latest iTextSharp release.
var result = "combined.pdf";
var fs = new FileStream(result, FileMode.Create);
var conc = new PdfConcatenate(fs, true);
foreach(var s in src) {
var r = new PdfReader(s);
conc.AddPages(r);
}
conc.Close();
PdfCopy is intended for use with an empty Document. You should add everything you want, one page at a time.
The alternative is to use PdfStamper.InsertPage(pageNum, rectangle) and then draw a PdfImportedPage onto that new page.
Note that PdfImportedPage only includes the page contents, not the annotations or doc-level information ("document structure", doc-level javascripts, etc) that page may have originally used... unless you use one with PdfCopy.
A Stamper would probably be more efficient and use less code, but PdfCopy will import all the page-level info, not just the page's contents.
This might be important, it might not. It depends on what page you're trying to import.
Had to even out the page count with a multiple of 4:
private static void AppendToDocument(string sourcePdfPath)
{
var tempFileLocation = Path.GetTempFileName();
var bytes = File.ReadAllBytes(sourcePdfPath);
using (var reader = new PdfReader(bytes))
{
var numberofPages = reader.NumberOfPages;
var modPages = (numberofPages % 4);
var pages = modPages == 0 ? 0 : 4 - modPages;
if (pages == 0)
return;
using (var fileStream = new FileStream(tempFileLocation, FileMode.Create, FileAccess.Write))
{
using (var stamper = new PdfStamper(reader, fileStream))
{
var rectangle = reader.GetPageSize(1);
for (var i = 1; i <= pages; i++)
stamper.InsertPage(numberofPages + i, rectangle);
}
}
}
File.Delete(sourcePdfPath);
File.Move(tempFileLocation, sourcePdfPath);
}
I know I'm really late to the part here, but I mixed a bit of the two best answers and created a method if anyone needs it that adds a list of source PDF documents to a single document using itextsharp.
private static void appendToDocument(List<string> sourcePDFList, string outputPdfPath)
{
//Output document name and creation
FileStream destinationDocumentStream = new FileStream(outputPdfPath, FileMode.Create);
//Object to concat source pdf's to output pdf
PdfConcatenate pdfConcat = new PdfConcatenate(destinationDocumentStream);
//For each source pdf in list...
foreach (string sourcePdfPath in sourcePDFList)
{
//If file exists...
if (File.Exists(sourcePdfPath))
{
//Open the document
FileStream sourceDocumentStream = new FileStream(sourcePdfPath, FileMode.Open);
//Read the document
PdfReader pdfReader = new PdfReader(sourceDocumentStream);
//Create an int list
List<int> pages = new List<int>();
//for each page in pdfReader
for (int i = 1; i < pdfReader.NumberOfPages + 1; i++)
{
//Add that page to the list
pages.Add(i);
}
//Add that page to the pages to add to ouput document
pdfReader.SelectPages(pages);
//Add pages to output page
pdfConcat.AddPages(pdfReader);
//Close reader
pdfReader.Close();
}
}
//Close pdfconcat
pdfConcat.Close();
}
The code snippet below returns a corrupt PDF document however if I return mergedDocument instead it always returns a valid PDF. mergedDocument is based on a PDF file i created using Word, whereas completed document is entirely programmatically generated. The code "works" in that it throws no exceptions. Why is iTextSharp creating a corrupt PDF?
byte[] completedDocument = null;
using (MemoryStream streamCompleted = new MemoryStream())
{
using (Document document = new Document())
{
PdfCopy copy = new PdfCopy(document, streamCompleted);
document.Open();
copy.Open();
foreach (var item in eventItems)
{
byte[] mergedDocument = null;
PdfReader reader = new PdfReader(pdfTemplates[item.DataTokens[NotifyTokenType.OrganisationID]]);
using (MemoryStream streamTemplate = new MemoryStream())
{
using (PdfStamper stamper = new PdfStamper(reader, streamTemplate))
{
foreach (var token in item.DataTokens)
{
if (stamper.AcroFields.Fields.Any(fld => fld.Key == token.Key.ToString()))
{
stamper.AcroFields.SetField(token.Key.ToString(), token.Value);
}
}
stamper.FormFlattening = true;
stamper.Writer.CloseStream = false;
}
mergedDocument = new byte[streamTemplate.Length];
streamTemplate.Position = 0;
streamTemplate.Read(mergedDocument, 0, (int)streamTemplate.Length);
}
reader = new PdfReader(mergedDocument);
for (int i = 1; i <= reader.NumberOfPages; i++)
{
document.SetPageSize(PageSize.A4);
copy.AddPage(copy.GetImportedPage(reader, i));
}
}
completedDocument = new byte[streamCompleted.Length];
streamCompleted.Position = 0;
streamCompleted.Read(completedDocument, 0, (int)streamCompleted.Length);
}
}
return completedDocument;
You need to close the document and copy objects to flush the PDF writing buffer. This, however, causes some problems when trying to read the stream into an array. The fix for that is to use the ToArray() method of the MemoryStream which still works on closed streams. The changes I made have comments on them.
byte[] completedDocument = null;
using (MemoryStream streamCompleted = new MemoryStream())
{
using (Document document = new Document())
{
PdfCopy copy = new PdfCopy(document, streamCompleted);
document.Open();
copy.Open();
foreach (var item in eventItems)
{
byte[] mergedDocument = null;
PdfReader reader = new PdfReader(pdfTemplates[item.DataTokens[NotifyTokenType.OrganisationID]]);
using (MemoryStream streamTemplate = new MemoryStream())
{
using (PdfStamper stamper = new PdfStamper(reader, streamTemplate))
{
foreach (var token in item.DataTokens)
{
if (stamper.AcroFields.Fields.Any(fld => fld.Key == token.Key.ToString()))
{
stamper.AcroFields.SetField(token.Key.ToString(), token.Value);
}
}
stamper.FormFlattening = true;
stamper.Writer.CloseStream = false;
}
//Copy the stream's bytes
mergedDocument = streamTemplate.ToArray();
}
reader = new PdfReader(mergedDocument);
for (int i = 1; i <= reader.NumberOfPages; i++)
{
document.SetPageSize(PageSize.A4);
copy.AddPage(copy.GetImportedPage(reader, i));
}
//Close the document and the copy
document.Close();
copy.Close();
}
//ToArray() can operate on closed streams
completedDocument = streamCompleted.ToArray();
}
}
return completedDocument;
Also make sure your html doesn't contains hr tag while converting html to pdf
hdnEditorText.Value.Replace("\"", "'").Replace("<hr />", "").Replace("<hr/>", "")