I'm trying to go through all the images in a PDF and resize them to reduce the PDF file size. I'm using iTextSharp in C#. Below is my code so far. When I look at the output file none of the images were modified.
PdfReader pdf = new PdfReader(input);
using (PdfStamper stp = new PdfStamper(pdf, output))
{
for (int i = 1; i <= pdf.NumberOfPages; i++)
{
PdfDictionary page = pdf.GetPageN(i);
PdfDictionary resources = (PdfDictionary)PdfReader.GetPdfObject(page.Get(PdfName.RESOURCES));
PdfDictionary xObjects = (PdfDictionary)PdfReader.GetPdfObject(resources.Get(PdfName.XOBJECT));
if (xObjects == null)
continue;
foreach (PdfName name in xObjects.Keys)
{
PdfObject xObj = xObjects.Get(name);
if (!xObj.IsIndirect())
continue;
//Filter non-images
PdfDictionary xObjDic = (PdfDictionary)PdfReader.GetPdfObject(xObj);
PdfName xObjType = (PdfName)PdfReader.GetPdfObject(xObjDic.Get(PdfName.SUBTYPE));
if (!PdfName.IMAGE.Equals(xObjType))
continue;
//Get embedded image
int refId = ((PRIndirectReference)xObj).Number;
PRStream objStream = (PRStream)pdf.GetPdfObject(refId);
PdfImageObject objImg = new PdfImageObject(objStream);
iTextSharp.text.Image img = iTextSharp.text.Image.GetInstance(objImg.GetImageAsBytes());
//Resize
img.ScaleAbsolute(200, 200);
img.SetDpi(72, 72);
PdfReader.KillIndirect(xObj);
stp.Writer.AddDirectImageSimple(img, (PRIndirectReference)xObj);
break;
}
}
}
pdf.Close();
Related
I have this code that will add a watermark on each page:
string watermarkLocation = AppDomain.CurrentDomain.BaseDirectory + "Watermark.png";
Document document = new Document();
PdfReader pdfReader = new PdfReader(fileLocation);
PdfStamper stamp = new PdfStamper(pdfReader, new FileStream(fileLocation.Replace(".pdf", "_marked.pdf"), FileMode.Create));
iTextSharp.text.Image img = iTextSharp.text.Image.GetInstance(watermarkLocation);
img.ScaleToFit(document.PageSize);
img.SetAbsolutePosition(0, 100);
PdfContentByte waterMark;
for (int page = 1; page <= pdfReader.NumberOfPages; page++)
{
waterMark = stamp.GetOverContent(page);
waterMark.AddImage(img);
}
stamp.FormFlattening = true;
stamp.Close();
return fileLocation.Replace(".pdf", "_marked.pdf");
But on PDFs that have textboxes, the image will go behind the textbox/form. I thought flattening the file will fix this, but it does not work.
I used a full image as a test but the watermark in the end will have transparency.
Here's the final code I'm using. As my comment mentioned, there's basically 2 readers/stamps, one to flatten the file and another to add the watermark.
Flatten file:
private byte[] FlattenPdfFormToBytes(PdfReader reader)
{
var memStream = new MemoryStream();
var stamper = new PdfStamper(reader, memStream) { FormFlattening = true };
stamper.Close();
return memStream.ToArray();
}
Add Watermark (which will call FlattenPdfFormToBytes):
public string AddWatermark(string fileLocation)
{
string watermarkLocation = AppDomain.CurrentDomain.BaseDirectory + "Watermark.png";
Document document = new Document();
PdfReader pdfReader = new PdfReader(fileLocation);
PdfReader pdfFlatten = new PdfReader(FlattenPdfFormToBytes(pdfReader)); // The secret sauce is this!!!
PdfStamper stamp = new PdfStamper(pdfFlatten, new FileStream(fileLocation.Replace(".pdf", "_marked.pdf"), FileMode.Create));
iTextSharp.text.Image img = iTextSharp.text.Image.GetInstance(watermarkLocation);
img.ScaleToFit(document.PageSize);
img.SetAbsolutePosition(0, 100);
PdfContentByte waterMark;
for (int page = 1; page <= pdfFlatten.NumberOfPages; page++)
{
waterMark = stamp.GetOverContent(page);
waterMark.AddImage(img);
}
stamp.Close();
return fileLocation.Replace(".pdf", "_marked.pdf");
}
I am trying to read some pdf files page by page and add the pages to an existing pdf using itextsharp. Here is my solution:
string path2 = Server.MapPath("~/2.pdf");
PdfReader reader = null;
iTextSharp.text.Document document = null;
PdfCopy pdfCopyProvider = null;
PdfImportedPage importedPage = null;
foreach (var pdfName in pdfNames)
{
path1 = Path.Combine(Server.MapPath("~/Files/Pdf/temp/"), pdfName);
reader = new PdfReader(path1);
for (int pageIndex = 1; pageIndex <= reader.NumberOfPages; pageIndex++)
{
document = new iTextSharp.text.Document(reader.GetPageSizeWithRotation(pageIndex));
pdfCopyProvider = new PdfCopy(document, new System.IO.FileStream(path2, System.IO.FileMode.Append));
document.Open();
importedPage = pdfCopyProvider.GetImportedPage(reader, pageIndex);
iTextSharp.text.Image image = iTextSharp.text.Image.GetInstance(importedPage);
pdfCopyProvider.AddPage(importedPage);
}
reader.Close();
document.Close();
}
But the output pdf just contains the pages of last pdf in the foreach loop.
How would I merge several pdf pages into one with iTextSharp which also supports merging pages having form elements like textboxes, checkboxes, etc.
I have tried so many by googling, but nothing has worked well.
See my answer here Merging Memory Streams. I give an example of how to merge PDFs with itextsharp.
For updating form field names add this code that uses the stamper to change the form field names.
/// <summary>
/// Merges pdf files from a byte list
/// </summary>
/// <param name="files">list of files to merge</param>
/// <returns>memory stream containing combined pdf</returns>
public MemoryStream MergePdfForms(List<byte[]> files)
{
if (files.Count > 1)
{
string[] names;
PdfStamper stamper;
MemoryStream msTemp = null;
PdfReader pdfTemplate = null;
PdfReader pdfFile;
Document doc;
PdfWriter pCopy;
MemoryStream msOutput = new MemoryStream();
pdfFile = new PdfReader(files[0]);
doc = new Document();
pCopy = new PdfSmartCopy(doc, msOutput);
pCopy.PdfVersion = PdfWriter.VERSION_1_7;
doc.Open();
for (int k = 0; k < files.Count; k++)
{
for (int i = 1; i < pdfFile.NumberOfPages + 1; i++)
{
msTemp = new MemoryStream();
pdfTemplate = new PdfReader(files[k]);
stamper = new PdfStamper(pdfTemplate, msTemp);
names = new string[stamper.AcroFields.Fields.Keys.Count];
stamper.AcroFields.Fields.Keys.CopyTo(names, 0);
foreach (string name in names)
{
stamper.AcroFields.RenameField(name, name + "_file" + k.ToString());
}
stamper.Close();
pdfFile = new PdfReader(msTemp.ToArray());
((PdfSmartCopy)pCopy).AddPage(pCopy.GetImportedPage(pdfFile, i));
pCopy.FreeReader(pdfFile);
}
}
pdfFile.Close();
pCopy.Close();
doc.Close();
return msOutput;
}
else if (files.Count == 1)
{
return new MemoryStream(files[0]);
}
return null;
}
Here is my simplified version of Jonathan's Merge code with namespaces added, and stamping removed.
public IO.MemoryStream MergePdfForms(System.Collections.Generic.List<byte[]> files)
{
if (files.Count > 1) {
using (System.IO.MemoryStream msOutput = new System.IO.MemoryStream()) {
using (iTextSharp.text.Document doc = new iTextSharp.text.Document()) {
using (iTextSharp.text.pdf.PdfSmartCopy pCopy = new iTextSharp.text.pdf.PdfSmartCopy(doc, msOutput) { PdfVersion = iTextSharp.text.pdf.PdfWriter.VERSION_1_7 }) {
doc.Open();
foreach (byte[] oFile in files) {
using (iTextSharp.text.pdf.PdfReader pdfFile = new iTextSharp.text.pdf.PdfReader(oFile)) {
for (i = 1; i <= pdfFile.NumberOfPages; i++) {
pCopy.AddPage(pCopy.GetImportedPage(pdfFile, i));
pCopy.FreeReader(pdfFile);
}
}
}
}
}
return msOutput;
}
} else if (files.Count == 1) {
return new System.IO.MemoryStream(files[0]);
}
return null;
}
to merge PDF see "Merging two pdf pages into one using itextsharp"
Below is my code for pdf merging.Thanks Jonathan for giving suggestion abt renaming fields,which resolved the issues while merging pdf pages with form fields.
private static void CombineAndSavePdf(string savePath, List<string> lstPdfFiles)
{
using (Stream outputPdfStream = new FileStream(savePath, FileMode.Create, FileAccess.Write, FileShare.None))
{
Document document = new Document();
PdfSmartCopy copy = new PdfSmartCopy(document, outputPdfStream);
document.Open();
PdfReader reader;
int totalPageCnt;
PdfStamper stamper;
string[] fieldNames;
foreach (string file in lstPdfFiles)
{
reader = new PdfReader(file);
totalPageCnt = reader.NumberOfPages;
for (int pageCnt = 0; pageCnt < totalPageCnt; )
{
//have to create a new reader for each page or PdfStamper will throw error
reader = new PdfReader(file);
stamper = new PdfStamper(reader, outputPdfStream);
fieldNames = new string[stamper.AcroFields.Fields.Keys.Count];
stamper.AcroFields.Fields.Keys.CopyTo(fieldNames, 0);
foreach (string name in fieldNames)
{
stamper.AcroFields.RenameField(name, name + "_file" + pageCnt.ToString());
}
copy.AddPage(copy.GetImportedPage(reader, ++pageCnt));
}
copy.FreeReader(reader);
}
document.Close();
}
}
How can I change the font of a existing PDF file with iTextSharp in C#?
I want to change the whole documents font into one e.g. Arial
Finally I solved the problem.
The following code will open an existing Pdf file and will change all its font to 'Braille' as per my expectation.
private static void ChangeFont()
{
string strFile = #"E:\\xyz.pdf";
string OutputFile = #"E:\\xyz1.pdf";
PdfReader pdfReader = new PdfReader(strFile);
//Get first page,Generally we get font information on first page,however we can loop throw pages e.g for(int i=0;i<=pdfReader.NumberOfPages;i++)
PdfDictionary cpage = pdfReader.GetPageN(1);
if (cpage == null)
return;
PdfDictionary dictFonts = cpage.GetAsDict(PdfName.RESOURCES).GetAsDict(PdfName.FONT);
if (dictFonts != null)
{
foreach (var font in dictFonts)
{
var dictFontInfo = dictFonts.GetAsDict(font.Key);
if (dictFontInfo != null)
{
foreach (var f in dictFontInfo)
{
//Get the font name-optional code
var baseFont = dictFontInfo.Get(PdfName.BASEFONT);
string strFontName = System.Text.Encoding.ASCII.GetString(baseFont.GetBytes(), 0,
baseFont.Length);
//
//Remove the current font
dictFontInfo.Remove(PdfName.BASEFONT);
//Set new font eg. Braille, Areal etc
dictFontInfo.Put(PdfName.BASEFONT, new PdfString("Braille"));
break;
}
}
}
}
//Now create a new document with updated font
using (FileStream FS = new FileStream(OutputFile, FileMode.Create, FileAccess.Write, FileShare.None))
{
using (Document Doc = new Document())
{
using (PdfCopy writer = new PdfCopy(Doc, FS))
{
Doc.Open();
for (int j = 1; j <= pdfReader.NumberOfPages; j++)
{
writer.AddPage(writer.GetImportedPage(pdfReader, j));
}
Doc.Close();
}
}
}
pdfReader.Close();
}
We are using itextsharp to create a single PDF from multiple PDF files. How do I insert a new page into a PDF file that has multiple pages already in the file? When I use add page it is overwriting the existing pages and only saves the 1 page that was selected.
Here is the code that I am using to add the page to the existing PDF:
PdfReader reader = new PdfReader(sourcePdfPath);
Document document = new Document(reader.GetPageSizeWithRotation(1));
PdfCopy pdfCopy = new PdfCopy(document, new System.IO.FileStream(outputPdfPath, System.IO.FileMode.Create));
MemoryStream memoryStream = new MemoryStream();
PdfWriter writer = PdfWriter.GetInstance(document, memoryStream);
document.AddDocListener(writer);
document.Open();
for (int p = 1; p <= reader.NumberOfPages; p++)
{
if (pagesToExtract.FindIndex(s => s == p) == -1) continue;
document.SetPageSize(reader.GetPageSize(p));
document.NewPage();
PdfContentByte cb = writer.DirectContent;
PdfImportedPage pageImport = writer.GetImportedPage(reader, p);
int rot = reader.GetPageRotation(p);
if (rot == 90 || rot == 270)
{
cb.AddTemplate(pageImport, 0, -1.0F, 1.0F, 0, 0, reader.GetPageSizeWithRotation(p).Height);
}
else
{
cb.AddTemplate(pageImport, 1.0F, 0, 0, 1.0F, 0, 0);
}
pdfCopy.AddPage(pageImport);
}
pdfCopy.Close();
This code works. You need to have a different file to output the results.
private static void AppendToDocument(string sourcePdfPath1, string sourcePdfPath2, string outputPdfPath)
{
using (var sourceDocumentStream1 = new FileStream(sourcePdfPath1, FileMode.Open))
{
using (var sourceDocumentStream2 = new FileStream(sourcePdfPath2, FileMode.Open))
{
using (var destinationDocumentStream = new FileStream(outputPdfPath, FileMode.Create))
{
var pdfConcat = new PdfConcatenate(destinationDocumentStream);
var pdfReader = new PdfReader(sourceDocumentStream1);
var pages = new List<int>();
for (int i = 0; i < pdfReader.NumberOfPages; i++)
{
pages.Add(i);
}
pdfReader.SelectPages(pages);
pdfConcat.AddPages(pdfReader);
pdfReader = new PdfReader(sourceDocumentStream2);
pages = new List<int>();
for (int i = 0; i < pdfReader.NumberOfPages; i++)
{
pages.Add(i);
}
pdfReader.SelectPages(pages);
pdfConcat.AddPages(pdfReader);
pdfReader.Close();
pdfConcat.Close();
}
}
}
}
I've tried this code, and it works for me, but don't forget to do some validations of the number of pages and existence of the paths you use
here is the code:
private static void AppendToDocument(string sourcePdfPath, string outputPdfPath, List<int> neededPages)
{
var sourceDocumentStream = new FileStream(sourcePdfPath, FileMode.Open);
var destinationDocumentStream = new FileStream(outputPdfPath, FileMode.Create);
var pdfConcat = new PdfConcatenate(destinationDocumentStream);
var pdfReader = new PdfReader(sourceDocumentStream);
pdfReader.SelectPages(neededPages);
pdfConcat.AddPages(pdfReader);
pdfReader.Close();
pdfConcat.Close();
}
You could use something like this, where src is the IEnumerable<string> of input pdf filenames. Just make sure that your existing pdf file is one of those sources.
The PdfConcatenate class is in the latest iTextSharp release.
var result = "combined.pdf";
var fs = new FileStream(result, FileMode.Create);
var conc = new PdfConcatenate(fs, true);
foreach(var s in src) {
var r = new PdfReader(s);
conc.AddPages(r);
}
conc.Close();
PdfCopy is intended for use with an empty Document. You should add everything you want, one page at a time.
The alternative is to use PdfStamper.InsertPage(pageNum, rectangle) and then draw a PdfImportedPage onto that new page.
Note that PdfImportedPage only includes the page contents, not the annotations or doc-level information ("document structure", doc-level javascripts, etc) that page may have originally used... unless you use one with PdfCopy.
A Stamper would probably be more efficient and use less code, but PdfCopy will import all the page-level info, not just the page's contents.
This might be important, it might not. It depends on what page you're trying to import.
Had to even out the page count with a multiple of 4:
private static void AppendToDocument(string sourcePdfPath)
{
var tempFileLocation = Path.GetTempFileName();
var bytes = File.ReadAllBytes(sourcePdfPath);
using (var reader = new PdfReader(bytes))
{
var numberofPages = reader.NumberOfPages;
var modPages = (numberofPages % 4);
var pages = modPages == 0 ? 0 : 4 - modPages;
if (pages == 0)
return;
using (var fileStream = new FileStream(tempFileLocation, FileMode.Create, FileAccess.Write))
{
using (var stamper = new PdfStamper(reader, fileStream))
{
var rectangle = reader.GetPageSize(1);
for (var i = 1; i <= pages; i++)
stamper.InsertPage(numberofPages + i, rectangle);
}
}
}
File.Delete(sourcePdfPath);
File.Move(tempFileLocation, sourcePdfPath);
}
I know I'm really late to the part here, but I mixed a bit of the two best answers and created a method if anyone needs it that adds a list of source PDF documents to a single document using itextsharp.
private static void appendToDocument(List<string> sourcePDFList, string outputPdfPath)
{
//Output document name and creation
FileStream destinationDocumentStream = new FileStream(outputPdfPath, FileMode.Create);
//Object to concat source pdf's to output pdf
PdfConcatenate pdfConcat = new PdfConcatenate(destinationDocumentStream);
//For each source pdf in list...
foreach (string sourcePdfPath in sourcePDFList)
{
//If file exists...
if (File.Exists(sourcePdfPath))
{
//Open the document
FileStream sourceDocumentStream = new FileStream(sourcePdfPath, FileMode.Open);
//Read the document
PdfReader pdfReader = new PdfReader(sourceDocumentStream);
//Create an int list
List<int> pages = new List<int>();
//for each page in pdfReader
for (int i = 1; i < pdfReader.NumberOfPages + 1; i++)
{
//Add that page to the list
pages.Add(i);
}
//Add that page to the pages to add to ouput document
pdfReader.SelectPages(pages);
//Add pages to output page
pdfConcat.AddPages(pdfReader);
//Close reader
pdfReader.Close();
}
}
//Close pdfconcat
pdfConcat.Close();
}