Memory error joining pdf files - c#

I'm building a program in C# that reads 3 PDF files, performs some sorting based on the user ID and generates an output PDF file.
It currently generates 1254 PDF files (approx. 5 MB each), and when I try to join them it throws an OutOfMemoryException.
How can I join these files, considering that it results in a 4 GB file size?
The following code joins all PDF files into a single output file.
/// <summary>
/// Concatenates the given PDF files, in order, into a single PDF written to <paramref name="outFile"/>.
/// </summary>
/// <param name="fileNames">Paths of the source PDFs. When null or empty, nothing is written.</param>
/// <param name="outFile">Path of the merged PDF; an existing file is overwritten.</param>
/// <remarks>
/// Each source reader is released (FreeReader + Close) as soon as its pages have been copied.
/// The original kept every reader alive until the end of the merge, so memory grew with the
/// number of input files.
/// </remarks>
public void CombineMultiplePDFs(string[] fileNames, string outFile)
{
    // Nothing to merge: like the original loop, do not create an output file.
    if (fileNames == null || fileNames.Length == 0)
    {
        return;
    }

    Document document = null;
    PdfCopy writer = null;
    FileStream output = null;
    try
    {
        foreach (string fileName in fileNames)
        {
            // we create a reader for a certain document
            PdfReader reader = new PdfReader(fileName);
            try
            {
                reader.ConsolidateNamedDestinations();

                if (writer == null)
                {
                    // The first source file decides the initial page size of the output document.
                    document = new Document(reader.GetPageSizeWithRotation(1));
                    output = new FileStream(outFile, FileMode.Create);
                    writer = new PdfCopy(document, output);
                    document.Open();
                }

                // we retrieve the total number of pages and copy them one by one (pages are 1-based)
                int pageCount = reader.NumberOfPages;
                for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++)
                {
                    writer.AddPage(writer.GetImportedPage(reader, pageNumber));
                }

                if (reader.AcroForm != null)
                {
                    writer.CopyAcroForm(reader);
                }

                // Let the writer flush and forget everything it cached for this reader.
                writer.FreeReader(reader);
            }
            finally
            {
                reader.Close();
            }
        }

        // Closing the document finishes the PDF and closes the underlying stream.
        document.Close();
    }
    finally
    {
        // Releases the file handle when the merge failed part-way; a no-op after a successful Close.
        if (output != null)
        {
            output.Dispose();
        }
    }
}

Related

How to extract images from pdf and save each as a jpeg with C# & itext7

this is a C# winforms project
Given a file folder, need to open any pdf file and for each file,
evaluate every object. If the object is an image, extract it, convert it and save it
as a Jpeg file in that same folder.
// Click handler: walks every file in the folder named by txtSourcePdf.Text, opens each one
// as a PDF and visits its pages. The snippet is incomplete as posted: the "..." line is a
// placeholder and the commented-out experiment below leaves the braces unbalanced.
// Any exception is shown to the user in a message box.
private void btnExtractImages_Click(object sender, EventArgs e)
{
try
{
DirectoryInfo di = new DirectoryInfo(txtSourcePdf.Text);
// GetFiles() has no filter here, so non-PDF files are passed to PdfReader as well.
foreach (FileInfo fi in di.GetFiles())
{
// NOTE(review): doc is never closed in the visible code.
PdfDocument doc = new PdfDocument(new PdfReader(fi.FullName));
// Replace removes every ".pdf" occurrence in the name (case-sensitive), not only a trailing extension.
string fNm = fi.Name.Replace(".pdf", ""); //Filename without the extension
int pgs = doc.GetNumberOfPages();
// Page numbers are 1-based.
for (int pg = 1; pg <= pgs; pg++)
{
PdfPage pdfPage = doc.GetPage(pg);
// This wraps the WHOLE page as a form XObject; it does not isolate the images on the page.
PdfFormXObject xObj = pdfPage.CopyAsFormXObject(doc);
iText.Layout.Element.Image img = new iText.Layout.Element.Image(xObj);
//good so far.
//need to turn this xObj or img into a jpg and save to disk
...
}
// This works, however it creates a .dat file, not a .jpg file
// (The experiment below dumps the bytes of every stream object in the document,
// whatever the stream contains, into a file that is merely named ".jpg".)
// int objs = doc.GetNumberOfPdfObjects();
// for (int o = 1; o <= objs; o++)
// {
// PdfObject obj = doc.GetPdfObject(o);
// if (obj != null && obj.IsStream())
// {
// byte[] b;
// try
// {
// // Get decoded stream bytes.
// b = ((PdfStream)obj).GetBytes();
// }
// catch (Exception)
// {
// //Get originally encoded stream bytes
// b = ((PdfStream)obj).GetBytes(false);
// }
// using (FileStream fos = new FileStream(String.Format(txtSourcePdf.Text + "/" + fNm + "{0}.jpg", o), FileMode.Create))
// {
// fos.Write(b, 0, b.Length);
// }
}
}
catch (Exception ex)
{
// Show the full exception text, including the stack trace.
MessageBox.Show(ex.ToString());
}
}

ABCpdf only rendering first page

When I try to save each page as GIF using ABCpdf, only the first page is saved.
For example: I have a PDF that has 3 pages. I use ABCpdf to render each page to a stream, which is saved to disk. When I open the files in my destination folder, all 3 files show the first page content.
Here's my code:
// Renders every page of the PDF held in inputbytearray to its own GIF and uploads each image.
using (Doc theDoc = new Doc())
{
    XReadOptions options = new XReadOptions { ReadModule = ReadModuleType.Pdf };
    theDoc.Read(inputbytearray, options);

    // Rendering settings do not change between pages, so set them once.
    // SaveAppend is intentionally not set: it only applies to multi-page TIFF output.
    theDoc.Rendering.DotsPerInch = 150;
    theDoc.Rendering.SaveCompression = XRendering.Compression.G4;

    int n = theDoc.PageCount;
    for (int i = 1; i <= n; i++)
    {
        Guid FileName = Guid.NewGuid();

        // Select the page first, then read ITS crop box. The original read the crop box
        // before switching pages, so it always used the previous page's box.
        theDoc.PageNumber = i;
        theDoc.Rect.String = theDoc.CropBox.String;

        // One stream per page. The original reused a single MemoryStream and uploaded
        // GetBuffer() from offset 0, so every upload began with the first page's image.
        using (MemoryStream ms = new MemoryStream())
        {
            theDoc.Rendering.Save(string.Format("{0}.gif", FileName), ms);
            ms.Position = 0;
            _BlobStorageService.UploadfromStream(FileName.ToString(), ms, STR_Gif, STR_Imagegif);
        }
    }
}
The Rendering.SaveAppend property is only applicable when saving TIFF images. For GIFs you would need to save a separate image for each PDF page.
// Demo handler: creates a three-page test PDF in the project's "files" folder, then renders
// each page to its own GIF file (p1.gif, p2.gif, ...) through an in-memory stream.
private void button1_Click(object sender, System.EventArgs e)
{
    // Verbatim string literal (@"..."); the '#' prefix in the posted code was an extraction
    // artefact and does not compile.
    string theDir = Directory.GetParent(Directory.GetCurrentDirectory()).Parent.FullName + @"\files\";
    string pdfPath = Path.Combine(theDir, "test.pdf");

    // Create test PDF
    using (Doc doc = new Doc())
    {
        for (int i = 1; i <= 3; i++)
        {
            doc.Page = doc.AddPage();
            doc.AddHtml("<font size=24>PAGE " + i.ToString());
        }
        doc.Save(pdfPath);
    }

    // Save PDF pages to GIF streams
    using (Doc doc = new Doc())
    {
        doc.Read(pdfPath);
        int pageCount = doc.PageCount;
        for (int i = 1; i <= pageCount; i++)
        {
            // Select the page before reading its crop box so the rectangle matches the page.
            doc.PageNumber = i;
            doc.Rect.String = doc.CropBox.String;

            // A fresh stream per page keeps each GIF separate.
            using (MemoryStream ms = new MemoryStream())
            {
                // The file name is only used to pick the output format from its extension.
                doc.Rendering.Save("dummy.gif", ms);
                using (FileStream fs = File.Create(Path.Combine(theDir, "p" + i.ToString() + ".gif")))
                {
                    ms.Seek(0, SeekOrigin.Begin);
                    ms.CopyTo(fs);
                }
            }
        }
    }
}

How to Add bookmarks to PDF file?

I have 4 PDF template files. Using iTextSharp I filled in their values and merged the 4 files into a single document, so all 4 pages are now in one PDF file. Now I want to add bookmarks to that PDF. Is there a way to do this in C#? For better understanding, please refer to the images below.
This is what I am trying to do. I am not getting any error, but there is still no bookmark in my PDF. I want to add a bookmark with 4 sections, as shown in the image; after merging, I want to add the bookmarks to the final PDF.
/// <summary>
/// Merges every file found in the generated-PDFs folder into one PDF, adds one top-level
/// bookmark per merged file (pointing at that file's first page), and returns the result
/// of GetBase64 for the merged file.
/// </summary>
/// <remarks>
/// The original built the outline from a list that was never filled, so the output had no
/// bookmarks. Bookmarks that already exist in a source file are kept as children of that
/// file's entry, shifted by the number of pages merged before it.
/// </remarks>
public string MergePDFs()
{
    string outPutFilePath = @"D:\jeldsbre.pdf";
    string genereatedpdfs = @"D:\genereatedpdfs";

    // Readers stay open until the document is closed.
    // NOTE(review): presumably required in merge-fields mode, where form fields are
    // merged when the document closes; confirm against the iTextSharp version in use.
    List<PdfReader> readers = new List<PdfReader>();
    try
    {
        using (FileStream stream = new FileStream(outPutFilePath, FileMode.Create))
        {
            Document pdfDoc = new Document(PageSize.A4);
            PdfCopy pdf = new PdfCopy(pdfDoc, stream);
            pdf.SetMergeFields();
            pdfDoc.Open();

            var files = Directory.GetFiles(genereatedpdfs);
            Console.WriteLine("Merging files count: " + files.Length);

            List<Dictionary<string, object>> bookmarks = new List<Dictionary<string, object>>();
            int pageOffset = 0; // pages already written to the merged document
            int i = 1;
            foreach (string file in files)
            {
                Console.WriteLine(i + ". Adding: " + file);
                PdfReader reader = new PdfReader(file);
                readers.Add(reader);

                // One outline entry per source file, jumping to its first merged page.
                Dictionary<string, object> section = new Dictionary<string, object>();
                section["Title"] = Path.GetFileNameWithoutExtension(file);
                section["Action"] = "GoTo";
                section["Page"] = (pageOffset + 1) + " Fit";

                // GetBookmark returns null when the source file has no outline.
                IList<Dictionary<string, object>> existing = SimpleBookmark.GetBookmark(reader);
                if (existing != null)
                {
                    SimpleBookmark.ShiftPageNumbers(existing, pageOffset, null);
                    section["Kids"] = existing;
                }
                bookmarks.Add(section);

                pdf.AddDocument(reader);
                pageOffset += reader.NumberOfPages;
                i++;
            }

            pdf.Outlines = bookmarks;
            pdfDoc.Close();
        }
    }
    finally
    {
        foreach (PdfReader reader in readers)
        {
            reader.Close();
        }
    }

    return GetBase64(outPutFilePath);
}
Assuming that your original PDFs already have bookmarks, then you should concatenate not only the documents (using the PdfCopy class), you should also concatenate the different bookmarks structures of the different files (using the SimpleBookMark class), not forgetting to take into account that you need to shift the page numbers correctly.
This is done in the ConcatenateBookmarks example in chapter 7 of my book:
// Create a list for the bookmarks
// Collects the bookmarks of every source file while its pages are copied.
ArrayList<HashMap<String, Object>> bookmarks = new ArrayList<HashMap<String, Object>>();
List<HashMap<String, Object>> tmp;
for (int i = 0; i < src.length; i++) {
    reader = new PdfReader(src[i]);
    // merge the bookmarks; getBookmark returns null for a file without an outline,
    // and addAll(null) would throw
    tmp = SimpleBookmark.getBookmark(reader);
    if (tmp != null) {
        // shift by the number of pages already written so the targets stay correct
        SimpleBookmark.shiftPageNumbers(tmp, page_offset, null);
        bookmarks.addAll(tmp);
    }
    // add the pages (page numbers are 1-based, hence the pre-increment)
    n = reader.getNumberOfPages();
    page_offset += n;
    for (int page = 0; page < n; ) {
        copy.addPage(copy.getImportedPage(reader, ++page));
    }
    // release what the writer cached for this reader before closing it
    copy.freeReader(reader);
    reader.close();
}
// Add the merged bookmarks
copy.setOutlines(bookmarks);
For a C# version of this example, please take a look at http://tinyurl.com/itextsharpIIA2C07 for the corresponding iTextSharp example:
// Create a list for the bookmarks
// Collects the bookmarks of every source file while its pages are copied.
List<Dictionary<String, Object>> bookmarks =
    new List<Dictionary<String, Object>>();
for (int i = 0; i < src.Count; i++) {
    PdfReader reader = new PdfReader(src[i]);
    try {
        // merge the bookmarks; GetBookmark returns null for a file without an outline,
        // and iterating null would throw
        IList<Dictionary<String, Object>> tmp =
            SimpleBookmark.GetBookmark(reader);
        if (tmp != null) {
            // shift by the number of pages already written so the targets stay correct
            SimpleBookmark.ShiftPageNumbers(tmp, page_offset, null);
            bookmarks.AddRange(tmp);
        }
        // add the pages (page numbers are 1-based, hence the pre-increment)
        int n = reader.NumberOfPages;
        page_offset += n;
        for (int page = 0; page < n; ) {
            copy.AddPage(copy.GetImportedPage(reader, ++page));
        }
        // release what the writer cached for this reader, as the Java version does
        copy.FreeReader(reader);
    }
    finally {
        reader.Close();
    }
}
// Add the merged bookmarks
copy.Outlines = bookmarks;
If the existing documents don't have any bookmarks (or if you don't want to copy any existing documents), then your question is a duplicate of a question I answered half a year ago: Merge pdfs and add bookmark with iText in java

How to merge multiple pdf files (generated in run time)?

How to merge multiple pdf files (generated on run time) through ItextSharp then printing them.
I found the following link but that method requires the pdf names considering that the pdf files stored and this is not my case .
I have multiple reports i'll convert them to pdf files through this method :
// Renders the given report to PDF bytes. As posted, the bytes are not used yet; the
// commented-out lines show how they would be sent as a download.
private void AddReportToResponse(LocalReport followsReport)
{
    // All five values are assigned by Render through 'out' parameters, so they need no
    // initial value (the original allocated two 100-element arrays that were discarded).
    string mimeType;
    string encoding;
    string extension;
    string[] streams;
    Warning[] warnings;
    byte[] pdfStream = followsReport.Render("PDF", "", out mimeType, out encoding, out extension, out streams, out warnings);
    //Response.Clear();
    //Response.ContentType = mimeType;
    //Response.AddHeader("content-disposition", "attachment; filename=Application." + extension);
    //Response.BinaryWrite(pdfStream);
    //Response.End();
}
Now i want to merge all those generated files (Bytes) in one pdf file to print it
If you want to merge source documents using iText(Sharp), there are two basic situations:
You really want to merge the documents, acquiring the pages in their original format, transfering as much of their content and their interactive annotations as possible. In this case you should use a solution based on a member of the Pdf*Copy* family of classes.
You actually want to integrate pages from the source documents into a new document but want the new document to govern the general format and don't care for the interactive features (annotations...) in the original documents (or even want to get rid of them). In this case you should use a solution based on the PdfWriter class.
You can find details in chapter 6 (especially section 6.4) of iText in Action — 2nd Edition. The Java sample code can be accessed here and the C#'ified versions here.
A simple sample using PdfCopy is Concatenate.java / Concatenate.cs. The central piece of code is:
// Merges every source in 'pdf' (byte arrays or file names) into one in-memory PDF.
byte[] mergedPdf = null;
using (MemoryStream ms = new MemoryStream())
{
    using (Document document = new Document())
    {
        using (PdfCopy copy = new PdfCopy(document, ms))
        {
            document.Open();
            for (int i = 0; i < pdf.Count; ++i)
            {
                PdfReader reader = new PdfReader(pdf[i]);
                try
                {
                    // loop over the pages in that document (page numbers are 1-based)
                    int n = reader.NumberOfPages;
                    for (int page = 0; page < n; )
                    {
                        copy.AddPage(copy.GetImportedPage(reader, ++page));
                    }
                    // Release the source as soon as it is copied, so memory does not grow
                    // with the number of inputs.
                    copy.FreeReader(reader);
                }
                finally
                {
                    reader.Close();
                }
            }
        }
    }
    // ToArray still works after the writer has closed the stream and returns exactly
    // the bytes written.
    mergedPdf = ms.ToArray();
}
Here pdf can either be defined as a List<byte[]> immediately containing the source documents (appropriate for your use case of merging intermediate in-memory documents) or as a List<String> containing the names of source document files (appropriate if you merge documents from disk).
An overview at the end of the referenced chapter summarizes the usage of the classes mentioned:
PdfCopy: Copies pages from one or more existing PDF documents. Major downsides: PdfCopy doesn’t detect redundant content, and it fails when concatenating forms.
PdfCopyFields: Puts the fields of the different forms into one form. Can be used to avoid the problems encountered with form fields when concatenating forms using PdfCopy. Memory use can be an issue.
PdfSmartCopy: Copies pages from one or more existing PDF documents. PdfSmartCopy is able to detect redundant content, but it needs more memory and CPU than PdfCopy.
PdfWriter: Generates PDF documents from scratch. Can import pages from other PDF documents. The major downside is that all interactive features of the imported page (annotations, bookmarks, fields, and so forth) are lost in the process.
I used iTextsharp with c# to combine pdf files. This is the code I used.
// Merges the listed PDF files, in order, into outputPdfPath.
string[] lstFiles = new string[3];
// Verbatim string literals (@"..."); the '#' prefix in the posted code does not compile.
lstFiles[0] = @"C:/pdf/1.pdf";
lstFiles[1] = @"C:/pdf/2.pdf";
lstFiles[2] = @"C:/pdf/3.pdf";
string outputPdfPath = @"C:/pdf/new.pdf";

Document sourceDocument = new Document();
// The using block releases the file handle even when a source file cannot be read.
using (System.IO.FileStream output = new System.IO.FileStream(outputPdfPath, System.IO.FileMode.Create))
{
    PdfCopy pdfCopyProvider = new PdfCopy(sourceDocument, output);
    //Open the output file
    sourceDocument.Open();

    //Loop through the files list. The bound is Length, not Length-1: the original
    //bound silently skipped the last file.
    for (int f = 0; f < lstFiles.Length; f++)
    {
        PdfReader reader = new PdfReader(lstFiles[f]);
        try
        {
            // Ask the reader for the page count instead of pattern-matching the raw file text.
            int pages = reader.NumberOfPages;
            //Add pages of current file (page numbers are 1-based)
            for (int i = 1; i <= pages; i++)
            {
                PdfImportedPage importedPage = pdfCopyProvider.GetImportedPage(reader, i);
                pdfCopyProvider.AddPage(importedPage);
            }
            pdfCopyProvider.FreeReader(reader);
        }
        finally
        {
            reader.Close();
        }
    }

    //At the end save the output file
    sourceDocument.Close();
}
// No catch block: the original "catch { throw ex; }" only reset the stack trace.
// Estimates the page count of a PDF by counting "/Type /Page" markers in the file's text.
// Returns the number of matches of the pattern below.
// NOTE(review): this is a heuristic. It only sees markers stored as plain text, so it
// presumably under-counts files that keep page objects in compressed streams; where
// a PdfReader is available, reader.NumberOfPages is the reliable source.
private int get_pageCcount(string file)
{
    using (StreamReader sr = new StreamReader(File.OpenRead(file)))
    {
        // Verbatim string (@"..."); the '#' prefix in the posted code does not compile.
        // [^s] excludes the "/Type /Pages" tree nodes.
        Regex regex = new Regex(@"/Type\s*/Page[^s]");
        MatchCollection matches = regex.Matches(sr.ReadToEnd());
        return matches.Count;
    }
}
Here is some code I pulled out of an old project I had. It was a web application but I was using iTextSharp to merge pdf files then print them.
public static class PdfMerger
{
    /// <summary>
    /// Merges the given PDF streams, in order, into one PDF and returns its bytes.
    /// Pages are imported as templates, each on a page sized like its source page.
    /// </summary>
    /// <param name="sourceFiles">PDF files being merged.</param>
    /// <returns>The bytes of the merged PDF, with no trailing padding.</returns>
    /// <exception cref="Exception">Wraps any failure that occurs while merging.</exception>
    public static byte[] MergeFiles(List<Stream> sourceFiles)
    {
        Document document = new Document();
        MemoryStream output = new MemoryStream();
        try
        {
            // Initialize pdf writer
            PdfWriter writer = PdfWriter.GetInstance(document, output);
            writer.PageEvent = new PdfPageEvents();
            // Open document to write
            document.Open();
            PdfContentByte content = writer.DirectContent;
            // Iterate through all pdf documents
            for (int fileCounter = 0; fileCounter < sourceFiles.Count; fileCounter++)
            {
                // Create pdf reader
                PdfReader reader = new PdfReader(sourceFiles[fileCounter]);
                try
                {
                    int numberOfPages = reader.NumberOfPages;
                    // Iterate through all pages (page numbers are 1-based)
                    for (int currentPageIndex = 1; currentPageIndex <= numberOfPages; currentPageIndex++)
                    {
                        // The output page takes the size of the source page as displayed.
                        Rectangle pageSize = reader.GetPageSizeWithRotation(currentPageIndex);
                        document.SetPageSize(pageSize);
                        // Create page
                        document.NewPage();
                        PdfImportedPage importedPage =
                            writer.GetImportedPage(reader, currentPageIndex);
                        // Determine page orientation
                        int pageOrientation = reader.GetPageRotation(currentPageIndex);
                        if ((pageOrientation == 90) || (pageOrientation == 270))
                        {
                            // Rotate the template and shift it up by the page height.
                            content.AddTemplate(importedPage, 0, -1f, 1f, 0, 0, pageSize.Height);
                        }
                        else
                        {
                            content.AddTemplate(importedPage, 1f, 0, 0, 1f, 0, 0);
                        }
                    }
                    // Write out and release what the writer holds for this reader before
                    // closing it, so memory does not grow with the number of inputs.
                    writer.FreeReader(reader);
                }
                finally
                {
                    reader.Close();
                }
            }
        }
        catch (Exception exception)
        {
            throw new Exception("There has an unexpected exception" +
                " occured during the pdf merging process.", exception);
        }
        finally
        {
            document.Close();
        }
        // ToArray returns exactly the bytes written. GetBuffer returned the whole internal
        // buffer, including unused capacity after the end of the PDF.
        return output.ToArray();
    }
}
/// <summary>
/// Page-event handler for merged documents. Only the document-open event does any work:
/// it prepares a Helvetica base font and remembers the writer's direct content. Every
/// other callback is an intentional no-op.
/// </summary>
internal class PdfPageEvents : IPdfPageEvent
{
    // Both fields are assigned when the document is opened.
    private BaseFont _font;
    private PdfContentByte _directContent;

    /// <summary>Creates the base font and captures the writer's direct content.</summary>
    public void OnOpenDocument(PdfWriter writer, Document document)
    {
        _font = BaseFont.CreateFont(BaseFont.HELVETICA, BaseFont.CP1252, BaseFont.NOT_EMBEDDED);
        _directContent = writer.DirectContent;
    }

    public void OnStartPage(PdfWriter writer, Document document)
    {
        // no-op
    }

    public void OnEndPage(PdfWriter writer, Document document)
    {
        // no-op
    }

    public void OnCloseDocument(PdfWriter writer, Document document)
    {
        // no-op
    }

    public void OnParagraph(PdfWriter writer, Document document, float paragraphPosition)
    {
        // no-op
    }

    public void OnParagraphEnd(PdfWriter writer, Document document, float paragraphPosition)
    {
        // no-op
    }

    public void OnChapter(PdfWriter writer, Document document, float paragraphPosition, Paragraph title)
    {
        // no-op
    }

    public void OnChapterEnd(PdfWriter writer, Document document, float paragraphPosition)
    {
        // no-op
    }

    public void OnSection(PdfWriter writer, Document document, float paragraphPosition, int depth, Paragraph title)
    {
        // no-op
    }

    public void OnSectionEnd(PdfWriter writer, Document document, float paragraphPosition)
    {
        // no-op
    }

    public void OnGenericTag(PdfWriter writer, Document document, Rectangle rect, string text)
    {
        // no-op
    }

    /// <summary>
    /// Returns the x position at which <paramref name="text"/>, set at 8 pt in the base
    /// font, is horizontally centred on the writer's page.
    /// </summary>
    private float GetCenterTextPosition(string text, PdfWriter writer)
    {
        float halfPageWidth = writer.PageSize.Width / 2;
        float halfTextWidth = _font.GetWidthPoint(text, 8) / 2;
        return halfPageWidth - halfTextWidth;
    }
}
I didn't write this, but made some modifications. I can't remember where I found it. After I merged the PDFs I would call this method to insert javascript to open the print dialog when the PDF is opened. If you change bSilent to true then it should print silently to their default printer.
/// <summary>
/// Returns a copy of the given PDF that carries document-level JavaScript which opens
/// the print dialog shortly after the PDF is opened. Form fields are flattened.
/// </summary>
/// <param name="thePDF">Stream containing the source PDF.</param>
/// <returns>A new in-memory stream, positioned at 0, holding the stamped PDF.</returns>
public Stream addPrintJStoPDF(Stream thePDF)
{
    //Open the stream with iTextSharp
    var reader = new PdfReader(thePDF);
    // Start from an empty output stream. The original seeded it from a PRStream that was
    // always null, so the method threw NullReferenceException before doing any work.
    var outPutStream = new MemoryStream();
    try
    {
        var stamper = new PdfStamper(reader, outPutStream);
        // Keep the output stream open after the stamper closes so it can be returned.
        stamper.Writer.CloseStream = false;
        var jsText = "var res = app.setTimeOut('this.print({bUI: true, bSilent: false, bShrinkToFit: false});', 200);";
        //Add the javascript to the PDF
        stamper.JavaScript = jsText;
        stamper.FormFlattening = true;
        stamper.Close();
    }
    finally
    {
        reader.Close();
    }
    //Set the stream to the beginning
    outPutStream.Position = 0;
    return outPutStream;
}
Not sure how well the above code is written since I pulled it from somewhere else and I haven't worked in depth at all with iTextSharp but I do know that it did work at merging PDFs that I was generating at runtime.
Tested with iTextSharp-LGPL 4.1.6:
/// <summary>
/// Concatenates the given PDF documents, in order, and returns the merged PDF's bytes.
/// </summary>
/// <param name="documents">The source PDFs, one byte array per document.</param>
/// <returns>The bytes of the merged PDF, with no trailing padding.</returns>
public static byte[] ConcatenatePdfs(IEnumerable<byte[]> documents)
{
    using (var ms = new MemoryStream())
    {
        var outputDocument = new Document();
        var writer = new PdfCopy(outputDocument, ms);
        outputDocument.Open();
        foreach (var doc in documents)
        {
            var reader = new PdfReader(doc);
            try
            {
                // Page numbers are 1-based.
                for (var i = 1; i <= reader.NumberOfPages; i++)
                {
                    writer.AddPage(writer.GetImportedPage(reader, i));
                }
                writer.FreeReader(reader);
            }
            finally
            {
                reader.Close();
            }
        }
        // Closing the document also closes the writer attached to it.
        outputDocument.Close();
        // ToArray returns exactly the bytes written and works on a closed stream.
        // GetBuffer returned the whole internal buffer, including unused capacity.
        return ms.ToArray();
    }
}
To avoid the memory issues mentioned, I used file stream instead of memory stream(mentioned in ITextSharp Out of memory exception merging multiple pdf) to merge pdf files:
// Merges every selected document into "MergedDocument.pdf" in the folder of the first
// selected document, streaming to disk instead of buffering the result in memory.
var parentDirectory = Directory.GetParent(SelectedDocuments[0].FilePath);
// Build the path from FullName with Path.Combine rather than concatenating the
// DirectoryInfo itself, whose string form is not guaranteed to be the full path.
var savePath = Path.Combine(parentDirectory.FullName, "MergedDocument.pdf");
using (var fs = new FileStream(savePath, FileMode.Create))
{
    using (var document = new Document())
    {
        using (var pdfCopy = new PdfCopy(document, fs))
        {
            document.Open();
            for (var i = 0; i < SelectedDocuments.Count; i++)
            {
                using (var pdfReader = new PdfReader(SelectedDocuments[i].FilePath))
                {
                    // Page numbers are 1-based, hence the pre-increment.
                    for (var page = 0; page < pdfReader.NumberOfPages;)
                    {
                        pdfCopy.AddPage(pdfCopy.GetImportedPage(pdfReader, ++page));
                    }
                    // Release what the writer cached for this reader so memory does not
                    // grow with the number of documents.
                    pdfCopy.FreeReader(pdfReader);
                }
            }
        }
    }
}
For printing multiple PDFs:
<button type="button" id="btnPrintMultiplePdf" runat="server" class="btn btn-primary btn-border btn-sm"
onserverclick="btnPrintMultiplePdf_click">
<i class="fa fa-file-pdf-o"></i>Print Multiple pdf</button>
// Click handler: merges the PDFs of every checked grid row into "Multiple.Pdf" inside the
// PDFRNew folder and sends the merged file to the browser as a download. Rows whose PDF
// does not exist on disk are skipped; when nothing is left to merge, nothing is sent.
protected void btnPrintMultiplePdf_click(object sender, EventArgs e)
{
    if (!ValidateForMultiplePDF())
    {
        return;
    }

    string pdfFolder = Server.MapPath("PDFRNew");

    // Collect paths in a list. The original joined them with ',' and split again, which
    // breaks for any path that contains a comma.
    var selectedFiles = new System.Collections.Generic.List<string>();
    for (int i = 0; i < grdRnew.Rows.Count; i++)
    {
        // NOTE(review): ValidateForMultiplePDF looks the checkbox up as "ChkSelectOne"
        // while this handler uses "chkOne"; confirm which ID the grid template defines.
        CheckBox Chk_One = (CheckBox)grdRnew.Rows[i].FindControl("chkOne");
        Label lbl_Year = (Label)grdRnew.Rows[i].FindControl("lblYear");
        Label lbl_No = (Label)grdRnew.Rows[i].FindControl("lblCode");

        if (!Chk_One.Checked)
        {
            continue;
        }

        // File name is <code><year>.pdf (the original referenced a misspelled 'llbl_No').
        string candidate = Path.Combine(pdfFolder, lbl_No.Text.Trim() + lbl_Year.Text + ".pdf");
        if (File.Exists(candidate))
        {
            selectedFiles.Add(candidate);
        }
    }

    if (selectedFiles.Count == 0)
    {
        return;
    }

    string DestinationFile = Path.Combine(pdfFolder, "Multiple.Pdf");
    MergeFiles(DestinationFile, selectedFiles.ToArray());

    // Use the registered PDF media type, and expose only the file name to the client,
    // not the server's physical path.
    Response.ContentType = "application/pdf";
    Response.AddHeader("Content-Disposition", "attachment;filename=\"" + Path.GetFileName(DestinationFile) + "\"");
    Response.TransmitFile(DestinationFile);
    Response.End();
}
// Merges SourceFiles, in order, into DestinationFile. Each source page is imported as a
// template onto a new page sized like the source page; pages rotated by 90 or 270
// degrees are rotated back.
// Like the original, this method does not throw: a failure is caught, but it is now
// reported through System.Diagnostics.Trace instead of being discarded, and the readers
// and the output file handle are released on every path.
private void MergeFiles(string DestinationFile, string[] SourceFiles)
{
    PdfReader reader = null;
    FileStream output = null;
    try
    {
        /**we create a reader for the first Document; its first page decides the initial page size**/
        reader = new PdfReader(SourceFiles[0]);
        /**Step 1: creation of a document-object**/
        Document document = new Document(reader.GetPageSizeWithRotation(1));
        /**Step 2: we create a writer that listens to the Document**/
        output = new FileStream(DestinationFile, FileMode.Create);
        PdfWriter writer = PdfWriter.GetInstance(document, output);
        /**Step 3: we open the Document**/
        document.Open();
        PdfContentByte cb = writer.DirectContent;

        /**Step 4: We Add Content**/
        for (int f = 0; f < SourceFiles.Length; f++)
        {
            if (f > 0)
            {
                reader = new PdfReader(SourceFiles[f]);
            }

            /**we retrieve the total number of pages; page numbers are 1-based**/
            int n = reader.NumberOfPages;
            for (int i = 1; i <= n; i++)
            {
                document.SetPageSize(reader.GetPageSizeWithRotation(i));
                document.NewPage();
                PdfImportedPage page = writer.GetImportedPage(reader, i);
                int rotation = reader.GetPageRotation(i);
                if (rotation == 90 || rotation == 270)
                {
                    cb.AddTemplate(page, 0, -1f, 1f, 0, 0, reader.GetPageSizeWithRotation(i).Height);
                }
                else
                {
                    cb.AddTemplate(page, 1f, 0, 0, 1f, 0, 0);
                }
            }

            /**write out and release this reader before moving on to the next file**/
            writer.FreeReader(reader);
            reader.Close();
            reader = null;
        }

        /**Step 5: we Close the Document**/
        document.Close();
    }
    catch (Exception e)
    {
        // The original assigned the message to an unused local, hiding every failure.
        System.Diagnostics.Trace.TraceError("MergeFiles failed for '" + DestinationFile + "': " + e);
    }
    finally
    {
        if (reader != null)
        {
            reader.Close();
        }
        // Releases the file handle after a failure; a no-op after a successful Close.
        if (output != null)
        {
            output.Dispose();
        }
    }
}
// Returns true when at least one grid row has its "ChkSelectOne" checkbox ticked.
// Otherwise shows the error banner, moves focus to the grid, calls set_timeout()
// and returns false.
private bool ValidateForMultiplePDF()
{
    bool anySelected = false;
    foreach (GridViewRow row in grdRnew.Rows)
    {
        CheckBox rowCheckBox = (CheckBox)row.FindControl("ChkSelectOne");
        // Non-short-circuit OR: every row's checkbox is read, exactly as before.
        anySelected |= rowCheckBox.Checked;
    }

    if (anySelected)
    {
        return true;
    }

    divStatusMsg.Style.Add("display", "");
    divStatusMsg.Attributes.Add("class", "alert alert-danger alert-dismissable");
    divStatusMsg.InnerText = "ERROR !!...Please Check At Least On CheckBox.";
    grdRnew.Focus();
    set_timeout();
    return false;
}

itextsharp trimming pdf document's pages

I have a pdf document that has form fields that I'm filling out programatically with c#. Depending on three conditions, I need to trim (delete) some of the pages from that document.
Is that possible to do?
for condition 1: I need to keep pages 1-4 but delete pages 5 and 6
for condition 2: I need to keep pages 1-4 but delete 5 and keep 6
for condition 3: I need to keep pages 1-5 but delete 6
Use PdfReader.SelectPages() combined with PdfStamper. The code below uses iTextSharp 5.5.1.
/// <summary>
/// Writes a copy of <paramref name="inputPdf"/> to <paramref name="outputPdf"/> that
/// contains only the pages named by <paramref name="pageSelection"/>.
/// </summary>
/// <param name="inputPdf">Path of the source PDF.</param>
/// <param name="pageSelection">Page selection string, e.g. "1-4,6" or "1-6,!5".</param>
/// <param name="outputPdf">Path of the PDF to create; an existing file is overwritten.</param>
public void SelectPages(string inputPdf, string pageSelection, string outputPdf)
{
    using (PdfReader reader = new PdfReader(inputPdf))
    {
        reader.SelectPages(pageSelection);
        // The output stream has its own using block, so the file handle is released
        // even when the stamper cannot be created.
        using (FileStream output = File.Create(outputPdf))
        {
            using (PdfStamper stamper = new PdfStamper(reader, output))
            {
                // Nothing to stamp: disposing the stamper writes the selected pages.
            }
        }
    }
}
Then you call this method with the correct page selection for each condition.
Condition 1:
SelectPages(inputPdf, "1-4", outputPdf);
Condition 2:
SelectPages(inputPdf, "1-4,6", outputPdf);
or
SelectPages(inputPdf, "1-6,!5", outputPdf);
Condition 3:
SelectPages(inputPdf, "1-5", outputPdf);
Here's the comment from the iTextSharp source code on what makes up a page selection. This is in the SequenceList class which is used to process a page selection:
/**
* This class expands a string into a list of numbers. The main use is to select a
* range of pages.
* <p>
* The general systax is:<br>
* [!][o][odd][e][even]start-end
* <p>
* You can have multiple ranges separated by commas ','. The '!' modifier removes the
* range from what is already selected. The range changes are incremental, that is,
* numbers are added or deleted as the range appears. The start or the end, but not both, can be ommited.
*/
Instead of deleting pages in a document what you actually do is create a new document and only import the pages that you want to keep. Below is a full working WinForms app that does that (targetting iTextSharp 5.1.1.0). The last parameter to the function removePagesFromPdf is an array of pages to keep.
The code below works off of physical files but would be very easy to convert to something based on streams so that you don't have to write to disk if you don't want to.
using System;
using System.ComponentModel;
using System.IO;
using System.Linq;
using System.Windows.Forms;
using iTextSharp.text.pdf;
using iTextSharp.text;
namespace Full_Profile1
{
    // Demo form: on load, writes a copy of Test.pdf (on the desktop) that keeps only a
    // chosen set of pages, then closes itself.
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        private void Form1_Load(object sender, EventArgs e)
        {
            //The files that we are working with
            string sourceFolder = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
            string sourceFile = Path.Combine(sourceFolder, "Test.pdf");
            string destFile = Path.Combine(sourceFolder, "TestOutput.pdf");
            //Remove all pages except 1,2,3,4 and 6
            removePagesFromPdf(sourceFile, destFile, 1, 2, 3, 4, 6);
            this.Close();
        }

        /// <summary>
        /// Creates <paramref name="destinationFile"/> from the listed pages of
        /// <paramref name="sourceFile"/>, in the order given.
        /// </summary>
        /// <param name="sourceFile">Path of the PDF to read.</param>
        /// <param name="destinationFile">Path of the PDF to create; an existing file is overwritten.</param>
        /// <param name="pagesToKeep">1-based numbers of the pages to copy.</param>
        public void removePagesFromPdf(String sourceFile, String destinationFile, params int[] pagesToKeep)
        {
            //Used to pull individual pages from our source
            PdfReader r = new PdfReader(sourceFile);
            try
            {
                //Create our destination file
                using (FileStream fs = new FileStream(destinationFile, FileMode.Create, FileAccess.Write, FileShare.None))
                {
                    using (Document doc = new Document())
                    {
                        using (PdfWriter w = PdfWriter.GetInstance(doc, fs))
                        {
                            //Open the destination for writing
                            doc.Open();
                            PdfContentByte cb = w.DirectContent;
                            //Loop through each page that we want to keep
                            foreach (int page in pagesToKeep)
                            {
                                //Size the new page like the source page as displayed, so
                                //pages that are not A4 are not cropped or misplaced
                                Rectangle pageSize = r.GetPageSizeWithRotation(page);
                                doc.SetPageSize(pageSize);
                                //Add a new blank page to destination document
                                doc.NewPage();
                                //Extract the given page from our reader and add it to the destination PDF
                                PdfImportedPage imported = w.GetImportedPage(r, page);
                                int rotation = r.GetPageRotation(page);
                                if (rotation == 90 || rotation == 270)
                                {
                                    //Same transform the other merge routines in this file use for rotated pages
                                    cb.AddTemplate(imported, 0, -1f, 1f, 0, 0, pageSize.Height);
                                }
                                else
                                {
                                    cb.AddTemplate(imported, 0, 0);
                                }
                            }
                            //Close our document
                            doc.Close();
                        }
                    }
                }
            }
            finally
            {
                //Release the source file even when writing fails
                r.Close();
            }
        }
    }
}
Here is the code I use to copy all but the last page of an existing PDF. Everything is in memory streams. The variable pdfByteArray is a byte[] of the original pdf obtained using ms.ToArray(). pdfByteArray is overwritten with the new PDF.
// Replaces pdfByteArray with a copy of the same PDF that omits its last page.
// NOTE(review): a one-page source yields a document without pages; confirm how the
// caller wants that case handled.
PdfReader originalPDFReader = new PdfReader(pdfByteArray);
try
{
    using (MemoryStream msCopy = new MemoryStream())
    {
        using (Document docCopy = new Document())
        {
            using (PdfCopy copy = new PdfCopy(docCopy, msCopy))
            {
                docCopy.Open();
                // Page numbers are 1-based; stop one short of the end to drop the last page.
                int lastPageToKeep = originalPDFReader.NumberOfPages - 1;
                for (int pageNum = 1; pageNum <= lastPageToKeep; pageNum++)
                {
                    copy.AddPage(copy.GetImportedPage(originalPDFReader, pageNum));
                }
                copy.FreeReader(originalPDFReader);
                docCopy.Close();
            }
        }
        // ToArray works on the closed stream and returns exactly the bytes written.
        pdfByteArray = msCopy.ToArray();
    } // this closing brace was missing in the posted snippet
}
finally
{
    originalPDFReader.Close();
}
I know it's an old post; I have simply extended @chris-haas's solution to the next level.
Delete the selected pages after that save them into the separate pdf file.
//ms is MemoryStream and fs is FileStream
ms.CopyTo(fs);
Save the Stream to a separate pdf file. 100% working without any error.
pageRange="5"
pageRange="2,15-20"
pageRange="1-5,15-20"
You can pass the pageRange vales like the above-given samples.
/// <summary>
/// Writes a copy of the source PDF to <paramref name="OutputPdfPath"/> without the pages
/// named in <paramref name="pageRange"/>. Any failure is shown in a message box.
/// </summary>
/// <param name="pageRange">Pages to delete: numbers and inclusive ranges separated by commas, e.g. "5", "2,15-20", "1-5,15-20".</param>
/// <param name="SourcePdfPath">Path of the PDF to read.</param>
/// <param name="OutputPdfPath">Path of the PDF to create; an existing file is overwritten.</param>
/// <param name="Password">NOTE(review): accepted but never used by this method; confirm whether protected PDFs must be supported.</param>
private void DeletePagesNew(string pageRange, string SourcePdfPath, string OutputPdfPath, string Password = "")
{
    PdfReader Reader = null;
    try
    {
        HashSet<int> pagesToDelete = ParsePagesToDelete(pageRange);
        Reader = new PdfReader(SourcePdfPath);

        // The original also wrapped this in an empty MemoryStream and copied it to the
        // file; that copied zero bytes, so it is gone.
        using (var fs = new FileStream(OutputPdfPath, FileMode.Create, FileAccess.Write, FileShare.None))
        {
            using (var doc = new Document())
            {
                using (PdfWriter w = PdfWriter.GetInstance(doc, fs))
                {
                    doc.Open();
                    PdfContentByte cb = w.DirectContent;
                    int pageCount = Reader.NumberOfPages;
                    // Page numbers are 1-based.
                    for (int p = 1; p <= pageCount; p++)
                    {
                        if (pagesToDelete.Contains(p))
                        {
                            continue;
                        }

                        // Size the page as displayed. The rotated branch below shifts by the
                        // rotated height, so the page must use the rotated size as well.
                        Rectangle pageSize = Reader.GetPageSizeWithRotation(p);
                        doc.SetPageSize(pageSize);
                        doc.NewPage();

                        PdfImportedPage pageImport = w.GetImportedPage(Reader, p);
                        int rot = Reader.GetPageRotation(p);
                        if (rot == 90 || rot == 270)
                        {
                            cb.AddTemplate(pageImport, 0, -1.0f, 1.0f, 0, 0, pageSize.Height);
                        }
                        else
                        {
                            cb.AddTemplate(pageImport, 1.0f, 0, 0, 1.0f, 0, 0);
                        }
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
    finally
    {
        // Release the source file on every path, including failures.
        if (Reader != null)
        {
            Reader.Close();
        }
    }
}

// Expands a page-range string such as "2,15-20" into the set of page numbers it names.
// A malformed number throws FormatException, as Convert.ToInt32 did in the original.
private static HashSet<int> ParsePagesToDelete(string pageRange)
{
    var pages = new HashSet<int>();
    // Splitting a string that has no comma yields the whole string as a single token,
    // so one loop covers the single-number, single-range and list forms.
    foreach (string token in pageRange.Split(','))
    {
        if (token.IndexOf("-") != -1)
        {
            string[] bounds = token.Split('-');
            int first = Convert.ToInt32(bounds[0]);
            int last = Convert.ToInt32(bounds[1]);
            for (int page = first; page <= last; page++)
            {
                pages.Add(page);
            }
        }
        else
        {
            pages.Add(Convert.ToInt32(token));
        }
    }
    return pages;
}

Categories

Resources