split PDF into multiple files in C#

split PDF into multiple files in C# - c#

We have a C# Windows service that currently processes all the PDFs by reading the 2D barcode on the PDF using a 3rd party component and then updates the database and stores the document in the Document repository.
Is there a way I can cut the files after reading the barcode and store it as another document?
For example if there is a 10 page document, it should split into 10 different files.
Thanks.

I met the same question, you can use itextsharp component tools to split the document
public Split(String[] args)
{
if (args.Length != 4)
{
Console.Error.WriteLine("This tools needs 4 parameters:\njava Split srcfile destfile1 destfile2 pagenumber");
}
else
{
try
{
int pagenumber = int.Parse(args[3]);
// we create a reader for a certain document
PdfReader reader = new PdfReader(args[0]);
// we retrieve the total number of pages
int n = reader.NumberOfPages;
Console.WriteLine("There are " + n + " pages in the original file.");
if (pagenumber < 2 || pagenumber > n)
{
throw new DocumentException("You can't split this document at page " + pagenumber + "; there is no such page.");
}
// step 1: creation of a document-object
Document document1 = new Document(reader.GetPageSizeWithRotation(1));
Document document2 = new Document(reader.GetPageSizeWithRotation(pagenumber));
// step 2: we create a writer that listens to the document
PdfWriter writer1 = PdfWriter.GetInstance(document1, new FileStream(args[1], FileMode.Create));
PdfWriter writer2 = PdfWriter.GetInstance(document2, new FileStream(args[2], FileMode.Create));
// step 3: we open the document
document1.Open();
PdfContentByte cb1 = writer1.DirectContent;
document2.Open();
PdfContentByte cb2 = writer2.DirectContent;
PdfImportedPage page;
int rotation;
int i = 0;
// step 4: we add content
while (i < pagenumber - 1)
{
i++;
document1.SetPageSize(reader.GetPageSizeWithRotation(i));
document1.NewPage();
page = writer1.GetImportedPage(reader, i);
rotation = reader.GetPageRotation(i);
if (rotation == 90 || rotation == 270)
{
cb1.AddTemplate(page, 0, -1f, 1f, 0, 0, reader.GetPageSizeWithRotation(i).Height);
}
else
{
cb1.AddTemplate(page, 1f, 0, 0, 1f, 0, 0);
}
}
while (i < n)
{
i++;
document2.SetPageSize(reader.GetPageSizeWithRotation(i));
document2.NewPage();
page = writer2.GetImportedPage(reader, i);
rotation = reader.GetPageRotation(i);
if (rotation == 90 || rotation == 270)
{
cb2.AddTemplate(page, 0, -1f, 1f, 0, 0, reader.GetPageSizeWithRotation(i).Height);
}
else
{
cb2.AddTemplate(page, 1f, 0, 0, 1f, 0, 0);
}
Console.WriteLine("Processed page " + i);
}
// step 5: we close the document
document1.Close();
document2.Close();
}
catch(Exception e)
{
Console.Error.WriteLine(e.Message);
Console.Error.WriteLine(e.StackTrace);
}
}
}

You can use a PDF library like PDFSharp, read the file, iterate through each of the pages, add them to a new PDF document and save them on the filesystem. You can then also delete or keep the original.
It's quite a bit of code, but very simple and these samples should get you started.
http://www.pdfsharp.net/wiki/Default.aspx?Page=ConcatenateDocuments-sample&NS=&AspxAutoDetectCookieSupport=1

A previous question answers your partially - how to split pdf documents, if you know where the barcodes are then you can split the documents easily:
How can I split up a PDF file into pages (preferably C#)
The recommendation is a library called PDFSharp and a sample demonstrating PDF splitting.

public int ExtractPages(string sourcePdfPath, string DestinationFolder)
{
int p = 0;
try
{
iTextSharp.text.Document document;
iTextSharp.text.pdf.PdfReader reader = new iTextSharp.text.pdf.PdfReader(new iTextSharp.text.pdf.RandomAccessFileOrArray(sourcePdfPath), new ASCIIEncoding().GetBytes(""));
if (!Directory.Exists(sourcePdfPath.ToLower().Replace(".pdf", "")))
{
Directory.CreateDirectory(sourcePdfPath.ToLower().Replace(".pdf", ""));
}
else
{
Directory.Delete(sourcePdfPath.ToLower().Replace(".pdf", ""), true);
Directory.CreateDirectory(sourcePdfPath.ToLower().Replace(".pdf", ""));
}
for (p = 1; p <= reader.NumberOfPages; p++)
{
using (MemoryStream memoryStream = new MemoryStream())
{
document = new iTextSharp.text.Document();
iTextSharp.text.pdf.PdfWriter writer = iTextSharp.text.pdf.PdfWriter.GetInstance(document, memoryStream);
writer.SetPdfVersion(iTextSharp.text.pdf.PdfWriter.PDF_VERSION_1_2);
writer.CompressionLevel = iTextSharp.text.pdf.PdfStream.BEST_COMPRESSION;
writer.SetFullCompression();
document.SetPageSize(reader.GetPageSize(p));
document.NewPage();
document.Open();
document.AddDocListener(writer);
iTextSharp.text.pdf.PdfContentByte cb = writer.DirectContent;
iTextSharp.text.pdf.PdfImportedPage pageImport = writer.GetImportedPage(reader, p);
int rot = reader.GetPageRotation(p);
if (rot == 90 || rot == 270)
{
cb.AddTemplate(pageImport, 0, -1.0F, 1.0F, 0, 0, reader.GetPageSizeWithRotation(p).Height);
}
else
{
cb.AddTemplate(pageImport, 1.0F, 0, 0, 1.0F, 0, 0);
}
document.Close();
document.Dispose();
File.WriteAllBytes(DestinationFolder + "/" + p + ".pdf", memoryStream.ToArray());
}
}
reader.Close();
reader.Dispose();
}
catch
{
}
finally
{
GC.Collect();
}
return p - 1;
}
call this function where ever you want and pass the source and destination folder path

This code is based on the PDFsharp library
http://www.pdfsharp.com/PDFsharp/
If you want to split by Book Mark then here is the code.
public static void SplitPDFByBookMark(string fileName)
{
string sInFile = fileName;
PdfReader pdfReader = new PdfReader(sInFile);
try
{
IList<Dictionary<string, object>> bookmarks = SimpleBookmark.GetBookmark(pdfReader);
for (int i = 0; i < bookmarks.Count; ++i)
{
IDictionary<string, object> BM = (IDictionary<string, object>)bookmarks[0];
IDictionary<string, object> nextBM = i == bookmarks.Count - 1 ? null : bookmarks[i + 1];
string startPage = BM["Page"].ToString().Split(' ')[0].ToString();
string startPageNextBM = nextBM == null ? "" + (pdfReader.NumberOfPages + 1) : nextBM["Page"].ToString().Split(' ')[0].ToString();
SplitByBookmark(pdfReader, int.Parse(startPage), int.Parse(startPageNextBM), bookmarks[i].Values.ToArray().GetValue(0).ToString() + ".pdf", fileName);
}
}
catch (Exception ex)
{
throw ex;
}
}
private static void SplitByBookmark(PdfReader reader, int pageFrom, int PageTo, string outPutName, string inPutFileName)
{
Document document = new Document();
FileStream fs = new System.IO.FileStream(System.IO.Path.GetDirectoryName(inPutFileName) + '\\' + outPutName, System.IO.FileMode.Create);
try
{
PdfWriter writer = PdfWriter.GetInstance(document, fs);
document.Open();
PdfContentByte cb = writer.DirectContent;
//holds pdf data
PdfImportedPage page;
if (pageFrom == PageTo && pageFrom == 1)
{
document.NewPage();
page = writer.GetImportedPage(reader, pageFrom);
cb.AddTemplate(page, 0, 0);
pageFrom++;
fs.Flush();
document.Close();
fs.Close();
}
else
{
while (pageFrom < PageTo)
{
document.NewPage();
page = writer.GetImportedPage(reader, pageFrom);
cb.AddTemplate(page, 0, 0);
pageFrom++;
fs.Flush();
document.Close();
fs.Close();
}
}
}
catch (Exception ex)
{
throw ex;
}
finally
{
if (document.IsOpen())
document.Close();
if (fs != null)
fs.Close();
}
}

public void SplitPDFByBookMark(string fileName)
{
string sInFile = fileName;
var pdfReader = new PdfReader(sInFile);
try
{
IList<Dictionary<string, object>> bookmarks = SimpleBookmark.GetBookmark(pdfReader);
for (int i = 0; i < bookmarks.Count; ++i)
{
IDictionary<string, object> BM = (IDictionary<string, object>)bookmarks[i];
IDictionary<string, object> nextBM = i == bookmarks.Count - 1 ? null : bookmarks[i + 1];
string startPage = BM["Page"].ToString().Split(' ')[0].ToString();
string startPageNextBM = nextBM == null ? "" + (pdfReader.NumberOfPages + 1) : nextBM["Page"].ToString().Split(' ')[0].ToString();
SplitByBookmark(pdfReader, int.Parse(startPage), int.Parse(startPageNextBM), bookmarks[i].Values.ToArray().GetValue(0).ToString() + ".pdf", fileName);
}
}
catch (Exception ex)
{
throw ex;
}
}
private void SplitByBookmark(PdfReader reader, int pageFrom, int PageTo, string outPutName, string inPutFileName)
{
Document document = new Document();
using (var fs = new FileStream(Path.GetDirectoryName(inPutFileName) + '\\' + outPutName, System.IO.FileMode.Create))
{
try
{
using (var writer = PdfWriter.GetInstance(document, fs))
{
document.Open();
PdfContentByte cb = writer.DirectContent;
//holds pdf data
PdfImportedPage page;
if (pageFrom == PageTo && pageFrom == 1)
{
document.NewPage();
page = writer.GetImportedPage(reader, pageFrom);
cb.AddTemplate(page, 0, 0);
pageFrom++;
fs.Flush();
document.Close();
fs.Close();
}
else
{
while (pageFrom < PageTo)
{
document.NewPage();
page = writer.GetImportedPage(reader, pageFrom);
cb.AddTemplate(page, 0, 0);
pageFrom++;
fs.Flush();
document.Close();
fs.Close();
}
}
}
//PdfWriter writer = PdfWriter.GetInstance(document, fs);
}
catch (Exception ex)
{
throw ex;
}
}
}
You can install itextsharp from nuget and copy and paste this code in a c# app Call the SplitPDFByBookMark() method and pass the pdf filename.
This code is going to search for you bookmarks and done!

An easy way , using Spire PDF :
using Spire.Pdf;
namespace split_pdf
{
class Program
{
static void Main(string[] args)
{
//open an existing pdf document
PdfDocument doc = new PdfDocument(#"..\Sample3.pdf");
//Split one PDF document to multiple files
String pattern = "SplitDocument-{0}.pdf";
doc.Split(pattern);
String lastPageFileName
= String.Format(pattern, doc.Pages.Count - 1);
doc.Close();
}
}
}
here the Documentation Page : SpirePdf

Related

How to extract images from pdf and save each as a jpeg with C# & itext7

this is a C# winforms project
Given a file folder, need to open any pdf file and for each file,
evaluate every object. If the object is an image, extract it, convert it and save it
as a Jpeg file in that same folder.
private void btnExtractImages_Click(object sender, EventArgs e)
{
try
{
DirectoryInfo di = new DirectoryInfo(txtSourcePdf.Text);
foreach (FileInfo fi in di.GetFiles())
{
PdfDocument doc = new PdfDocument(new PdfReader(fi.FullName));
string fNm = fi.Name.Replace(".pdf", ""); //Filename without the extension
int pgs = doc.GetNumberOfPages();
for (int pg = 1; pg <= pgs; pg++)
{
PdfPage pdfPage = doc.GetPage(pg);
PdfFormXObject xObj = pdfPage.CopyAsFormXObject(doc);
iText.Layout.Element.Image img = new iText.Layout.Element.Image(xObj);
//good so far.
//need to turn this xObj or img into a jpg and save to disk
...
}
// This works, however it creates a .dat file, not a .jpg file
// int objs = doc.GetNumberOfPdfObjects();
// for (int o = 1; o <= objs; o++)
// {
// PdfObject obj = doc.GetPdfObject(o);
// if (obj != null && obj.IsStream())
// {
// byte[] b;
// try
// {
// // Get decoded stream bytes.
// b = ((PdfStream)obj).GetBytes();
// }
// catch (Exception)
// {
// //Get originally encoded stream bytes
// b = ((PdfStream)obj).GetBytes(false);
// }
// using (FileStream fos = new FileStream(String.Format(txtSourcePdf.Text + "/" + fNm + "{0}.jpg", o), FileMode.Create))
// {
// fos.Write(b, 0, b.Length);
// }
}
}
catch (Exception ex)
{
MessageBox.Show(ex.ToString());
}
}

ABCpdf only rendering first page

When I try to save each page as GIF using ABCpdf, only the first page is saved.
For example: I have a PDF that has 3 pages. I use ABCpdf to render each page to a stream, which is saved to disk. When I open the files in my destination folder, all 3 files show the first page content.
Here's my code:
using (Doc theDoc = new Doc())
{
XReadOptions options = new XReadOptions { ReadModule = ReadModuleType.Pdf };
theDoc.Read(inputbytearray, options);
using (MemoryStream ms = new MemoryStream())
{
theDoc.Rendering.DotsPerInch = 150;
int n = theDoc.PageCount;
for (int i = 1; i <= n; i++)
{
Guid FileName = Guid.NewGuid();
theDoc.Rect.String = theDoc.CropBox.String;
theDoc.Rendering.SaveAppend = (i != 1);
theDoc.Rendering.SaveCompression = XRendering.Compression.G4;
theDoc.PageNumber = i;
theDoc.Rendering.Save(string.Format("{0}.gif", FileName), ms);
using (var streamupload = new MemoryStream(ms.GetBuffer(), writable: false))
{
_BlobStorageService.UploadfromStream(FileName.ToString(), streamupload, STR_Gif, STR_Imagegif);
}
}
// theDoc.Clear();
}
}

The Rendering.SaveAppend property is only applicable when saving TIFF images. For GIFs you would need to save a separate image for each PDF page.
private void button1_Click(object sender, System.EventArgs e)
{
string theDir = Directory.GetParent(Directory.GetCurrentDirectory()).Parent.FullName + #"\files\";
// Create test PDF
using (Doc doc = new Doc())
{
for (int i = 1; i <= 3; i++)
{
doc.Page = doc.AddPage();
doc.AddHtml("<font size=24>PAGE " + i.ToString());
}
doc.Save(Path.Combine(theDir, "test.pdf"));
}
// Save PDF pages to GIF streams
using (Doc doc = new Doc())
{
doc.Read(Path.Combine(theDir, "test.pdf"));
for (int i = 1; i <= doc.PageCount; i++)
{
doc.PageNumber = i;
doc.Rect.String = doc.CropBox.String;
using (MemoryStream ms = new MemoryStream())
{
doc.Rendering.Save("dummy.gif", ms);
using (FileStream fs = File.Create(Path.Combine(theDir, "p" + i.ToString() + ".gif")))
{
ms.Seek(0, SeekOrigin.Begin);
ms.CopyTo(fs);
}
}
}
}
}

Adding a Table into the Footer of a PDF

After creating a PDF from HTML, I need to add a Footer to each page. The Footer is going to be a single-row, three-column table, with the left cell being an external reference ID, the center being a "Page X of Y", and the right being a date stamp. I have no experience with iTextSharp, but after reading various posts I created the following PageEventHandler
UPDATED CODE
public class FooterEvent : PdfPageEventHelper
{
PdfContentByte cb;
#region Properties
private string _FooterLeft;
public string FooterLeft
{
get { return _FooterLeft; }
set { _FooterLeft = value; }
}
private string _FooterCenter;
public string FooterCenter
{
get { return _FooterCenter; }
set { _FooterCenter = value; }
}
private string _FooterRight;
public string FooterRight
{
get { return _FooterRight; }
set { _FooterRight = value; }
}
private Font _FooterFont;
public Font FooterFont
{
get { return _FooterFont; }
set { _FooterFont = value; }
}
#endregion
public override void OnEndPage(PdfWriter writer, Document document)
{
base.OnEndPage(writer, document);
Font font = new Font(Font.FontFamily.HELVETICA, 12, Font.NORMAL, BaseColor.BLACK);
PdfPTable FooterTable = new PdfPTable(3);
FooterTable.TotalWidth = document.PageSize.Width - document.LeftMargin - document.RightMargin;
PdfPCell FooterLeftCell = new PdfPCell(new Phrase(2, FooterLeft, FooterFont));
FooterLeftCell.HorizontalAlignment = Element.ALIGN_LEFT;
FooterLeftCell.VerticalAlignment = Element.ALIGN_CENTER;
FooterLeftCell.Border = 0;
FooterTable.AddCell(FooterLeftCell);
PdfPCell FooterCenterCell = new PdfPCell(new Phrase(2, FooterCenter, FooterFont));
FooterCenterCell.HorizontalAlignment = Element.ALIGN_CENTER;
FooterCenterCell.VerticalAlignment = Element.ALIGN_CENTER;
FooterCenterCell.Border = 0;
FooterTable.AddCell(FooterCenterCell);
PdfPCell FooterRightCell = new PdfPCell(new Phrase(2, FooterRight, FooterFont));
FooterRightCell.HorizontalAlignment = Element.ALIGN_RIGHT;
FooterRightCell.VerticalAlignment = Element.ALIGN_CENTER;
FooterRightCell.Border = 0;
FooterTable.AddCell(FooterRightCell);
FooterTable.WriteSelectedRows(0, -1, document.LeftMargin, document.BottomMargin, cb);
}
}
ADDED RESPONSE
After editing my PageEvent, I'm still having issues. It's come to mind that I'm probably having issues with calling the PageEvent and adding it to the PDF (no experience with iTextSharp). Below is my attempt to add the Footer to an existing PDF that has been passed as byte[].
byte[] output = null;
string identifier = id;
string time = DateTime.Now.ToString();
string page = null;
PdfReader reader = new PdfReader(original);
int n = reader.NumberOfPages;
try
{
using (MemoryStream ms = new MemoryStream())
{
using (Document doc = new Document(PageSize.LETTER, 100, 100, 100, 100))
{
using (PdfWriter writer = PdfWriter.GetInstance(doc, ms))
{
FooterEvent footer = new FooterEvent();
writer.PageEvent = footer;
footer.FooterFont = FontFactory.GetFont(BaseFont.HELVETICA, 12, BaseColor.BLACK);
doc.Open();
for (int i = 1; i < n + 1; ++i)
{
doc.NewPage();
page = "Page " + i + " of " + n;
footer.FooterLeft = identifier;
footer.FooterCenter = page;
footer.FooterRight = time;
doc.Add(new Paragraph(reader.GetPageContent(i).ToString()));
//Probably wrong. Trying to add contents from each page in original PDF
}
doc.Close();
}
}
output = ms.ToArray();
}
}
catch (Exception ex)
{
//Some Message added later
}
return output;
Any help is appreciated. Thanks in advance.

Try this, its working for me:
FooterTable.WriteSelectedRows(0, -1, document.LeftMargin, FooterTable.TotalHeight, cb);
Check this post
Header, footer and large tables with iTextSharp

You wrote:
FooterTable.WriteSelectedRows(0, 0, document.LeftMargin, document.RightMargin, cb);
However, the method is:
FooterTable.WriteSelectedRows(rowStart, rowEnd, x, y, cb);
Which means that you're asking to write a selection of rows starting with row 0 and ending with row 0, or: you're asking to write not a single row.
Moreover, you provide an x value instead of a y value. Change that line into:
FooterTable.WriteSelectedRows(0, -1, document.LeftMargin, document.BottomMargin, cb);

How to merge multiple pdf files (generated in run time)?

How to merge multiple pdf files (generated on run time) through ItextSharp then printing them.
I found the following link but that method requires the pdf names considering that the pdf files stored and this is not my case .
I have multiple reports i'll convert them to pdf files through this method :
private void AddReportToResponse(LocalReport followsReport)
{
string mimeType;
string encoding;
string extension;
string[] streams = new string[100];
Warning[] warnings = new Warning[100];
byte[] pdfStream = followsReport.Render("PDF", "", out mimeType, out encoding, out extension, out streams, out warnings);
//Response.Clear();
//Response.ContentType = mimeType;
//Response.AddHeader("content-disposition", "attachment; filename=Application." + extension);
//Response.BinaryWrite(pdfStream);
//Response.End();
}
Now i want to merge all those generated files (Bytes) in one pdf file to print it

If you want to merge source documents using iText(Sharp), there are two basic situations:
You really want to merge the documents, acquiring the pages in their original format, transfering as much of their content and their interactive annotations as possible. In this case you should use a solution based on a member of the Pdf*Copy* family of classes.
You actually want to integrate pages from the source documents into a new document but want the new document to govern the general format and don't care for the interactive features (annotations...) in the original documents (or even want to get rid of them). In this case you should use a solution based on the PdfWriter class.
You can find details in chapter 6 (especially section 6.4) of iText in Action — 2nd Edition. The Java sample code can be accessed here and the C#'ified versions here.
A simple sample using PdfCopy is Concatenate.java / Concatenate.cs. The central piece of code is:
byte[] mergedPdf = null;
using (MemoryStream ms = new MemoryStream())
{
using (Document document = new Document())
{
using (PdfCopy copy = new PdfCopy(document, ms))
{
document.Open();
for (int i = 0; i < pdf.Count; ++i)
{
PdfReader reader = new PdfReader(pdf[i]);
// loop over the pages in that document
int n = reader.NumberOfPages;
for (int page = 0; page < n; )
{
copy.AddPage(copy.GetImportedPage(reader, ++page));
}
}
}
}
mergedPdf = ms.ToArray();
}
Here pdf can either be defined as a List<byte[]> immediately containing the source documents (appropriate for your use case of merging intermediate in-memory documents) or as a List<String> containing the names of source document files (appropriate if you merge documents from disk).
An overview at the end of the referenced chapter summarizes the usage of the classes mentioned:
PdfCopy: Copies pages from one or more existing PDF documents. Major downsides: PdfCopy doesn’t detect redundant content, and it fails when concatenating forms.
PdfCopyFields: Puts the fields of the different forms into one form. Can be used to avoid the problems encountered with form fields when concatenating forms using PdfCopy. Memory use can be an issue.
PdfSmartCopy: Copies pages from one or more existing PDF documents. PdfSmartCopy is able to detect redundant content, but it needs more memory and CPU than PdfCopy.
PdfWriter: Generates PDF documents from scratch. Can import pages from other PDF documents. The major downside is that all interactive features of the imported page (annotations, bookmarks, fields, and so forth) are lost in the process.

I used iTextsharp with c# to combine pdf files. This is the code I used.
string[] lstFiles=new string[3];
lstFiles[0]=#"C:/pdf/1.pdf";
lstFiles[1]=#"C:/pdf/2.pdf";
lstFiles[2]=#"C:/pdf/3.pdf";
PdfReader reader = null;
Document sourceDocument = null;
PdfCopy pdfCopyProvider = null;
PdfImportedPage importedPage;
string outputPdfPath=#"C:/pdf/new.pdf";
sourceDocument = new Document();
pdfCopyProvider = new PdfCopy(sourceDocument, new System.IO.FileStream(outputPdfPath, System.IO.FileMode.Create));
//Open the output file
sourceDocument.Open();
try
{
//Loop through the files list
for (int f = 0; f < lstFiles.Length-1; f++)
{
int pages =get_pageCcount(lstFiles[f]);
reader = new PdfReader(lstFiles[f]);
//Add pages of current file
for (int i = 1; i <= pages; i++)
{
importedPage = pdfCopyProvider.GetImportedPage(reader, i);
pdfCopyProvider.AddPage(importedPage);
}
reader.Close();
}
//At the end save the output file
sourceDocument.Close();
}
catch (Exception ex)
{
throw ex;
}
private int get_pageCcount(string file)
{
using (StreamReader sr = new StreamReader(File.OpenRead(file)))
{
Regex regex = new Regex(#"/Type\s*/Page[^s]");
MatchCollection matches = regex.Matches(sr.ReadToEnd());
return matches.Count;
}
}

Here is some code I pulled out of an old project I had. It was a web application but I was using iTextSharp to merge pdf files then print them.
public static class PdfMerger
{
/// <summary>
/// Merge pdf files.
/// </summary>
/// <param name="sourceFiles">PDF files being merged.</param>
/// <returns></returns>
public static byte[] MergeFiles(List<Stream> sourceFiles)
{
Document document = new Document();
MemoryStream output = new MemoryStream();
try
{
// Initialize pdf writer
PdfWriter writer = PdfWriter.GetInstance(document, output);
writer.PageEvent = new PdfPageEvents();
// Open document to write
document.Open();
PdfContentByte content = writer.DirectContent;
// Iterate through all pdf documents
for (int fileCounter = 0; fileCounter < sourceFiles.Count; fileCounter++)
{
// Create pdf reader
PdfReader reader = new PdfReader(sourceFiles[fileCounter]);
int numberOfPages = reader.NumberOfPages;
// Iterate through all pages
for (int currentPageIndex = 1; currentPageIndex <=
numberOfPages; currentPageIndex++)
{
// Determine page size for the current page
document.SetPageSize(
reader.GetPageSizeWithRotation(currentPageIndex));
// Create page
document.NewPage();
PdfImportedPage importedPage =
writer.GetImportedPage(reader, currentPageIndex);
// Determine page orientation
int pageOrientation = reader.GetPageRotation(currentPageIndex);
if ((pageOrientation == 90) || (pageOrientation == 270))
{
content.AddTemplate(importedPage, 0, -1f, 1f, 0, 0,
reader.GetPageSizeWithRotation(currentPageIndex).Height);
}
else
{
content.AddTemplate(importedPage, 1f, 0, 0, 1f, 0, 0);
}
}
}
}
catch (Exception exception)
{
throw new Exception("There has an unexpected exception" +
" occured during the pdf merging process.", exception);
}
finally
{
document.Close();
}
return output.GetBuffer();
}
}
/// <summary>
/// Implements custom page events.
/// </summary>
internal class PdfPageEvents : IPdfPageEvent
{
#region members
private BaseFont _baseFont = null;
private PdfContentByte _content;
#endregion
#region IPdfPageEvent Members
public void OnOpenDocument(PdfWriter writer, Document document)
{
_baseFont = BaseFont.CreateFont(BaseFont.HELVETICA,
BaseFont.CP1252, BaseFont.NOT_EMBEDDED);
_content = writer.DirectContent;
}
public void OnStartPage(PdfWriter writer, Document document)
{ }
public void OnEndPage(PdfWriter writer, Document document)
{ }
public void OnCloseDocument(PdfWriter writer, Document document)
{ }
public void OnParagraph(PdfWriter writer,
Document document, float paragraphPosition)
{ }
public void OnParagraphEnd(PdfWriter writer,
Document document, float paragraphPosition)
{ }
public void OnChapter(PdfWriter writer, Document document,
float paragraphPosition, Paragraph title)
{ }
public void OnChapterEnd(PdfWriter writer,
Document document, float paragraphPosition)
{ }
public void OnSection(PdfWriter writer, Document document,
float paragraphPosition, int depth, Paragraph title)
{ }
public void OnSectionEnd(PdfWriter writer,
Document document, float paragraphPosition)
{ }
public void OnGenericTag(PdfWriter writer, Document document,
Rectangle rect, string text)
{ }
#endregion
private float GetCenterTextPosition(string text, PdfWriter writer)
{
return writer.PageSize.Width / 2 - _baseFont.GetWidthPoint(text, 8) / 2;
}
}
I didn't write this, but made some modifications. I can't remember where I found it. After I merged the PDFs I would call this method to insert javascript to open the print dialog when the PDF is opened. If you change bSilent to true then it should print silently to their default printer.
public Stream addPrintJStoPDF(Stream thePDF)
{
MemoryStream outPutStream = null;
PRStream finalStream = null;
PdfDictionary page = null;
string content = null;
//Open the stream with iTextSharp
var reader = new PdfReader(thePDF);
outPutStream = new MemoryStream(finalStream.GetBytes());
var stamper = new PdfStamper(reader, (MemoryStream)outPutStream);
var jsText = "var res = app.setTimeOut('this.print({bUI: true, bSilent: false, bShrinkToFit: false});', 200);";
//Add the javascript to the PDF
stamper.JavaScript = jsText;
stamper.FormFlattening = true;
stamper.Writer.CloseStream = false;
stamper.Close();
//Set the stream to the beginning
outPutStream.Position = 0;
return outPutStream;
}
Not sure how well the above code is written since I pulled it from somewhere else and I haven't worked in depth at all with iTextSharp but I do know that it did work at merging PDFs that I was generating at runtime.

Tested with iTextSharp-LGPL 4.1.6:
public static byte[] ConcatenatePdfs(IEnumerable<byte[]> documents)
{
using (var ms = new MemoryStream())
{
var outputDocument = new Document();
var writer = new PdfCopy(outputDocument, ms);
outputDocument.Open();
foreach (var doc in documents)
{
var reader = new PdfReader(doc);
for (var i = 1; i <= reader.NumberOfPages; i++)
{
writer.AddPage(writer.GetImportedPage(reader, i));
}
writer.FreeReader(reader);
reader.Close();
}
writer.Close();
outputDocument.Close();
var allPagesContent = ms.GetBuffer();
ms.Flush();
return allPagesContent;
}
}

To avoid the memory issues mentioned, I used file stream instead of memory stream(mentioned in ITextSharp Out of memory exception merging multiple pdf) to merge pdf files:
var parentDirectory = Directory.GetParent(SelectedDocuments[0].FilePath);
var savePath = parentDirectory + "\\MergedDocument.pdf";
using (var fs = new FileStream(savePath, FileMode.Create))
{
using (var document = new Document())
{
using (var pdfCopy = new PdfCopy(document, fs))
{
document.Open();
for (var i = 0; i < SelectedDocuments.Count; i++)
{
using (var pdfReader = new PdfReader(SelectedDocuments[i].FilePath))
{
for (var page = 0; page < pdfReader.NumberOfPages;)
{
pdfCopy.AddPage(pdfCopy.GetImportedPage(pdfReader, ++page));
}
}
}
}
}
}

****/*For Multiple PDF Print..!!*/****
<button type="button" id="btnPrintMultiplePdf" runat="server" class="btn btn-primary btn-border btn-sm"
onserverclick="btnPrintMultiplePdf_click">
<i class="fa fa-file-pdf-o"></i>Print Multiple pdf</button>
protected void btnPrintMultiplePdf_click(object sender, EventArgs e)
{
if (ValidateForMultiplePDF() == true)
{
#region Declare Temp Variables..!!
CheckBox chkList = new CheckBox();
HiddenField HidNo = new HiddenField();
string Multi_fofile, Multi_listfile;
Multi_fofile = Multi_listfile = "";
Multi_fofile = Server.MapPath("PDFRNew");
#endregion
for (int i = 0; i < grdRnew.Rows.Count; i++)
{
#region Find Grd Controls..!!
CheckBox Chk_One = (CheckBox)grdRnew.Rows[i].FindControl("chkOne");
Label lbl_Year = (Label)grdRnew.Rows[i].FindControl("lblYear");
Label lbl_No = (Label)grdRnew.Rows[i].FindControl("lblCode");
#endregion
if (Chk_One.Checked == true)
{
HidNo .Value = llbl_No .Text.Trim()+ lbl_Year .Text;
if (File.Exists(Multi_fofile + "\\" + HidNo.Value.ToString() + ".pdf"))
{
#region Get Multiple Files Name And Paths..!!
if (Multi_listfile != "")
{
Multi_listfile = Multi_listfile + ",";
}
Multi_listfile = Multi_listfile + Multi_fofile + "\\" + HidNo.Value.ToString() + ".pdf";
#endregion
}
}
}
#region For Generate Multiple Pdf..!!
if (Multi_listfile != "")
{
String[] Multifiles = Multi_listfile.Split(',');
string DestinationFile = Server.MapPath("PDFRNew") + "\\Multiple.Pdf";
MergeFiles(DestinationFile, Multifiles);
Response.ContentType = "pdf";
Response.AddHeader("Content-Disposition", "attachment;filename=\"" + DestinationFile + "\"");
Response.TransmitFile(DestinationFile);
Response.End();
}
else
{
}
#endregion
}
}
private void MergeFiles(string DestinationFile, string[] SourceFiles)
{
try
{
int f = 0;
/**we create a reader for a certain Document**/
PdfReader reader = new PdfReader(SourceFiles[f]);
/**we retrieve the total number of pages**/
int n = reader.NumberOfPages;
/**Console.WriteLine("There are " + n + " pages in the original file.")**/
/**Step 1: creation of a document-object**/
Document document = new Document(reader.GetPageSizeWithRotation(1));
/**Step 2: we create a writer that listens to the Document**/
PdfWriter writer = PdfWriter.GetInstance(document, new FileStream(DestinationFile, FileMode.Create));
/**Step 3: we open the Document**/
document.Open();
PdfContentByte cb = writer.DirectContent;
PdfImportedPage page;
int rotation;
/**Step 4: We Add Content**/
while (f < SourceFiles.Length)
{
int i = 0;
while (i < n)
{
i++;
document.SetPageSize(reader.GetPageSizeWithRotation(i));
document.NewPage();
page = writer.GetImportedPage(reader, i);
rotation = reader.GetPageRotation(i);
if (rotation == 90 || rotation == 270)
{
cb.AddTemplate(page, 0, -1f, 1f, 0, 0, reader.GetPageSizeWithRotation(i).Height);
}
else
{
cb.AddTemplate(page, 1f, 0, 0, 1f, 0, 0);
}
/**Console.WriteLine("Processed page " + i)**/
}
f++;
if (f < SourceFiles.Length)
{
reader = new PdfReader(SourceFiles[f]);
/**we retrieve the total number of pages**/
n = reader.NumberOfPages;
/**Console.WriteLine("There are"+n+"pages in the original file.")**/
}
}
/**Step 5: we Close the Document**/
document.Close();
}
catch (Exception e)
{
string strOb = e.Message;
}
}
private bool ValidateForMultiplePDF()
{
bool chkList = false;
foreach (GridViewRow gvr in grdRnew.Rows)
{
CheckBox Chk_One = (CheckBox)gvr.FindControl("ChkSelectOne");
if (Chk_One.Checked == true)
{
chkList = true;
}
}
if (chkList == false)
{
divStatusMsg.Style.Add("display", "");
divStatusMsg.Attributes.Add("class", "alert alert-danger alert-dismissable");
divStatusMsg.InnerText = "ERROR !!...Please Check At Least On CheckBox.";
grdRnew.Focus();
set_timeout();
return false;
}
return true;
}

itextsharp trimming pdf document's pages

I have a pdf document that has form fields that I'm filling out programatically with c#. Depending on three conditions, I need to trim (delete) some of the pages from that document.
Is that possible to do?
for condition 1: I need to keep pages 1-4 but delete pages 5 and 6
for condition 2: I need to keep pages 1-4 but delete 5 and keep 6
for condition 3: I need to keep pages 1-5 but delete 6

Use PdfReader.SelectPages() combined with PdfStamper. The code below uses iTextSharp 5.5.1.
public void SelectPages(string inputPdf, string pageSelection, string outputPdf)
{
using (PdfReader reader = new PdfReader(inputPdf))
{
reader.SelectPages(pageSelection);
using (PdfStamper stamper = new PdfStamper(reader, File.Create(outputPdf)))
{
stamper.Close();
}
}
}
Then you call this method with the correct page selection for each condition.
Condition 1:
SelectPages(inputPdf, "1-4", outputPdf);
Condition 2:
SelectPages(inputPdf, "1-4,6", outputPdf);
or
SelectPages(inputPdf, "1-6,!5", outputPdf);
Condition 3:
SelectPages(inputPdf, "1-5", outputPdf);
Here's the comment from the iTextSharp source code on what makes up a page selection. This is in the SequenceList class which is used to process a page selection:
/**
* This class expands a string into a list of numbers. The main use is to select a
* range of pages.
* <p>
* The general systax is:<br>
* [!][o][odd][e][even]start-end
* <p>
* You can have multiple ranges separated by commas ','. The '!' modifier removes the
* range from what is already selected. The range changes are incremental, that is,
* numbers are added or deleted as the range appears. The start or the end, but not both, can be ommited.
*/

Instead of deleting pages in a document what you actually do is create a new document and only import the pages that you want to keep. Below is a full working WinForms app that does that (targetting iTextSharp 5.1.1.0). The last parameter to the function removePagesFromPdf is an array of pages to keep.
The code below works off of physical files but would be very easy to convert to something based on streams so that you don't have to write to disk if you don't want to.
using System;
using System.ComponentModel;
using System.IO;
using System.Linq;
using System.Windows.Forms;
using iTextSharp.text.pdf;
using iTextSharp.text;
namespace Full_Profile1
{
public partial class Form1 : Form
{
public Form1()
{
InitializeComponent();
}
private void Form1_Load(object sender, EventArgs e)
{
//The files that we are working with
string sourceFolder = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
string sourceFile = Path.Combine(sourceFolder, "Test.pdf");
string destFile = Path.Combine(sourceFolder, "TestOutput.pdf");
//Remove all pages except 1,2,3,4 and 6
removePagesFromPdf(sourceFile, destFile, 1, 2, 3, 4, 6);
this.Close();
}
public void removePagesFromPdf(String sourceFile, String destinationFile, params int[] pagesToKeep)
{
//Used to pull individual pages from our source
PdfReader r = new PdfReader(sourceFile);
//Create our destination file
using (FileStream fs = new FileStream(destinationFile, FileMode.Create, FileAccess.Write, FileShare.None))
{
using (Document doc = new Document())
{
using (PdfWriter w = PdfWriter.GetInstance(doc, fs))
{
//Open the desitination for writing
doc.Open();
//Loop through each page that we want to keep
foreach (int page in pagesToKeep)
{
//Add a new blank page to destination document
doc.NewPage();
//Extract the given page from our reader and add it directly to the destination PDF
w.DirectContent.AddTemplate(w.GetImportedPage(r, page), 0, 0);
}
//Close our document
doc.Close();
}
}
}
}
}
}

Here is the code I use to copy all but the last page of an existing PDF. Everything is in memory streams. The variable pdfByteArray is a byte[] of the original pdf obtained using ms.ToArray(). pdfByteArray is overwritten with the new PDF.
PdfReader originalPDFReader = new PdfReader(pdfByteArray);
using (MemoryStream msCopy = new MemoryStream())
{
using (Document docCopy = new Document())
{
using (PdfCopy copy = new PdfCopy(docCopy, msCopy))
{
docCopy.Open();
for (int pageNum = 1; pageNum <= originalPDFReader.NumberOfPages - 1; pageNum ++)
{
copy.AddPage(copy.GetImportedPage(originalPDFReader, pageNum ));
}
docCopy.Close();
}
}
pdfByteArray = msCopy.ToArray();

I know it's an old post, Simply I extend the #chris-haas solution to the next level.
Delete the selected pages after that save them into the separate pdf file.
//ms is MemoryStream and fs is FileStream
ms.CopyTo(fs);
Save the Stream to a separate pdf file. 100% working without any error.
pageRange="5"
pageRange="2,15-20"
pageRange="1-5,15-20"
You can pass the pageRange vales like the above-given samples.
private void DeletePagesNew(string pageRange, string SourcePdfPath, string OutputPdfPath, string Password = "")
{
try
{
var pagesToDelete = new List<int>();
if (pageRange.IndexOf(",") != -1)
{
var tmpHold = pageRange.Split(',');
foreach (string nonconseq in tmpHold)
{
if (nonconseq.IndexOf("-") != -1)
{
var rangeHold = nonconseq.Split('-');
for (int i = Convert.ToInt32(rangeHold[0]), loopTo = Convert.ToInt32(rangeHold[1]); i <= loopTo; i++)
pagesToDelete.Add(i);
}
else
{
pagesToDelete.Add(Convert.ToInt32(nonconseq));
}
}
}
else if (pageRange.IndexOf("-") != -1)
{
var rangeHold = pageRange.Split('-');
for (int i = Convert.ToInt32(rangeHold[0]), loopTo1 = Convert.ToInt32(rangeHold[1]); i <= loopTo1; i++)
pagesToDelete.Add(i);
}
else
{
pagesToDelete.Add(Convert.ToInt32(pageRange));
}
var Reader = new PdfReader(SourcePdfPath);
int[] pagesToKeep;
pagesToKeep = Enumerable.Range(1, Reader.NumberOfPages).ToArray();
using (var ms = new MemoryStream())
{
using (var fs = new FileStream(OutputPdfPath, FileMode.Create, FileAccess.Write, FileShare.None))
{
using (var doc = new Document())
{
using (PdfWriter w = PdfWriter.GetInstance(doc, fs))
{
doc.Open();
foreach (int p in pagesToKeep)
{
if (pagesToDelete.FindIndex(s => s == p) != -1)
{
continue;
}
// doc.NewPage()
// w.DirectContent.AddTemplate(w.GetImportedPage(Reader, p), 0, 0)
//
doc.SetPageSize(Reader.GetPageSize(p));
doc.NewPage();
PdfContentByte cb = w.DirectContent;
PdfImportedPage pageImport = w.GetImportedPage(Reader, p);
int rot = Reader.GetPageRotation(p);
if (rot == 90 || rot == 270)
{
cb.AddTemplate(pageImport, 0, -1.0f, 1.0f, 0, 0, Reader.GetPageSizeWithRotation(p).Height);
}
else
{
cb.AddTemplate(pageImport, 1.0f, 0, 0, 1.0f, 0, 0);
}
cb = default;
pageImport = default;
rot = default;
}
ms.CopyTo(fs);
fs.Flush();
doc.Close();
}
}
}
}
pagesToDelete = null;
Reader.Close();
Reader = default;
}
catch (Exception ex)
{
MessageBox.Show(ex.Message);
}
}

Develop Reference

C# (C-Sharp) is a programming language developed by Microsoft that runs on the .NET Framework.

split PDF into multiple files in C# - c#

A previous question answers your partially - how to split pdf documents, if you know where the barcodes are then you can split the documents easily: How can I split up a PDF file into pages (preferably C#) The recommendation is a library called PDFSharp and a sample demonstrating PDF splitting.

Related

How to extract images from pdf and save each as a jpeg with C# & itext7

ABCpdf only rendering first page

Adding a Table into the Footer of a PDF

How to merge multiple pdf files (generated in run time)?

itextsharp trimming pdf document's pages

Categories

Resources