Does anyone know how to merge two PDFs using iText when one of them has a CropBox, the other does not, and they have different page sizes?
Here is the code that I use to merge the PDFs:
/// <summary>
/// Merges the given PDF files, in the order supplied, into a single PDF written to
/// <paramref name="destinationFile"/>.
/// </summary>
/// <remarks>
/// Pages are copied with <c>PdfCopy</c> rather than re-drawn as templates on a fixed A4 page,
/// so each page is carried over as-is, regardless of its page boxes or rotation. Annotations on
/// the pages are copied as well; document-level items they may depend on are not.
/// </remarks>
/// <param name="destinationFile">Path of the merged PDF. An existing file is deleted first.</param>
/// <param name="sourceFiles">Paths of the PDFs to merge. Null or blank entries are ignored.</param>
/// <returns>
/// <c>true</c> when the merged file was written; <c>false</c> when there was nothing to merge
/// or an error occurred while reading or writing.
/// </returns>
public bool MergeFiles(string destinationFile, string[] sourceFiles)
{
    if (System.IO.File.Exists(destinationFile))
        System.IO.File.Delete(destinationFile);

    // Collect the usable paths in a growable list: any number of inputs is accepted and
    // null/blank entries are skipped instead of being dereferenced later.
    System.Collections.Generic.List<string> inputs = new System.Collections.Generic.List<string>();
    if (sourceFiles != null)
    {
        foreach (string candidate in sourceFiles)
        {
            if (candidate != null && candidate.Trim() != "")
                inputs.Add(candidate);
        }
    }
    if (inputs.Count == 0)
        return false;

    bool bSucess = true;
    PdfReader reader = null;
    try
    {
        using (FileStream output = new FileStream(destinationFile, FileMode.Create))
        {
            Document document = new Document();
            PdfCopy copier = new PdfCopy(document, output);
            document.Open();

            foreach (string path in inputs)
            {
                reader = new PdfReader(path);
                int n = reader.NumberOfPages;
                Console.WriteLine("There are " + n + " pages in the original file.");

                for (int pageNumber = 1; pageNumber <= n; pageNumber++)
                {
                    copier.AddPage(copier.GetImportedPage(reader, pageNumber));
                    Console.WriteLine("Processed page " + pageNumber);
                }

                // Release the reader as soon as its pages have been written.
                copier.FreeReader(reader);
                reader.Close();
                reader = null;
            }

            document.Close();
        }
    }
    catch (Exception e)
    {
        // Callers only see the boolean result, so record the cause instead of dropping it.
        System.Diagnostics.Trace.TraceError("MergeFiles failed: " + e);
        bSucess = false;
    }
    finally
    {
        // Only non-null when an exception interrupted the page loop.
        if (reader != null)
            reader.Close();
    }
    return bSucess;
}
But it does not seem to handle the CropBox difference between pages (content is mispositioned on pages that have a CropBox). Does anyone have better code that puts the content in the right position?
You're doing it the hard way:
// Merge every page of every input file into one document using PdfCopy.
// PdfCopy writes through a Document, which must be opened before pages are
// added and closed afterwards so the output is finished.
Document document = new Document();
PdfCopy copier = new PdfCopy(document, outputStream);
document.open();
String paths[] = ...;
for (String path : paths) {
  PdfReader reader = new PdfReader(path);
  for (int pageNum = 1; pageNum <= reader.getNumberOfPages(); ++pageNum) {
    copier.addPage(copier.getImportedPage(reader, pageNum) );
  }
  // Release each reader once its pages have been copied.
  copier.freeReader(reader);
  reader.close();
}
document.close();
That'll copy page annotations as well as page contents. Note that it will not copy document-level items those annotations might need to function properly (for example, a document-level script that serves as the entry point for a form field's validation).
And it'll copy the original page verbatim, regardless of various page box sizes or rotation.
Related
// Extract all of the text from the PDF file into a single string.
string t = ExtractAlTextFromPdf(inputFile);
...
/// <summary>
/// Reads a PDF file and returns the text of all its pages concatenated in page order.
/// </summary>
/// <param name="inputFile">Path of the PDF file to read.</param>
/// <returns>The extracted text of every page, appended one after another with no separator.</returns>
/// <exception cref="ArgumentNullException"><paramref name="inputFile"/> is null or empty.</exception>
/// <exception cref="System.IO.FileNotFoundException">The file does not exist.</exception>
public static string ExtractAlTextFromPdf(string inputFile)
{
    //Sanity checks
    if (string.IsNullOrEmpty(inputFile))
        throw new ArgumentNullException("inputFile");
    if (!System.IO.File.Exists(inputFile))
        throw new System.IO.FileNotFoundException("Cannot find inputFile", inputFile);

    //Open the file explicitly (not required, but it controls locks and permissions)
    using (FileStream input = new FileStream(inputFile, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        //Create a reader to read the PDF
        iTextSharp.text.pdf.PdfReader reader = new iTextSharp.text.pdf.PdfReader(input);
        try
        {
            //Create a buffer to store text
            StringBuilder text = new StringBuilder();

            //Use the PdfTextExtractor to get all of the text on a page-by-page basis
            for (int i = 1; i <= reader.NumberOfPages; i++)
            {
                text.Append(PdfTextExtractor.GetTextFromPage(reader, i));
            }
            return text.ToString();
        }
        finally
        {
            //Release the reader even when extraction throws
            reader.Close();
        }
    }
}
// When I pass this string to the following method:
/// <summary>
/// Stems a single word by running it through the exception tables and the
/// step0 .. step5 helpers.
/// </summary>
/// <param name="word">The word to stem. Words shorter than three characters are returned unchanged.</param>
/// <returns>
/// The matching entry from one of the exception tables when there is one; otherwise the
/// lower-cased result of the stemming steps.
/// </returns>
public string stem(string word)
{
    if (word.Length < 3)
    {
        return word;
    }

    StringBuilder buffer = new StringBuilder(word.ToLower());

    // Drop a leading apostrophe before any further processing.
    if (buffer[0] == '\'')
    {
        buffer.Remove(0, 1);
    }

    // First exception table: looked up with the word exactly as it was passed in.
    // NOTE(review): Length / 2 as the row count presumably assumes two entries per row - confirm.
    int pairCount = exceptions.Length / 2;
    for (int row = 0; row < pairCount; ++row)
    {
        if (word == exceptions[row, 0])
        {
            return exceptions[row, 1];
        }
    }

    int r1 = 0;
    int r2 = 0;
    changeY(buffer);
    computeR1R2(buffer, ref r1, ref r2);
    step0(buffer);
    step1a(buffer);

    // Second exception table: checked against the text as it stands after step1a.
    string afterStep1a = buffer.ToString();
    for (int index = 0; index < exceptions2.Length; ++index)
    {
        if (afterStep1a == exceptions2[index])
        {
            return exceptions2[index];
        }
    }

    step1b(buffer, r1);
    step1c(buffer);
    step2(buffer, r1);
    step3(buffer, r1, r2);
    step4(buffer, r2);
    step5(buffer, r1, r2);

    string stemmed = buffer.ToString();
    return stemmed.ToLower();
}
it gives an error.
I am using the iTextSharp PDF library to generate a PDF with ASP.NET and C#. In one PdfPTable the last rows continue onto the next page, i.e. part of the table is carried over to the next page, so I want to show the header for that table on the next page as well.
This is my code.
// Builds a PDF report on rotated A4 pages from the HTML table `customMatrixReport`.
// The source columns are emitted in groups of up to 13 data columns (plus the first,
// description column). Within a group the table is flushed and a new page started every
// 40 rows, and the cached header cells are re-added to the continuation table.
Document doc = new Document(iTextSharp.text.PageSize.A4.Rotate(), 10, 10, 20, 10);
// Output goes to a uniquely named file under the application's "pdf" folder.
string pdfFilePath = Server.MapPath(".") + "/pdf/myPdf" + Guid.NewGuid().ToString("N") + ".pdf";
try
{
//Open the output file and attach a writer to the document created above
// NOTE(review): fs is never explicitly closed or disposed in this snippet.
FileStream fs = new FileStream(pdfFilePath, FileMode.Create);
//
PdfWriter wri = PdfWriter.GetInstance(doc, fs);
//Header Section
string strImagePath = Server.MapPath("Images");
// Header text: report name followed by the range's start date.
string strReportName = oReportDTO.strReportName + " " + oReportDTO.oDateRange.FromDate.ToString(DateFormat);
Chunk headerchunk = new Chunk(strReportName, new Font(1, 8.0f));
HeaderFooter oHeader = new HeaderFooter(new Phrase(headerchunk), false);
oHeader.Border = Rectangle.NO_BORDER;
oHeader.Alignment = 1;
doc.Header = oHeader;
//Footer Section
// doc.PageNumber is read once here, while the string is built, so the text holds that single value.
string name ="Logged in as : " +currentLoggedInUser.UserName + new string(' ',70)+ "Page Number : " + doc.PageNumber;
Chunk Footerchunk1 = new Chunk(name, new Font(1, 5.0f));
HeaderFooter oFooter1 = new HeaderFooter(new Phrase(Footerchunk1), true);
oFooter1.Border = Rectangle.NO_BORDER;
oFooter1.Alignment = 1;
doc.Footer = oFooter1;
// The logo is loaded from a .png file and handed to iTextSharp with ImageFormat.Jpeg.
iTextSharp.text.Image imgFooter = iTextSharp.text.Image.GetInstance(System.Drawing.Image.FromFile(strImagePath + "/TransRisk Logo168x97.png"), System.Drawing.Imaging.ImageFormat.Jpeg);
imgFooter.ScaleAbsolute(80, 50);
Chunk footerchunk = new Chunk(imgFooter, 260.0f, 0.0f);
HeaderFooter oFooter = new HeaderFooter(new Phrase(name),new Phrase(footerchunk));
oFooter.Border =Rectangle.NO_BORDER;
// Reassigns doc.Footer: the oFooter1 assignment above is overwritten by this one.
doc.Footer = oFooter;
doc.Open();//Open Document to write
// NOTE(review): font8 is not used anywhere else in this snippet.
Font font8 = FontFactory.GetFont("ARIAL", 7);
HtmlTable dt = customMatrixReport;
if (dt.Rows.Count > 0)
{
//Create the PDF table(s); the column window below selects which source columns go into each one
int startColumnPosition = 1;
int endColumnPosition = 13;//End Column number in Pdf Page
// Number of column groups: (cells in the first row - 1) divided by the window size, rounded up.
int NoOfReports = Convert.ToInt32(Math.Ceiling((decimal)(dt.Rows[0].Cells.Count - 1) / endColumnPosition));//Count How many Pages to show
int pageRowCount = 0;
// Header cells are cached so they can be re-added when a table is continued on a new page.
// NOTE(review): this list is never cleared between column groups, so it keeps growing.
List<PdfPCell> lstHeaderCells = new List<PdfPCell>();
PdfPTable oPdfTable = null;
PdfPCell oPdfPCell = null;
for (int report = 1; report <= NoOfReports; ++report)
{
doc.Add(oHeader);
//ColumnText.ShowTextAligned(
int noOfColumns = -1;
// Last group: clamp the window to the final cell and size the table to the remaining
// data columns plus one for the first (description) column.
if (endColumnPosition > dt.Rows[0].Cells.Count - 1) { endColumnPosition = dt.Rows[0].Cells.Count - 1; noOfColumns = (endColumnPosition - startColumnPosition) + 2; oPdfTable = new PdfPTable(noOfColumns); }
else
{
// Full group: description column + 13 data columns.
oPdfTable = new PdfPTable(14);
//Widths Count
noOfColumns = 14;
}
oPdfTable.TotalWidth = 650f;
// Column widths: 100 for the first column, 80 for every other column.
List<float> lstwidths = new List<float>();
lstwidths.Add(100f);
for (int i = 2; i <= noOfColumns; ++i)
{
lstwidths.Add(80f);
}
oPdfTable.SetTotalWidth(lstwidths.ToArray());
pageRowCount = 0;
for (int rows = 0; rows < dt.Rows.Count; rows++)
{
//Count of rows added since the last page break
pageRowCount = pageRowCount + 1;
//Description cell (first cell of the source row)
if (rows == 0 )
{
//Background color for table header
oPdfPCell = new PdfPCell(new Phrase(new Chunk(dt.Rows[rows].Cells[0].InnerText, new Font(1, 8.0f, 1, Color.WHITE))));
oPdfPCell.BackgroundColor = new Color(118, 147, 199);
oPdfTable.AddCell(oPdfPCell);
}
else
{
//background color for Table cells
oPdfPCell = new PdfPCell(new Phrase(new Chunk(dt.Rows[rows].Cells[0].InnerText,new Font(1, 8.0f))));
oPdfPCell.BackgroundColor = new Color(232, 237, 255);
oPdfTable.AddCell(oPdfPCell);
}
//Cache the header cell of the first row
if (rows == 0)
{
lstHeaderCells.Add(oPdfPCell);
}
// Data cells for the current column window.
for (int column = startColumnPosition; column <= endColumnPosition; column++)
{
if (rows == 0)
{
oPdfPCell = new PdfPCell(new Phrase(new Chunk(dt.Rows[rows].Cells[column].InnerText, new Font(1, 8.0f, 1, Color.WHITE))));
oPdfPCell.BackgroundColor = new Color(118, 147, 199);
oPdfTable.AddCell(oPdfPCell);
}
else
{
oPdfPCell = new PdfPCell(new Phrase(new Chunk(dt.Rows[rows].Cells[column].InnerText, new Font(1, 8.0f))));
oPdfPCell.BackgroundColor = new Color(232, 237, 255);
// NOTE(review): alignment value 2 is presumably right-aligned - confirm against the Element.ALIGN_* constants.
oPdfPCell.Column.Alignment = 2;
oPdfTable.AddCell(oPdfPCell);
}
if (rows == 0)
{
lstHeaderCells.Add(oPdfPCell);
}
}
// Manual page break: after 40 rows (unless this is the last row) flush the table, start a
// new page and begin a fresh table seeded with the cached header cells.
if (pageRowCount >= 40 && rows != (dt.Rows.Count - 1))
{
pageRowCount = 0; doc.Add(oPdfTable); doc.NewPage(); doc.Add(oHeader); oPdfTable = new PdfPTable(noOfColumns); oPdfTable.SetTotalWidth(lstwidths.ToArray()); foreach (PdfPCell oHeaderCell in lstHeaderCells) { oPdfTable.AddCell(oHeaderCell); }
}
}
// Advance the column window to the next group of 13 columns.
startColumnPosition = endColumnPosition + 1;
endColumnPosition = endColumnPosition + 13;
oPdfTable.SpacingBefore = 10;
oPdfTable.SpacingAfter = 10;
doc.Add(oPdfTable);
//doc.NewPage();
}
}
else
// No rows: only the header is added to the document.
doc.Add(oHeader);
}
finally
{
// The document is closed whether or not building it succeeded.
doc.Close();
}
help me..
Change this line-
if (endColumnPosition > dt.Rows[0].Cells.Count - 1) { endColumnPosition = dt.Rows[0].Cells.Count - 1; noOfColumns = (endColumnPosition - startColumnPosition) + 2; oPdfTable = new PdfPTable(noOfColumns); }
To this-
if (endColumnPosition > dt.Rows[0].Cells.Count - 1) { endColumnPosition = dt.Rows[0].Cells.Count - 1; noOfColumns = (endColumnPosition - startColumnPosition) + 2; oPdfTable = new PdfPTable(noOfColumns); oPdfTable.HeaderRows = 1;}
After this line-
oPdfTable = new PdfPTable(14);
Add this-
oPdfTable.HeaderRows = 1;
Change this line-
pageRowCount = 0; doc.Add(oPdfTable); doc.NewPage(); doc.Add(oHeader); oPdfTable = new PdfPTable(noOfColumns); oPdfTable.SetTotalWidth(lstwidths.ToArray()); foreach (PdfPCell oHeaderCell in lstHeaderCells) { oPdfTable.AddCell(oHeaderCell); }
To this-
pageRowCount = 0; doc.Add(oPdfTable); doc.NewPage(); doc.Add(oHeader); oPdfTable = new PdfPTable(noOfColumns); oPdfTable.HeaderRows = 1; oPdfTable.SetTotalWidth(lstwidths.ToArray()); foreach (PdfPCell oHeaderCell in lstHeaderCells) { oPdfTable.AddCell(oHeaderCell); }
I am working on digital signatures in PDF. I am able to add a single signature to a PDF, but when I try to add the same signature in multiple places I get an invalid-signature error.
I am using the code below to digitally sign a single PDF file at multiple locations, but the signature appears in only one place.
In the other place it is not shown (invalid signature).
Here is the code I am using:
// Signs In_File_Name into New_File_Name with an externally produced signature:
//   1. adds a free-text annotation at each position listed in Page_Attrb,
//   2. pre-closes the signature appearance and passes the range stream to genaratePDFHash,
//   3. sends the resulting value to the eSign service,
//   4. writes the returned signature bytes into the CONTENTS entry and closes the appearance.
// NOTE(review): dttime is not used anywhere else in this snippet.
string dttime = DateTime.Now.ToString("yyyy-MM-ddTHH:mm:ss");
PdfReader pdfReader = null;
PdfStamper stamper = null;
try
{
pdfReader = new PdfReader(In_File_Name);
stamper = PdfStamper.CreateSignature(pdfReader, new FileStream(New_File_Name, FileMode.Create, FileAccess.Write), '\0', null, true);
//Annotation code for multiple signatures
PdfSignatureAppearance appearance = stamper.SignatureAppearance;
appearance.Reason = "";// ConfigurationManager.AppSettings["APPEARANCE_REASON"].ToString();
appearance.Location = "";// ConfigurationManager.AppSettings["APPEARANCE_LOCATION"].ToString();
// The signing date is set three minutes ahead of the current time.
appearance.SignDate = DateTime.Now.AddMinutes(3);
appearance.Image = null;
appearance.Acro6Layers = true;
//appearance.SetVisibleSignature(new iTextSharp.text.Rectangle(540, 290, 430, 320), 1, null);
// Placement list: entries separated by ';', each written as page(lx, ly, hx, hy).
string Page_Attrb = "1(540, 290, 430, 320);2(540, 290, 430, 320)"; //In_Page_Attrb;
String[] Pageattr = Page_Attrb.Split(new[] { ";" }, StringSplitOptions.RemoveEmptyEntries);
// Parses the first entry only. NOTE(review): PageNo, lx, ly, hx and hy are not read again
// in the active code below; their only later uses are inside the commented-out block.
String[] Pageattr1 = Pageattr[0].Split(new[] { "(", ")" }, StringSplitOptions.RemoveEmptyEntries);
int PageNo = int.Parse(Pageattr1[0]);//1
float lx = 0, ly = 0, hx = 0, hy = 0; string[] Attr = Pageattr1[1].Split(',');
if (Attr.Length > 3)
{
lx = float.Parse(Attr[0]);
ly = float.Parse(Attr[1]);
hx = float.Parse(Attr[2]);
hy = float.Parse(Attr[3]);
}
// Size in bytes of the buffer that later receives the signature (see bytes1 below).
int csize = 8192;
// The canvas is always taken from page 1, whichever page an annotation is later added to.
PdfContentByte canvas = stamper.GetOverContent(1);
BaseFont bfTimes = BaseFont.CreateFont(BaseFont.COURIER_BOLD, BaseFont.WINANSI, BaseFont.EMBEDDED);
// NOTE(review): f is not used anywhere else in this snippet.
iTextSharp.text.Font f = new iTextSharp.text.Font(bfTimes, iTextSharp.text.Font.BOLD, 15);
canvas.SetFontAndSize(bfTimes, 6);
canvas.SetTextRenderingMode(PdfContentByte.TEXT_RENDER_MODE_FILL_STROKE);
// Processes exactly entries 0 and 1 of Pageattr, regardless of how many entries there are.
// NOTE(review): this throws IndexOutOfRangeException when Pageattr has fewer than two entries.
for (int page = 0; page <= 1; page++)
{
String[] Pageattr2 = Pageattr[page].Split(new[] { "(", ")" }, StringSplitOptions.RemoveEmptyEntries);
int PageNo1 = int.Parse(Pageattr2[0]);
float lx1 = 0, ly1 = 0, hx1 = 0, hy1 = 0;
string[] Attr1 = Pageattr2[1].Split(',');
if (Attr1.Length > 3)
{
lx1 = float.Parse(Attr1[0]);
ly1 = float.Parse(Attr1[1]);
hx1 = float.Parse(Attr1[2]);
hy1 = float.Parse(Attr1[3]);
}
iTextSharp.text.Rectangle rect = new iTextSharp.text.Rectangle(lx1, ly1, hx1, hy1);
// A free-text annotation carrying the appearance's Layer2Text is created for this rectangle.
// NOTE(review): these are free-text annotations, not signature fields; whether a viewer
// treats them as valid signature appearances should be verified.
PdfAnnotation annotation = PdfAnnotation.CreateFreeText(stamper.Writer, rect, appearance.Layer2Text, canvas);
// NOTE(review): tp and pdfdate are created but not used in this snippet.
PdfAppearance tp = PdfAppearance.CreateAppearance(stamper.Writer, 444, 555);
annotation.Flags = PdfAnnotation.FLAGS_READONLY | PdfAnnotation.FLAGS_LOCKED | PdfAnnotation.FLAGS_PRINT;
PdfDate pdfdate = new PdfDate();
annotation.Title = "dsds";
annotation.Border = new PdfBorderArray(0, 0, 0, new PdfDashPattern());
annotation.Put(PdfName.ADOBE_PPKLITE, PdfName.ADBE_PKCS7_DETACHED);
stamper.AddAnnotation(annotation, PageNo1);
}
// Space reserved for the CONTENTS entry.
// NOTE(review): csize * 2 + 2 is presumably the hex-encoded buffer plus two delimiter characters - confirm.
Dictionary<PdfName, int> exclusionSizes = new Dictionary<PdfName, int>();
exclusionSizes[PdfName.CONTENTS] = csize * 2 + 2;
appearance.CryptoDictionary = (PdfDictionary)new PdfSignature(PdfName.ADOBE_PPKLITE, PdfName.ADBE_PKCS7_DETACHED)
{
Reason = appearance.Reason,
Location = appearance.Location,
Contact = appearance.Contact,
Date = new PdfDate(appearance.SignDate)
};
appearance.PreClose(exclusionSizes);
Stream s = appearance.GetRangeStream();
//byte[] by = GetStreamAsByteArray(s);
//string hashdocument = GenerateFilehash256Hex(by);
string hashdocument = genaratePDFHash(s);
// Build the eSign request from the OTP, Aadhaar number and hash value, then post it to the configured URL.
string eSignReq = Esigndoc(In_OTP, In_Aadhaar_No, hashdocument);
string res = postXMLData(ConfigurationManager.AppSettings["eSignURL"].ToString(), eSignReq);
//Fetch the Esign XML response..
XmlDocument xmlDocument = new XmlDocument();
xmlDocument.LoadXml(res);
XmlNode xmlNode = xmlDocument.SelectSingleNode("EsignResp");
// NOTE(review): when the response carries an errCode other than "NA" nothing is done here:
// the error is not reported and the CONTENTS entry is never written.
if (xmlNode.Attributes["errCode"] != null && xmlNode.Attributes["errCode"].Value != "NA")
{
}
else
{
byte[] numArray3 = this.SignDocument(res);
// The signature bytes are copied into a csize-byte buffer; the remainder stays zero.
// NOTE(review): Array.Copy throws if numArray3 is longer than csize.
byte[] bytes1 = new byte[csize];
Array.Copy((Array)numArray3, 0, (Array)bytes1, 0, numArray3.Length);
PdfDictionary update = new PdfDictionary();
update.Put(PdfName.CONTENTS, (PdfObject)new PdfString(bytes1).SetHexWriting(true));
appearance.Close(update);
//progressBar1.Value = 100;
// MessageBox.Show("eSign Operation completed successfully.");
//fileStream1.Close();
//fileStream.Close();
//lblMsg.Text = "File Signed Successfully";
//For Multiple eSign Doc Start Here ...............
//if (Pageattr.Length > 1)
//{
// for (int k = 1; k <= Pageattr.Length - 1; k++)
// {
// String[] Pageattr1 = Pageattr[k].Split(new[] { "(", ")" }, StringSplitOptions.RemoveEmptyEntries);
// PageNo = int.Parse(Pageattr1[0]);
// lx = 0; ly = 0; hx = 0; hy = 0;
// string[] Attr = Pageattr1[1].Split(',');
// if (Attr.Length > 3)
// lx = float.Parse(Attr[0]);
// ly = float.Parse(Attr[1]);
// hx = float.Parse(Attr[2]);
// hy = float.Parse(Attr[3]);
// }
// New_File_Name = PdfeSign(New_File_Name, PageNo, res, lx, ly, hx, hy, strLayer2Text);
// }
//}
//For Multiple eSign Doc End Here ...............
}
}
catch (Exception ex)
{
// NOTE(review): the exception itself is not logged or rethrown. The same two Close calls
// run again in the finally block, so on the error path each is invoked twice; if stamper or
// pdfReader is still null (for example the PdfReader constructor threw), these calls throw
// NullReferenceException.
stamper.Close();
pdfReader.Close();
}
finally
{
// Runs on both the success and the failure path.
// NOTE(review): confirm that calling stamper.Close() after appearance.Close(update) is
// valid for the iTextSharp version in use.
stamper.Close();
pdfReader.Close();
}
I'm trying to export my DataGridView to PDF; while doing that, I want to group the rows that have the same group name.
The code I use to export to PDF is below:
/// <summary>
/// Exports the contents of dataGridView1 to C:\PDFs\Rapor.pdf as a single table and
/// reports the outcome in a message box.
/// </summary>
/// <remarks>
/// Output column 0 shows grid column 6, output columns 6 and 10 are masked with "*****",
/// and every other output column j shows grid column j - 1. A position whose own grid
/// cell (column j) is null is written as a blank cell.
/// </remarks>
private void PrintReport_Click(object sender, EventArgs e)
{
    try
    {
        //create iTextSharp table
        PdfPTable pdfTable = new PdfPTable(dataGridView1.ColumnCount);
        pdfTable.DefaultCell.Padding = 3;
        pdfTable.WidthPercentage = 30;
        pdfTable.HorizontalAlignment = Element.ALIGN_LEFT;
        pdfTable.DefaultCell.BorderWidth = 1;

        //Adding Header row
        // NOTE(review): the title spans a fixed 11 columns while the table is sized from
        // dataGridView1.ColumnCount - confirm the grid always has 11 columns.
        PdfPCell cell = new PdfPCell(new Phrase("Report"));
        cell.Colspan = 11;
        cell.BackgroundColor = new iTextSharp.text.Color(50, 50, 120);
        cell.HorizontalAlignment = 1;
        pdfTable.TotalWidth = 1200f;
        pdfTable.LockedWidth = true;
        pdfTable.AddCell(cell);
        pdfTable.AddCell("Group");
        pdfTable.AddCell("Numara");
        pdfTable.AddCell("Müşteri ID");
        pdfTable.AddCell("Tanım");
        pdfTable.AddCell("IP Adresi");
        pdfTable.AddCell("Kullanıcı");
        pdfTable.AddCell("Şifre");
        pdfTable.AddCell("Domain");
        pdfTable.AddCell("2.IP");
        pdfTable.AddCell("2.Kullanıcı");
        pdfTable.AddCell("2.Kullanıcı Şifre");

        //Adding DataRow
        for (int i = 0; i < dataGridView1.Rows.Count; i++)
        {
            for (int j = 0; j < dataGridView1.Columns.Count; j++)
            {
                if (dataGridView1.Rows[i].Cells[j].Value == null)
                {
                    pdfTable.AddCell(" ");
                    continue;
                }

                if (j == 6 || j == 10)
                {
                    // Password columns are never exported in clear text.
                    pdfTable.AddCell("*****");
                    continue;
                }

                // The value shown comes from a different grid column than j, so that cell is
                // checked for null as well instead of being dereferenced blindly.
                int sourceColumn = (j == 0) ? 6 : j - 1;
                object value = dataGridView1.Rows[i].Cells[sourceColumn].Value;
                pdfTable.AddCell(value != null ? value.ToString() : " ");
            }
        }
        //pdfTable.AddCell(cells.Value.ToString());

        //Exporting to PDF
        string folderPath = "C:\\PDFs\\";
        if (!Directory.Exists(folderPath))
        {
            Directory.CreateDirectory(folderPath);
        }
        // The using block closes the stream; no explicit Close call is needed.
        using (FileStream stream = new FileStream(folderPath + "Rapor.pdf", FileMode.Create))
        {
            Document pdfDoc = new Document(PageSize.A2, 10f, 10f, 10f, 0f);
            PdfWriter.GetInstance(pdfDoc, stream);
            pdfDoc.Open();
            pdfDoc.Add(pdfTable);
            pdfDoc.Close();
        }
        MessageBox.Show("C:\\PDFs uzantısına rapor kaydedildi!");
    }
    catch (Exception msg)
    {
        MessageBox.Show(msg.Message, "Error");
    }
}
The code works well and exports the DataGridView to a PDF file, but not the way I want: it does not group the rows by 'Group Name'.
I'm stuck on this problem; any help would be appreciated.
Can you just sort the results, create a pdfTable for each 'group' that has data?
I have solved the problem with a little trick: I collected all the groups in a list named 'testList', so I can handle the situation within a single pdfTable.
Here is the code snippet:
// For every group name in testList: add a full-width heading cell, then the rows of
// dataGridView1 whose group value (grid column 6) equals that name.
for (int element = 0; element < testList.Count; element++)
{
    string name = testList.ElementAt(element).ToString();
    PdfPCell cell1 = new PdfPCell(new Phrase(name));
    cell1.BackgroundColor = new iTextSharp.text.Color(160, 160, 210);
    cell1.Colspan = 11;
    cell1.HorizontalAlignment = 1;
    pdfTable.AddCell(cell1);

    for (int i = 0; i < dataGridView1.Rows.Count; i++)
    {
        // Decide once per row whether it belongs to this group. Rows of other groups (or
        // without a group value) contribute no cells at all; emitting blank cells for them
        // would shift every following cell of the table.
        object groupValue = dataGridView1.Rows[i].Cells[6].Value;
        if (groupValue == null || groupValue.ToString() != name)
        {
            continue;
        }

        for (int j = 0; j < dataGridView1.Columns.Count; j++)
        {
            if (dataGridView1.Rows[i].Cells[j].Value == null)
            {
                pdfTable.AddCell(" ");
                continue;
            }

            if (j == 6 || j == 10)
            {
                // Masked columns.
                pdfTable.AddCell("*****");
                continue;
            }

            // Output column 0 shows grid column 6; every other column j shows grid column j - 1.
            int sourceColumn = (j == 0) ? 6 : j - 1;
            object value = dataGridView1.Rows[i].Cells[sourceColumn].Value;
            pdfTable.AddCell(value != null ? value.ToString() : " ");
        }
    }
}
I need help with iText in C#. I'm trying to extract a table from a PDF file and save it to a new CSV file, keeping the values in the correct places. For this, I thought the solution was to create a two-dimensional array to organize the data.
While extracting all the information from the PDF with iText, I saw that it was possible to get some numerical data that seemed to be the position of a piece of text on the page, and I organized my array based on these values. It didn't work: the text ended up scattered across many different cells. Now I want to know what these values mean, because they don't follow a "correct" order, and whether it is possible to use them to organize the final table.
I'm using ";" as the cell delimiter.
For testing, I'm using this PDF
http://www.americana.sp.gov.br/americanaV5/download/contasPublicas/Despesa_Categoria_Economica_2014.pdf
Here's my code:
/// <summary>
/// Downloads the PDF at the URL in TextBox1, collects the positioned text found on the
/// "BT" lines of each page's content stream, lays it out in a grid keyed by the two
/// coordinates, and sends the grid to the browser as a ';'-delimited file.
/// </summary>
/// <remarks>
/// Row 0 and column 0 of the grid hold the coordinate values themselves. Coordinates are
/// parsed and written with the invariant culture and ordered numerically, so the result does
/// not depend on the server's regional settings. Any error is shown in TextBox2 together
/// with the index of the content line being processed.
/// </remarks>
protected void Button2_Click(object sender, EventArgs e)
{
    try
    {
        TextBox2.Text = "";
        byte[] conteudo = download(TextBox1.Text);
        if (conteudo == null)
        {
            return;
        }

        System.Globalization.CultureInfo invariante = System.Globalization.CultureInfo.InvariantCulture;
        List<Celula> celulas = new List<Celula>();

        PdfReader leitorp = new PdfReader(conteudo);
        try
        {
            for (int pagina = 1; pagina <= leitorp.NumberOfPages; pagina++)
            {
                // Raw content stream of the page, decoded with the default encoding and split into lines.
                string[] linhas = Encoding.Default.GetString(leitorp.GetPageContent(pagina)).Split('\n');

                // NOTE(review): the scan starts at index 1, so the first line of each page is never examined - confirm this is intended.
                for (int j = 1; j < linhas.Length; j++)
                {
                    string linha = linhas[j];
                    if (linha.Length <= 2 || !linha.StartsWith("BT", StringComparison.Ordinal))
                    {
                        continue;
                    }

                    // Recorded before parsing so an error message points at the offending line.
                    linhadodebug = j;

                    string[] campos = linha.Split(' ');
                    Celula umacelula = new Celula();
                    // PDF numbers always use '.' as the decimal separator; parsing with the
                    // current culture misreads them on servers whose culture uses ','.
                    umacelula.coluna = float.Parse(campos[1], invariante);
                    umacelula.linha = float.Parse(campos[2], invariante);

                    // The text is whatever lies between the first '(' and the last ')'.
                    int abre = linha.IndexOf('(');
                    int fecha = linha.LastIndexOf(')');
                    umacelula.conteudo = System.Text.RegularExpressions.Regex.Replace(
                        linha.Substring(abre + 1, fecha - 1 - abre), @"\s\s+", "");
                    celulas.Add(umacelula);
                }
            }
        }
        finally
        {
            leitorp.Close();
        }

        // Distinct coordinates, ordered numerically from highest to lowest.
        // NOTE(review): columns are kept in descending order as in the original code;
        // ascending may be what is wanted for a left-to-right table - confirm.
        List<float> linhasOrdenadas = celulas.Select(c => c.linha).Distinct().OrderByDescending(v => v).ToList();
        List<float> colunasOrdenadas = celulas.Select(c => c.coluna).Distinct().OrderByDescending(v => v).ToList();

        // Grid with one extra row and column for the coordinate labels. Every distinct
        // coordinate, including the last one, gets a slot.
        string[,] matriz = new string[linhasOrdenadas.Count + 1, colunasOrdenadas.Count + 1];
        Dictionary<float, int> indiceLinha = new Dictionary<float, int>();
        for (int i = 0; i < linhasOrdenadas.Count; i++)
        {
            matriz[i + 1, 0] = linhasOrdenadas[i].ToString(invariante);
            indiceLinha[linhasOrdenadas[i]] = i + 1;
        }
        Dictionary<float, int> indiceColuna = new Dictionary<float, int>();
        for (int i = 0; i < colunasOrdenadas.Count; i++)
        {
            matriz[0, i + 1] = colunasOrdenadas[i].ToString(invariante);
            indiceColuna[colunasOrdenadas[i]] = i + 1;
        }

        // Place each text at its (row, column) slot; a later text at the same position
        // replaces an earlier one.
        foreach (Celula celula in celulas)
        {
            matriz[indiceLinha[celula.linha], indiceColuna[celula.coluna]] = celula.conteudo;
        }

        StringWriter texto = new StringWriter();
        for (int i = 0; i < matriz.GetLength(0); i++)
        {
            for (int j = 0; j < matriz.GetLength(1); j++)
            {
                texto.Write(matriz[i, j] + ";");
            }
            texto.WriteLine();
        }

        Response.ContentType = "text/plain";
        Response.AddHeader("content-disposition", "attachment;filename=" + string.Format("teste-{0}.csv", string.Format("{0:ddMMyyyy}", DateTime.Today)));
        Response.Clear();
        using (StreamWriter writer = new StreamWriter(Response.OutputStream, Encoding.UTF8))
        {
            writer.Write(texto.ToString());
        }
        Response.End();
    }
    catch (Exception E)
    {
        TextBox2.Text = "Erro Button2_Click: " + E.Message + " # " + linhadodebug.ToString();
    }
}
And here, the struct of celula (cell) and method to download the file:
/// <summary>
/// One piece of extracted text together with the two numeric coordinates
/// (<c>coluna</c>, <c>linha</c>) it was found at.
/// </summary>
public struct Celula
{
    /// <summary>Column coordinate of the text.</summary>
    public float coluna;

    /// <summary>Row coordinate of the text.</summary>
    public float linha;

    /// <summary>The text itself.</summary>
    public string conteudo;

    /// <summary>Creates a cell from its two coordinates and its text.</summary>
    public Celula(float coluna, float linha, string conteudo)
    {
        this.conteudo = conteudo;
        this.linha = linha;
        this.coluna = coluna;
    }

    /// <summary>Creates a copy of <paramref name="celula"/>.</summary>
    public Celula(Celula celula)
        : this(celula.coluna, celula.linha, celula.conteudo)
    {
    }
}
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its bytes.
/// </summary>
/// <param name="url">Address of the resource to fetch.</param>
/// <returns>
/// The downloaded content, or <c>null</c> when the download fails; in that case the error
/// message is shown in TextBox2.
/// </returns>
protected byte[] download(string url)
{
    try
    {
        WebRequest endereco = WebRequest.Create(url);

        // The response and both streams are disposed even when reading fails part-way.
        using (WebResponse resposta = endereco.GetResponse())
        using (Stream leitor = resposta.GetResponseStream())
        using (MemoryStream memoria = new MemoryStream())
        {
            // One buffer is reused for every read.
            byte[] buffer = new byte[1024];
            int count;
            while ((count = leitor.Read(buffer, 0, buffer.Length)) > 0)
            {
                memoria.Write(buffer, 0, count);
            }

            // ToArray never returns null, so no separate "download null" branch is needed.
            return memoria.ToArray();
        }
    }
    catch (Exception E)
    {
        TextBox2.Text = "Error download: " + E.Message;
        return null;
    }
}
This is a non-profit project. I hope you can help me. Thank you!