pdfbox issue
I used pdfbox to extract text from PDF to my richtextbox.
I don't know what the problem is: some PDFs are read fine, but others throw an exception. The exception is:
Object reference not set to an instance of an object.
Here's my code:
using org.pdfbox.pdmodel;
using org.pdfbox.util;
/// <summary>
/// Lets the user pick a PDF, extracts its text with PDFBox and shows the text
/// in the rich text box and the file name in the text box.
/// </summary>
private void pdfButton_Click(object sender, EventArgs e)
{
    using (OpenFileDialog openFD = new OpenFileDialog())
    {
        openFD.FileName = "";
        openFD.InitialDirectory = "C:\\";
        openFD.Filter = "All PDF Files|*.PDF";
        openFD.Title = "Browse all PDF files";
        if (openFD.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        PDDocument pdfFile = null;
        try
        {
            // The property is FileName (capital N); OpenFileDialog has no "Filename" member.
            pdf_filename = Path.GetFileNameWithoutExtension(openFD.FileName);
            pdfFile = PDDocument.load(openFD.FileName);
            PDFTextStripper pdfStripper = new PDFTextStripper();
            richtextBox1.Text = pdfStripper.getText(pdfFile);
            textBox1.Text = Path.GetFileName(openFD.FileName);
        }
        catch (Exception error)
        {
            MessageBox.Show(error.ToString());
        }
        finally
        {
            // Close the document even when extraction fails so the file is not left open.
            if (pdfFile != null)
            {
                pdfFile.close();
            }
        }
    }
}
I fixed the issue by switching to iTextSharp, as advised by my co-worker: I replaced PDFBox with iTextSharp.
If anyone runs into the same issue, here is the working code:
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
/// <summary>
/// Lets the user pick a PDF, extracts its text through <c>ReadPdf</c> and shows
/// the text in the rich text box and the file name in the text box.
/// </summary>
private void pdfButton_Click(object sender, EventArgs e)
{
    using (OpenFileDialog openFD = new OpenFileDialog())
    {
        openFD.FileName = "";
        openFD.InitialDirectory = "C:\\";
        openFD.Filter = "All PDF Files|*.PDF";
        openFD.Title = "Browse all PDF files";
        if (openFD.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        try
        {
            // The property is FileName (capital N); OpenFileDialog has no "Filename" member.
            string selectedPath = openFD.FileName;
            pdf_filename = Path.GetFileNameWithoutExtension(selectedPath);
            richtextBox1.Text = ReadPdf(selectedPath);
            textBox1.Text = Path.GetFileName(selectedPath);
        }
        catch (Exception error)
        {
            MessageBox.Show(error.ToString());
        }
    }
}
/// <summary>
/// Extracts the text of every page of a PDF using iTextSharp.
/// </summary>
/// <param name="filename">Path of the PDF file to read.</param>
/// <returns>
/// The concatenated text of all pages, or an empty string when the file does not exist.
/// </returns>
private string ReadPdf(string filename)
{
    if (!File.Exists(filename))
    {
        return string.Empty;
    }

    PdfReader reader = new PdfReader(filename);
    try
    {
        // StringBuilder avoids re-copying the accumulated text for every page.
        System.Text.StringBuilder text = new System.Text.StringBuilder();
        int pageCount = reader.NumberOfPages;
        for (int page = 1; page <= pageCount; page++)
        {
            text.Append(PdfTextExtractor.GetTextFromPage(reader, page));
        }
        return text.ToString();
    }
    finally
    {
        // Release the underlying file even if a page fails to parse.
        reader.Close();
    }
}
Related
I am trying to build a small .pdf -> .txt / searchable .pdf converter, but I am having trouble reusing the OCR result produced by the first button in the other buttons.
I made myself a "solution", but the code seems too messy and repetitive.
using IronOcr;
using System;
using System.IO;
namespace ocr
{
    /// <summary>
    /// Form with three buttons that each OCR a user-selected PDF and then either
    /// display the text, save it as a text file, or save a searchable PDF.
    /// </summary>
    public partial class Form1 : Form
    {
        // Filter shared by every file picker in this form.
        private const string PdfFilter = "pdf files (*.pdf)|*.pdf|All Files (*.*)|*.*";

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>OCRs the selected PDF and shows the recognised text in the rich text box.</summary>
        private void button1_Click(object sender, EventArgs e)
        {
            ProcessSelectedPdf(result => richTextBox1.Text = result.Text);
        }

        /// <summary>OCRs the selected PDF and saves the recognised text as "pdf.txt".</summary>
        private void button2_Click(object sender, EventArgs e)
        {
            ProcessSelectedPdf(result => result.SaveAsTextFile("pdf.txt"));
        }

        /// <summary>OCRs the selected PDF and saves it as the searchable PDF "pdfpesquisavel.pdf".</summary>
        private void button3_Click(object sender, EventArgs e)
        {
            ProcessSelectedPdf(result => result.SaveAsSearchablePdf("pdfpesquisavel.pdf"));
        }

        /// <summary>
        /// Shared pipeline for all three buttons: ask for a PDF, run OCR on it,
        /// echo the text to the console and hand the result to <paramref name="handleResult"/>.
        /// Does nothing when the user cancels the dialog.
        /// </summary>
        /// <param name="handleResult">Button-specific action applied to the OCR result.</param>
        private void ProcessSelectedPdf(Action<OcrResult> handleResult)
        {
            using (var ofd = new OpenFileDialog())
            {
                ofd.Filter = PdfFilter;
                if (ofd.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                textBox1.Text = ofd.FileName;
                var ocr = new IronTesseract(); // nothing to configure
                // NOTE(review): placeholder licence key and PDF password are hard-coded;
                // presumably these should come from configuration - confirm.
                IronOcr.License.LicenseKey = "SOMELICENSEKEY";
                using (var input = new OcrInput())
                {
                    input.AddPdf(ofd.FileName, "password");
                    var result = ocr.Read(input);
                    Console.WriteLine(result.Text);
                    // The result is consumed while the OcrInput is still alive, as before.
                    handleResult(result);
                }
            }
        }
    }
}
I tried to assign the variable "Result" once and reuse it in the other button functions (button 2 and button 3),
but it didn't work.
I tried to create a little program to convert PDF to a TIF file using ghostscript but unfortunately it results in an error ("null"). Can't figure out why it's failing:
// Asks the user for a PDF file and stores the chosen path in strfilename.
// strfilename is left unchanged when the dialog is not confirmed with OK.
void button1_Click(object sender, EventArgs e)
{
    var pdfPicker = new OpenFileDialog();
    pdfPicker.Filter = "PDF Files|*.pdf";

    var answer = pdfPicker.ShowDialog();
    if (answer != System.Windows.Forms.DialogResult.OK)
    {
        return;
    }

    strfilename = pdfPicker.FileName;
}
// Asks the user for an output folder and stores the chosen path in folder.
// folder is left unchanged when the dialog is not confirmed with OK.
void button2_Click(object sender, EventArgs e)
{
    var folderPicker = new FolderBrowserDialog();

    var answer = folderPicker.ShowDialog();
    if (answer != System.Windows.Forms.DialogResult.OK)
    {
        return;
    }

    folder = folderPicker.SelectedPath;
}
// Rasterizes every page of the previously selected PDF (strfilename) with
// Ghostscript and saves each page as a TIFF into the previously selected folder.
void button3_Click(object sender, EventArgs e)
{
    // Both values are only set by the other two buttons; without them
    // File.ReadAllBytes / Path.Combine would fail on null.
    if (string.IsNullOrEmpty(strfilename) || string.IsNullOrEmpty(folder))
    {
        MessageBox.Show("Please select a PDF file and a target folder first.");
        return;
    }

    const string DLL_64BITS = "gsdll64.dll";
    GhostscriptVersionInfo gvi = new GhostscriptVersionInfo(DLL_64BITS);
    var yDpi = 300;

    using (MemoryStream ms = new MemoryStream(File.ReadAllBytes(strfilename)))
    using (var rasterizer = new GhostscriptRasterizer())
    {
        rasterizer.Open(ms, gvi, true);
        int pageCount = rasterizer.PageCount;

        // The loop starts at page 1, so the bound must be inclusive or the
        // last page (and a single-page document entirely) is skipped.
        for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++)
        {
            string outputTIFPath = Path.Combine(folder, "00" + pageNumber.ToString() + ".tiff");
            // Dispose each page image after saving so bitmaps do not pile up.
            using (Image pdf2TIF = rasterizer.GetPage(yDpi, pageNumber))
            {
                MessageBox.Show(outputTIFPath);
                pdf2TIF.Save(outputTIFPath, ImageFormat.Tiff);
            }
        }

        rasterizer.Close();
    }
}
The error looks like this
Can anyone help me to sort this out?
try adding this
MyPlaceHolder.Controls.Add(pd2TIF);
below:
Image pdf2TIF = rasterizer.GetPage(yDpi, pageNumber);
I just read that on a different thread; I'm not 100% sure whether it works.
I have a code that let user select the files and load the data in the text files to datagridview.
// Click handler: builds a DataTable with the columns Point, X, Y, Z, R, A, B, C,
// lets the user choose a *.PNT file, splits every line of that file on single
// spaces and binds the resulting table to dataGridView1.
// NOTE(review): this excerpt is syntactically incomplete - the try block has no
// catch/finally and the closing braces of the outer if and of the method are missing.
private void cmdload_Click(object sender, EventArgs e)
{
DataTable table = new DataTable();
table.Columns.Add("Point");
table.Columns.Add("X");
table.Columns.Add("Y");
table.Columns.Add("Z");
table.Columns.Add("R");
table.Columns.Add("A");
table.Columns.Add("B");
table.Columns.Add("C");
Stream myStream = null;
OpenFileDialog openFileDialog1 = new OpenFileDialog();
openFileDialog1.InitialDirectory = "\\Yamaha";
openFileDialog1.Filter = "Data Files (*.PNT)|*.PNT";
// NOTE(review): the Filter string above defines a single entry, yet index 2 is selected here.
openFileDialog1.FilterIndex = 2;
openFileDialog1.RestoreDirectory = true;
if (openFileDialog1.ShowDialog() == DialogResult.OK)
{
try
{
// The stream returned by OpenFile() is only null-checked and disposed;
// the file content is read through a second handle opened from FileName below.
if ((myStream = openFileDialog1.OpenFile()) != null)
{
using (myStream)
{
string filename = openFileDialog1.FileName;
// NOTE(review): "#filename" is not valid C#; presumably a mangled "@filename"
// (or plain "filename") - confirm against the original post.
using (var reader = File.OpenText(#filename))
{
string line;
while ((line = reader.ReadLine()) != null)
{
// Indexes parts[0]..parts[7] without checking how many fields the line produced.
string[] parts = line.Split(' ');
table.Rows.Add(parts[0], parts[1], parts[2], parts[3], parts[4], parts[5], parts[6], parts[7]);
}
dataGridView1.DataSource = table;
}
}
}
}
EDITED:
I have edited my code as suggested but the value just doesn't display. Please advise.
/// <summary>
/// Lets the user choose a *.PNT file and loads its space-separated values into
/// dataGridView1 (columns Point, X, Y, Z, R, A, B, C).
/// </summary>
private void cmdload_Click(object sender, EventArgs e)
{
    DataTable table = new DataTable();
    foreach (string columnName in new[] { "Point", "X", "Y", "Z", "R", "A", "B", "C" })
    {
        table.Columns.Add(columnName);
    }

    using (OpenFileDialog openFileDialog1 = new OpenFileDialog())
    {
        openFileDialog1.InitialDirectory = "C:\\";
        openFileDialog1.Filter = "Data Files (*.PNT)|*.PNT";
        // FilterIndex is 1-based and the filter string defines a single entry.
        openFileDialog1.FilterIndex = 1;
        openFileDialog1.RestoreDirectory = true;
        if (openFileDialog1.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        try
        {
            // Open the file once; the extra OpenFile() stream was never read.
            using (var reader = File.OpenText(openFileDialog1.FileName))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // Runs of spaces would otherwise produce empty fields and shift the columns.
                    string[] parts = line.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                    if (parts.Length == 0)
                    {
                        continue; // blank line
                    }

                    // Copy at most one value per column: short lines leave the
                    // remaining cells empty, extra fields are ignored.
                    object[] cells = new object[Math.Min(parts.Length, table.Columns.Count)];
                    Array.Copy(parts, cells, cells.Length);
                    table.Rows.Add(cells);
                }
            }

            dataGridView1.DataSource = table;
        }
        catch (Exception ex)
        {
            MessageBox.Show("Error: Could not read file from disk. Original error: " + ex.Message);
        }
    }
}
The text file looks like :
I want the contents of the data file to be shown in the grid view when the user loads the file.
Now I am not sure why my code doesn't work. Can anyone give me some advice, please?
please have a look :
/// <summary>
/// Lets the user choose a text file and loads its space-separated values into
/// dataGridView1 (columns Point, X, Y, Z, R, A, B, C).
/// </summary>
private void cmdload_Click(object sender, EventArgs e)
{
    DataTable table = new DataTable();
    foreach (string columnName in new[] { "Point", "X", "Y", "Z", "R", "A", "B", "C" })
    {
        table.Columns.Add(columnName);
    }

    using (OpenFileDialog openFileDialog1 = new OpenFileDialog())
    {
        openFileDialog1.InitialDirectory = "C:\\"; // use a fully qualified start directory
        openFileDialog1.Filter = "txt files (*.txt)|*.txt|All files (*.*)|*.*"; // two entries, so index 2 = "All files"
        openFileDialog1.FilterIndex = 2;
        openFileDialog1.RestoreDirectory = true;
        if (openFileDialog1.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        try
        {
            // Pass the plain 'filename' value; the file is opened only once.
            string filename = openFileDialog1.FileName;
            using (var reader = File.OpenText(filename))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // Runs of spaces would otherwise produce empty fields and shift the columns.
                    string[] parts = line.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                    if (parts.Length == 0)
                    {
                        continue; // blank line
                    }

                    // Copy at most one value per column: short lines leave the
                    // remaining cells empty, extra fields are ignored.
                    object[] cells = new object[Math.Min(parts.Length, table.Columns.Count)];
                    Array.Copy(parts, cells, cells.Length);
                    table.Rows.Add(cells);
                }
            }

            dataGridView1.DataSource = table;
        }
        catch (Exception ex) // a try block needs a catch (or finally) block
        {
            // Report the failure instead of silently swallowing it.
            MessageBox.Show("Error: Could not read file from disk. Original error: " + ex.Message);
        }
    }
}
hope it helps.
// Click handler: shows an OpenFileDialog and writes the dialog's result to textBox1.
private void button1_Click(object sender, EventArgs e)
{
OpenFileDialog newOpen = new OpenFileDialog();
// ShowDialog() returns a DialogResult, not the path of the chosen file.
DialogResult result = newOpen.ShowDialog();
// Concatenating with "" turns the DialogResult into its text form, so the
// text box shows that value rather than a file path.
this.textBox1.Text = result + "";
}
It just returns "OK"
What am I doing wrong? I wish to get the PATH to the file and display it in a text box.
The ShowDialog method returns whether the user pressed OK or Cancel. This is useful information, but the actual filename is stored as a property on the dialog
// Click handler: shows an OpenFileDialog and, when the user confirms with OK,
// copies the dialog's FileName into textBox1. Otherwise textBox1 is left untouched.
private void button1_Click(object sender, EventArgs e)
{
    var picker = new OpenFileDialog();

    if (picker.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    this.textBox1.Text = picker.FileName;
}
You need to access the filename:
string filename = newOpen.FileName;
or filenames, if you allowed multiple file selection:
newOpen.FileNames;
Ref.: OpenFileDialog Class
// Click handler: shows an OpenFileDialog (start directory c:\, "All files" filter
// preselected) and opens the chosen file as a stream. Any exception raised while
// opening or reading is reported in a message box.
private void button1_Click(object sender, System.EventArgs e)
{
    var picker = new OpenFileDialog();
    picker.InitialDirectory = "c:\\";
    picker.Filter = "txt files (*.txt)|*.txt|All files (*.*)|*.*";
    picker.FilterIndex = 2;
    picker.RestoreDirectory = true;

    if (picker.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    try
    {
        Stream selected = picker.OpenFile();
        if (selected == null)
        {
            return;
        }

        using (selected)
        {
            // Insert code to read the stream here.
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show("Error: Could not read file. Error: " + ex.Message);
    }
}
You need to read the FileName property of the OpenFileDialog instance. This will get you the path of the selected file.
Here is an example of using an existing file as a default, and getting a new file back:
// Shows an OpenFileDialog restricted to *.xml files. When oldFile is not null or
// empty, its directory and file name are used as the dialog's starting values.
// Returns the selected path, or string.Empty when the dialog is not confirmed with OK.
private string open(string oldFile)
{
    var picker = new OpenFileDialog();

    bool hasPreviousFile = !string.IsNullOrEmpty(oldFile);
    if (hasPreviousFile)
    {
        picker.InitialDirectory = Path.GetDirectoryName(oldFile);
        picker.FileName = Path.GetFileName(oldFile);
    }

    picker.Filter = "eXtensible Markup Language File (*.xml) |*.xml"; // optional filter

    return picker.ShowDialog() == DialogResult.OK
        ? picker.FileName
        : string.Empty;
}
Path.GetDirectoryName(file) : Return path
Path.GetFileName(file) : Return filename
I have made an Address Book WinForm in C# and would like to know how to print it as a text file, how would I go about doing this?
I have displayed everything in a DataGridView, I would ideally just like to print the information in the table as text.
you can try like this...
// Application entry point: creates a PrintPreviewDialog instance and runs the
// message loop on it.
// NOTE(review): presumably PrintPreviewDialog is this sample's own form class
// rather than the framework dialog of the same name - confirm.
[STAThread]
static void Main()
{
    var startupForm = new PrintPreviewDialog();
    Application.Run(startupForm);
}
/// <summary>
/// Lets the user choose a text file, loads its whole content into txtFile and
/// remembers the path in strFileName. Errors are reported in a message box.
/// </summary>
private void btnOpenFile_Click(object sender, System.EventArgs e)
{
    // Verbatim string literal: the prefix is '@', not '#'.
    openFileDialog.InitialDirectory = @"c:\";
    openFileDialog.Filter = "Text files (*.txt)|*.txt|" +
                            "All files (*.*)|*.*";
    openFileDialog.FilterIndex = 1; // 1 based index
    if (openFileDialog.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    try
    {
        // Opening the reader inside the try means open failures (missing or
        // locked file) are reported too; 'using' closes the reader on every path.
        using (StreamReader reader = new StreamReader(openFileDialog.FileName))
        {
            txtFile.Text = reader.ReadToEnd();
        }

        // Remember the file only once it has been read successfully.
        strFileName = openFileDialog.FileName;
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
}
/// <summary>
/// Lets the user choose a target file and writes the content of txtFile to it.
/// On success the chosen path is remembered in strFileName. Errors are reported
/// in a message box.
/// </summary>
private void btnSaveFile_Click(object sender, System.EventArgs e)
{
    using (SaveFileDialog sfd = new SaveFileDialog())
    {
        // Verbatim string literal: the prefix is '@', not '#'.
        sfd.InitialDirectory = @"c:\";
        sfd.Filter = "Text files (*.txt)|*.txt|" +
                     "All files (*.*)|*.*";
        sfd.FilterIndex = 1; // 1 based index
        sfd.FileName = strFileName != null ? strFileName : "*.txt";

        if (sfd.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        try
        {
            // Write to the file the user just picked. The previous code opened
            // the old strFileName, which is null before any file was loaded.
            using (StreamWriter writer = new StreamWriter(sfd.FileName, false))
            {
                writer.Write(txtFile.Text);
            }

            strFileName = sfd.FileName;
        }
        catch (Exception ex)
        {
            MessageBox.Show(ex.Message);
        }
    }
}
//here you can print form as text file by clicking on the button..
// Click handler: opens a PageSetupDialog bound to printDocument.
private void btnPageSetup_Click(object sender, System.EventArgs e)
{
    var pageSetup = new PageSetupDialog { Document = printDocument };
    pageSetup.ShowDialog();
}
// Click handler: opens a PrintDialog bound to printDocument and, when confirmed
// with OK, prints the document. Printing errors are shown in a message box.
private void btnPrint_Click(object sender, System.EventArgs e)
{
    var printDialog = new PrintDialog { Document = printDocument };

    bool confirmed = printDialog.ShowDialog() == DialogResult.OK;
    if (!confirmed)
    {
        return;
    }

    try
    {
        printDocument.Print();
    }
    catch (Exception ex)
    {
        MessageBox.Show("Print error: " + ex.Message);
    }
}
// Click handler: opens a PrintPreviewDialog bound to printDocument.
private void btnPrintPreview_Click(object sender, System.EventArgs e)
{
    var preview = new PrintPreviewDialog { Document = printDocument };
    preview.ShowDialog();
}
/// <summary>
/// PrintPage handler: draws lines read from stringReader onto the current page,
/// one per row inside the page margins, and tells the print system whether
/// another page is needed.
/// </summary>
private void pdPrintPage(object sender, PrintPageEventArgs e)
{
    float leftMargin = e.MarginBounds.Left;
    float topMargin = e.MarginBounds.Top;

    // The line height is constant for the page; compute it once.
    float lineHeight = currentFont.GetHeight(e.Graphics);
    float linesPerPage = e.MarginBounds.Height / lineHeight;

    int linesPrinted = 0;
    string strLine;
    while (linesPrinted < linesPerPage &&
           (strLine = stringReader.ReadLine()) != null)
    {
        float verticalOffset = topMargin + (linesPrinted * lineHeight);
        e.Graphics.DrawString(strLine, currentFont, Brushes.Black, leftMargin, verticalOffset);
        linesPrinted++;
    }

    // Ask the reader whether any text is left. Using the last line read for this
    // is wrong when the page fills up: that line is non-null even if it was the
    // final line of the text, which produced a spurious blank page.
    e.HasMorePages = stringReader.Peek() != -1;
}
// BeginPrint handler: captures the font and the current text of txtFile so the
// print job works from them (stored in currentFont and stringReader).
private void pdBeginPrint(object sender, PrintEventArgs e)
{
    string textToPrint = txtFile.Text;
    stringReader = new StringReader(textToPrint);
    currentFont = txtFile.Font;
}
// EndPrint handler: closes stringReader and notifies the user with a message box.
private void pdEndPrint(object sender, PrintEventArgs e)
{
    const string doneMessage = "Done printing.";

    stringReader.Close();
    MessageBox.Show(doneMessage);
}
}
Preview and Print from Your Windows Forms App with the .NET Printing Namespace
http://msdn.microsoft.com/en-us/magazine/cc188767.aspx
It's a little old (2003) but still looks relevant.
You should give more details on what you want to do.
How do you intend to print the form as a text file? How would you convert graphics such as labels, buttons and other controls into text?
What you ask is possible, and you can control every aspect of the printed content either way (graphic rendering or text only). Have a look here as a starting point:
Windows Forms Print Support
The simplest way is to create a text file and write the values to it, like this:
// Creates (or overwrites) Address.txt and writes one line to it.
// 'using' closes the writer even if WriteLine throws, so the file handle is not leaked.
using (var textFile = File.CreateText("Address.txt"))
{
    textFile.WriteLine("Name: Fischermaen");
}