Perform OCR via MODI on specific region instead of entire document - c#

I am trying to perform bulk OCR on images in a selected directory via MODI. Here's the code.
/// <summary>
/// Shows a folder picker and, when the user confirms, runs OCR over the
/// files in the chosen folder.
/// </summary>
private void button1_Click(object sender, EventArgs e) {
    CommonOpenFileDialog dialog = new CommonOpenFileDialog {
        InitialDirectory = "C:\\Users",
        IsFolderPicker = true
    };
    if (dialog.ShowDialog() == CommonFileDialogResult.Ok) {
        invoicePath = dialog.FileName;
        // Process the folder that was just picked. The original passed
        // `imageDirectory`, which this handler never assigns, so the
        // user's selection was ignored.
        CheckFileAndDoOCR(invoicePath);
    }
}
/// <summary>
/// Runs OCR on every JPEG file directly inside <paramref name="directoryPath"/>
/// and writes the recognised text to a sibling <c>.txt</c> file.
/// </summary>
/// <param name="directoryPath">Folder whose files are processed (not recursive).</param>
/// <returns>The text recognised for the last JPEG processed, or an empty string if there was none.</returns>
public string CheckFileAndDoOCR(string directoryPath) {
    string TheTxt = "";
    foreach (string filePath in Directory.GetFiles(directoryPath)) {
        FileInfo foo = new FileInfo(filePath);
        // Case-insensitive so ".Jpg" and similar spellings are handled too.
        if (string.Equals(foo.Extension, ".jpg", StringComparison.OrdinalIgnoreCase)) {
            TheTxt = DoOCR(foo.FullName);
            // ChangeExtension only touches the trailing extension; the old
            // Name.Replace(Extension, "") also removed it from the middle of a name.
            string txtFileName = Path.ChangeExtension(foo.FullName, ".txt");
            // WriteAllText truncates an existing file (OpenOrCreate left stale
            // trailing bytes behind) and always releases the handle.
            File.WriteAllText(txtFileName, TheTxt);
        }
        // NOTE(review): as in the original, EVERY file in the folder is deleted
        // here, not only the JPEGs that were processed - confirm this is intended.
        try {
            foo.Delete();
        }
        catch (Exception ex) {
            MessageBox.Show(ex.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
    }
    return TheTxt;
}
/// <summary>
/// Loads the image at <paramref name="FullPath"/> into a MODI document, runs
/// English OCR on it and returns the text of the first page.
/// </summary>
/// <param name="FullPath">Path of the image file to recognise.</param>
/// <returns>The layout text of the first image in the document.</returns>
public string DoOCR(string FullPath) {
    MODI.Document md = new MODI.Document();
    md.Create(FullPath);
    try {
        md.OCR(MODI.MiLANGUAGES.miLANG_ENGLISH, false, false);
        MODI.Image image = (MODI.Image)md.Images[0];
        return image.Layout.Text;
    }
    finally {
        // Close even when OCR throws, so the document is not left open.
        md.Close(false);
        md = null;
        // Kept from the original: presumably forces the COM wrappers to be
        // released so the caller can delete the source file - TODO confirm.
        GC.Collect();
        GC.WaitForPendingFinalizers();
    }
}
How can I perform the OCR on a specific area, for example
// Example region of interest: origin (1367, 420), 411 wide by 57 high
// (presumably pixel coordinates within the scanned image - TODO confirm).
Rectangle Area = new Rectangle() {
X = 1367,
Y = 420,
Height = 57,
Width = 411
};
I have to extract data from one region/rectangle only from all the images. How can I do that? I have tried IronOCR and Tesseract, works like a charm. But I have been told to use MODI. Please help.

Related

Trying to export telerik winform grouped grid data to excel?

When I export the grid data without grouping, it exports perfectly. When I export it with grouping, rows are skipped; if I remove the header rows, the data is exported correctly with or without grouping.
I think the problem is in the header rows, because everything works once I remove them.
Please tell me how I can adjust the header rows so that grouped data is exported correctly.
/// <summary>
/// Asks the user for a target file, exports the report grid to it and
/// optionally opens the result, then switches to the second tab.
/// </summary>
private void btnExport_Click(object sender, EventArgs e)
{
    saveFileDialog1.FileName = this.ReportHeaderText.Replace(' ', '-').Replace('/', '-');
    saveFileDialog1.OverwritePrompt = true;
    if (saveFileDialog1.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    // The empty `if (saveFileDialog1.CheckFileExists) { }` block was dead code and is gone.
    string fileName = this.saveFileDialog1.FileName;
    if (string.IsNullOrEmpty(fileName))
    {
        RiceMsgBox.ShowErrorBox("Please enter a file name.");
        return;
    }

    bool openExportFile = false;
    RunExportToExcelML(fileName, ref openExportFile);
    if (openExportFile)
    {
        try
        {
            System.Diagnostics.Process.Start(fileName);
        }
        catch (Exception)
        {
            // Failing to launch a viewer is not fatal; just tell the user.
            RiceMsgBox.ShowErrorBox("The file cannot be opened on your system");
        }
    }
    this.tabControl1.SelectedIndex = 1;
}
/// <summary>
/// Exports <c>gridReport</c> to <paramref name="fileName"/> and asks the user
/// whether the exported file should be opened.
/// </summary>
/// <param name="fileName">Destination path of the exported spreadsheet.</param>
/// <param name="openExportFile">Set to whether the user answered "Yes"; left untouched if the export fails.</param>
private void RunExportToExcelML(string fileName, ref bool openExportFile)
{
    Telerik.WinControls.Export.GridViewSpreadExport exporter = new Telerik.WinControls.Export.GridViewSpreadExport(gridReport, 0);
    exporter.HiddenColumnOption = Telerik.WinControls.UI.Export.HiddenOption.DoNotExport;
    exporter.CellFormatting += exporter_CellFormatting;
    exporter.ExportVisualSettings = true;
    exporter.SheetMaxRows = ExcelMaxRows._1048576;
    // The sheet name is capped at 30 characters; every run of non-alphanumeric characters becomes ",".
    // The pattern is a verbatim string (@"..."); the "#" prefix in the original was a transcription error.
    exporter.SheetName = System.Text.RegularExpressions.Regex.Replace(this.ReportHeaderText.Length > 30 ? this.ReportHeaderText.Substring(0, 30) : this.ReportHeaderText, @"[^0-9a-zA-Z]+", ",");
    exporter.SummariesExportOption = SummariesOption.ExportAll;
    Telerik.WinControls.Export.SpreadExportRenderer exportRenderer = new Telerik.WinControls.Export.SpreadExportRenderer();
    exportRenderer.WorkbookCreated += renderer_WorkbookCreated;
    // exportRenderer.ExcelTableCreated += exporter_ExcelTableCreated;
    //exporter.CellFormatting += exporter_ExcelCellFormatting;
    //FormatGridColumns(gridReport);
    try
    {
        exporter.RunExport(fileName, exportRenderer);
        var dialog = RiceMsgBox.GetQuestionBox("The data in the grid was exported successfully. Do you want to open the file?");
        openExportFile = dialog == DialogResult.Yes;
    }
    catch (Exception)
    {
        RiceMsgBox.ShowErrorBox("Error exporting data.");
    }
}
/// <summary>
/// Gives every exported cell a thin black border on all four sides.
/// </summary>
void exporter_CellFormatting(object sender, Telerik.WinControls.Export.CellFormattingEventArgs e)
{
    // A fresh border (and colour) instance per side, exactly as before.
    Func<CellBorder> thinBlack = () =>
        new CellBorder(CellBorderStyle.Thin, new ThemableColor(System.Windows.Media.Colors.Black));

    CellBorders outline = new CellBorders();
    outline.Top = thinBlack();
    outline.Bottom = thinBlack();
    outline.Right = thinBlack();
    outline.Left = thinBlack();

    e.CellStyleInfo.Borders = outline;
}
/// <summary>
/// Runs when the export renderer has created the workbook: auto-fits the used
/// columns, inserts cells at the top of the first sheet and turns them into a
/// merged, centred title showing <c>ReportHeaderText</c>.
/// </summary>
void renderer_WorkbookCreated(object sender, Telerik.WinControls.Export.WorkbookCreatedEventArgs e)
{
    var transparentFill = new PatternFill(
        PatternType.Solid,
        System.Windows.Media.Colors.Transparent,
        System.Windows.Media.Colors.Transparent);
    var titleFormat = new CellValueFormat("#");
    // Built but not used below; kept because the original evaluates it.
    string dateRange = "( From Date : " + dtpFromDate.Text + " - To Date : " + dtpToDate.Text + " )";

    var sheet = e.Workbook.Sheets[0] as Worksheet;
    sheet.Columns[sheet.UsedCellRange].AutoFitWidth();

    var titleRange = new CellRange(0, 0, 1, gridReport.Columns.Count);
    CellSelection title = sheet.Cells[titleRange];

    // Make room for the title only when the sheet allows shifting cells down.
    bool canShiftDown = title.CanInsertOrRemove(titleRange, ShiftType.Down);
    if (canShiftDown)
    {
        title.Insert(InsertShiftType.Down);
    }

    title.Merge();
    title.SetFormat(titleFormat);
    title.SetHorizontalAlignment(Telerik.Windows.Documents.Spreadsheet.Model.RadHorizontalAlignment.Center);
    title.SetVerticalAlignment(Telerik.Windows.Documents.Spreadsheet.Model.RadVerticalAlignment.Center);
    title.SetFontFamily(new ThemableFontFamily("Rockwell"));
    title.SetFontSize(24);
    title.SetFill(transparentFill);
    title.SetValue(this.ReportHeaderText);
}
The GridViewSpreadExport generates a document that consists of merged cells. Inserting a row on the top and then exporting the document causes wrong merged cells. It is a known issue: link
As a workaround, instead of using the WorkbookCreated event to insert a new row you can export the document and then reopen it and insert a row above the exported grid data.
// Workaround variant of RunExportToExcelML: the WorkbookCreated handler is not
// attached; the file is exported first and the header is inserted afterwards
// by InsertHeader. The remainder of the method is elided in this excerpt.
private void RunExportToExcelML(string fileName, ref bool openExportFile)
{
    Telerik.WinControls.Export.GridViewSpreadExport exporter = new Telerik.WinControls.Export.GridViewSpreadExport(gridReport, 0);
    exporter.HiddenColumnOption = Telerik.WinControls.UI.Export.HiddenOption.DoNotExport;
    exporter.CellFormatting += exporter_CellFormatting;
    exporter.ExportVisualSettings = true;
    exporter.SheetMaxRows = ExcelMaxRows._1048576;
    // Verbatim string literal (@"..."); the "#" prefix in the original was a transcription error.
    exporter.SheetName = System.Text.RegularExpressions.Regex.Replace(this.ReportHeaderText.Length > 30 ? this.ReportHeaderText.Substring(0, 30) : this.ReportHeaderText, @"[^0-9a-zA-Z]+", ",");
    exporter.SummariesExportOption = SummariesOption.ExportAll;
    Telerik.WinControls.Export.SpreadExportRenderer exportRenderer = new Telerik.WinControls.Export.SpreadExportRenderer();
    //exportRenderer.WorkbookCreated += renderer_WorkbookCreated;
    try
    {
        exporter.RunExport(fileName, exportRenderer);
        this.InsertHeader(fileName);
        // more code...
    }
/// <summary>
/// Reopens the exported workbook at <paramref name="fileName"/>, inserts a
/// merged, centred title showing <c>ReportHeaderText</c> at the top of the
/// first sheet and writes the workbook back to the same path.
/// </summary>
/// <param name="fileName">Path of the already exported .xlsx file.</param>
private void InsertHeader(string fileName)
{
    var xlsx = new XlsxFormatProvider();

    // Load the whole workbook, then release the file before it is rewritten below.
    Workbook book = null;
    using (Stream input = new FileStream(fileName, FileMode.Open))
    {
        book = xlsx.Import(input);
    }

    var transparentFill = new PatternFill(
        PatternType.Solid,
        System.Windows.Media.Colors.Transparent,
        System.Windows.Media.Colors.Transparent);
    var titleFormat = new CellValueFormat("#");
    //string dateRange = "( From Date : " + dtpFromDate.Text + " - To Date : " + dtpToDate.Text + " )";

    var sheet = book.Sheets[0] as Worksheet;
    sheet.Columns[sheet.UsedCellRange].AutoFitWidth();

    var titleRange = new CellRange(0, 0, 1, gridReport.Columns.Count);
    CellSelection title = sheet.Cells[titleRange];

    // Shift the exported data down only when the sheet permits it.
    bool canShiftDown = title.CanInsertOrRemove(titleRange, ShiftType.Down);
    if (canShiftDown)
    {
        title.Insert(InsertShiftType.Down);
    }

    title.Merge();
    title.SetFormat(titleFormat);
    title.SetHorizontalAlignment(Telerik.Windows.Documents.Spreadsheet.Model.RadHorizontalAlignment.Center);
    title.SetVerticalAlignment(Telerik.Windows.Documents.Spreadsheet.Model.RadVerticalAlignment.Center);
    title.SetFontFamily(new ThemableFontFamily("Rockwell"));
    title.SetFontSize(24);
    title.SetFill(transparentFill);
    title.SetValue(this.ReportHeaderText);

    using (Stream target = new FileStream(fileName, FileMode.Create))
    {
        xlsx.Export(book, target);
    }
}
For more information about SpreadProcessing visit the following link: https://docs.telerik.com/devtools/document-processing/libraries/radspreadprocessing/overview

convert a jpg/png/txt or any file format to pdf using mvc5

How do i convert a jpg/png/txt or any file format to pdf using mvc c#.
Here is the code:
/// <summary>
/// Saves every uploaded file of the current request under
/// <c>Documents\Profile\{code}</c> below the site root.
/// </summary>
/// <param name="code">Sub-folder name the documents are stored in.</param>
/// <returns>
/// JSON with the last uploaded file name (<c>Message</c>) and the name it was
/// stored under (<c>documentname</c>), or an error message if saving failed.
/// </returns>
public ActionResult SaveProfileDocument(string code)
{
    bool isSavedSuccessfully = true;
    string fName = "";
    string _documentname = String.Empty;
    try
    {
        foreach (string fileName in Request.Files)
        {
            HttpPostedFileBase file = Request.Files[fileName];
            // Check for null BEFORE touching the file; the original read
            // file.FileName first and only then tested for null.
            if (file == null)
            {
                continue;
            }

            fName = file.FileName;
            if (file.ContentLength <= 0)
            {
                continue;
            }

            // Plain concatenation: `code` is no longer part of a format string,
            // so braces in it cannot break string.Format.
            // NOTE(review): `code` comes from the request and is used as a folder
            // name without validation - consider restricting it to a safe pattern.
            string pathString = Server.MapPath(@"\") + "Documents\\Profile\\" + code;
            System.IO.Directory.CreateDirectory(pathString); // no-op when the folder already exists

            // Strip any directory part the client sent so the upload cannot
            // escape the target folder.
            string safeName = Path.GetFileName(file.FileName);
            _documentname = safeName;
            string path = Path.Combine(pathString, safeName);
            if (System.IO.File.Exists(path))
            {
                // Never overwrite an existing document; store under a unique name instead.
                _documentname = Guid.NewGuid() + "_" + safeName;
                path = Path.Combine(pathString, _documentname);
            }
            file.SaveAs(path);
        }
    }
    catch (Exception)
    {
        isSavedSuccessfully = false;
    }

    if (isSavedSuccessfully)
    {
        return Json(new { Message = fName, documentname = _documentname });
    }
    else
    {
        return Json(new { Message = "Error in saving file", documentname = "" });
    }
}
The code above saves the uploaded file as it is,
but I need to convert the file to PDF first and then save it.
For the conversion I need either a separate class or a method that I can call from here.
In short: whenever a file of any type is uploaded, it should be converted to PDF at that point and then saved to a folder (or wherever).
You can't convert an image file to PDF directly. You can, however, create a PDF file and add the image file to it:
// Creates "Images.pdf" in the mapped PDFs folder containing a caption followed
// by one GIF taken from the mapped Images folder.
var pdfFolder = Server.MapPath("PDFs");
var imageFolder = Server.MapPath("Images");
var pdf = new Document();
try
{
    var target = new FileStream(pdfFolder + "/Images.pdf", FileMode.Create);
    PdfWriter.GetInstance(pdf, target);
    pdf.Open();
    pdf.Add(new Paragraph("GIF"));
    pdf.Add(Image.GetInstance(imageFolder + "/mikesdotnetting.gif"));
}
catch (Exception)
{
    //Log error;
}
finally
{
    // Always close the document, whether or not building it succeeded.
    pdf.Close();
}
Here is the article I am referring to:
https://www.mikesdotnetting.com/article/87/itextsharp-working-with-images

Editing gridview excel export

I have implemented a method that exports any RadGridView passed as a parameter to Excel. It exports completely fine, but I want to enhance it by adding a title to the first row of the Excel file and then appending the RadGridView underneath that row. Does anyone have an idea how I should go about that?
/// <summary>
/// Prompts for a target file and exports the items of <paramref name="grid"/>
/// to it in ExcelML format, with column headers and column footers.
/// </summary>
/// <param name="grid">Grid whose <c>ItemsSource</c> is exported.</param>
public static void Export(RadGridView grid)
{
    const string extension = "xls";

    var dialog = new SaveFileDialog();
    dialog.DefaultExt = extension;
    dialog.Filter = String.Format("{1} files (*.{0})|*.{0}|All files (*.*)|*.*", extension, "Excel");
    dialog.FilterIndex = 1;

    // Anything other than an explicit confirmation means nothing is exported.
    if (dialog.ShowDialog() == true)
    {
        using (var target = dialog.OpenFile())
        {
            var options = new GridViewExportOptions();
            options.Format = ExportFormat.ExcelML;
            options.ShowColumnHeaders = true;
            options.ShowColumnFooters = true;
            options.ShowGroupFooters = false;
            options.Items = (IEnumerable)grid.ItemsSource;

            grid.Export(target, options);
        }
    }
}
If it interests anyone, here's the solution I used:
/// <summary>
/// Exports <paramref name="grid"/> to an ExcelML file chosen by the user,
/// inserting an extra first row that contains <paramref name="header"/>, and
/// then opens the file.
/// </summary>
/// <param name="grid">Grid to export.</param>
/// <param name="header">Title text for the first row; also the suggested file name.</param>
public static void ExportWithHeader(RadGridView grid, string header)
{
    try
    {
        string extension = "xls";
        SaveFileDialog dialog = new SaveFileDialog()
        {
            DefaultExt = extension,
            Filter = String.Format("{1} files (*.{0})|*.{0}|All files (*.*)|*.*", extension, "Excel"),
            FilterIndex = 1,
            FileName = header
        };
        if (dialog.ShowDialog() == true)
        {
            using (Stream stream = dialog.OpenFile())
            using (MemoryStream ms = new MemoryStream())
            {
                // Export into memory first so the markup can be patched before it hits the disk.
                grid.Export(
                    ms,
                    new GridViewExportOptions()
                    {
                        Format = ExportFormat.ExcelML,
                        ShowColumnHeaders = true,
                        ShowColumnFooters = true,
                        ShowGroupFooters = false,
                    });
                ms.Seek(0, SeekOrigin.Begin);

                // Escape the title so characters such as '&' or '<' cannot break the XML.
                string headerRow = String.Format(
                    "<Row><Cell ss:Index='1'><Data ss:Type='String'>{0}</Data></Cell></Row>",
                    System.Security.SecurityElement.Escape(header));

                string msStr;
                using (StreamReader sr = new StreamReader(ms))
                {
                    msStr = sr.ReadToEnd();
                }

                // Put the title row in front of the first exported row.
                msStr = msStr.Insert(msStr.IndexOf("<Row>", StringComparison.Ordinal), headerRow);

                // Write the full BYTE length: the original passed the character count,
                // which truncates the file as soon as the text contains non-ASCII characters.
                byte[] payload = Encoding.UTF8.GetBytes(msStr);
                stream.Write(payload, 0, payload.Length);
            }
            Process.Start(dialog.FileName);
        }
    }
    catch
    {
        Notification.Error("Process Busy", "Please exit excel instance.");
    }
}

Dynamically Insert Image to PDF using LiveCycle/ITextSharp

I have a pdf template I created using LiveCycle Designer. Inside it, I have 3 Image Fields that I created, ImageField1, ImageField2, ImageField3. The images are located on a url, let's call it "http://images.com/img/IMAGENAME.jpg", and the user selects the images prior to generating the pdf in which case I store the image names in a string array.
Is it possible to add these images programmatically into the Image Fields? The methods I've tried so far have only lead to a corrupted pdf that won't open at all.
/// <summary>
/// Stamps a copy of the brochure template for the property with the given id
/// and adds up to three of the property's brochure photos to it.
/// </summary>
/// <param name="id">Identifier of the property.</param>
/// <returns>The generated file name (<c>{id}-Brochure.pdf</c>), or an empty string on failure.</returns>
public string Foo(int id)
{
    // The template is described as having three image fields, so at most three photos are used.
    const int MaxPhotos = 3;
    try
    {
        var property = ((IRepositoryBase)PropertyRepository).GetById<Property>(id);
        // Not used below; the lookups are kept because the original performs them.
        var purchase = ((IRepositoryBase)PropertyRepository).GetByPropertyId<PropertyPurchase>(id);
        var inspection = ((IRepositoryBase)PropertyRepository).GetByPropertyId<PropertyInspection>(id);

        var file = HttpContext.Current.Server.MapPath("\\Assets\\documents\\originals\\Brochure.pdf");
        var brochureName = string.Format("{0}-Brochure.pdf", property.Id);
        var tmp = HttpContext.Current.Server.MapPath("\\Assets\\documents\\temps\\") + brochureName;

        var pdfReader = new PdfReader(file);
        try
        {
            // `using` guarantees the output file handle is released even when
            // fetching or adding an image fails half-way through.
            using (var output = new FileStream(tmp, FileMode.Create))
            {
                var pdfStamper = new PdfStamper(pdfReader, output);
                string[] photos = property.BrochurePhoto.Split(' ');

                // NOTE(review): this content byte is created stand-alone rather than
                // obtained from the stamper for a page; the question reports a
                // corrupted PDF, so this is the first place to look.
                PdfContentByte cB = new PdfContentByte(pdfStamper.Writer);

                // Split never yields null entries, so the old `!= null` checks were
                // always true and indexing [1]/[2] threw when fewer names were stored.
                int photoCount = Math.Min(photos.Length, MaxPhotos);
                for (int i = 0; i < photoCount; i++)
                {
                    if (string.IsNullOrEmpty(photos[i]))
                    {
                        continue;
                    }
                    iTextSharp.text.Image photoToPdf = iTextSharp.text.Image.GetInstance(new Uri("http://images.com/img/" + photos[i] + ".jpg"));
                    cB.AddImage(photoToPdf);
                }

                pdfStamper.FormFlattening = false;
                pdfStamper.Close();
            }
        }
        finally
        {
            pdfReader.Close();
        }
        return brochureName;
    }
    catch (Exception ex)
    {
        Log.Error(ex);
        return string.Empty;
    }
}

Error "EOF In Header" with version SharpZipLib 0.86.0.518

I use VS 2008 SP1 in Windows XP.
I have updated ICSharpCode.SharpZipLib.dll from older 0.85.4.369 to new 0.86.0.518.
I have been using it successfully for older 0.85.4.369.
I have been able to zip and unzip both files and folders without a problem - well, until now.
But now I get the error "EOF In Header" when reading a ZIP file that I generated with ICSharpCode.SharpZipLib.dll as well.
My code C# is the same, no changes about it.
Fails: theEntry = z.GetNextEntry();
/// <summary>
/// Looks through the ZIP file for the first entry whose name ends with
/// "empaquetado.xml", parses it with <c>LeerEmpaquetado</c> and returns its label.
/// </summary>
/// <param name="zipFic">Path of the ZIP file to read.</param>
/// <returns>The <c>Etiqueta</c> of the parsed entry, or <c>null</c> when no such entry exists.</returns>
public static string LeerEtiquetaEmpaquetado(string zipFic)
{
    ZipInputStream z = null;
    try
    {
        z = new ZipInputStream(File.OpenRead(zipFic));

        for (ZipEntry entry = z.GetNextEntry(); entry != null; entry = z.GetNextEntry())
        {
            if (!entry.Name.EndsWith("empaquetado.xml"))
            {
                continue;
            }

            // Copy the entry's decompressed bytes into memory, 2 KB at a time.
            using (MemoryStream contents = new MemoryStream())
            {
                byte[] chunk = new byte[2048];
                int read;
                while ((read = z.Read(chunk, 0, chunk.Length)) > 0)
                {
                    contents.Write(chunk, 0, read);
                }

                DatosEmpaquetado parsed = LeerEmpaquetado(contents);
                return parsed.Etiqueta;
            }
        }

        return null;
    }
    catch (Exception exc)
    {
        // Trace the failure and let the caller deal with it.
        System.Diagnostics.Trace.WriteLine("Excepción: " + exc.Message);
        System.Diagnostics.Trace.WriteLine(exc.StackTrace);
        throw;
    }
    finally
    {
        if (z != null)
        {
            z.Close();
            z.Dispose();
        }
    }
}
The ICSharpCode.SharpZipLib.dll ( 0.86.0.518 ) seems unable to open the ZIPs it just created.
Very Strange thing is:
The newly-created files open just fine in WinRAR.
ZIP files created with previous versions of the DLL open just fine with the new DLL.
Code for ZIP file:
/// <summary>
/// Packs every file below <paramref name="dirOutput"/> (recursively) into
/// <c>{dirDestino}\{nombre}.zip</c>, storing paths relative to <paramref name="dirOutput"/>.
/// </summary>
/// <param name="dirOutput">Folder whose contents are zipped; temporarily becomes the current directory.</param>
/// <param name="nombre">Archive name without extension.</param>
/// <param name="dirDestino">Folder the archive is written to.</param>
public static void EmpaquetarProyecto(string dirOutput, string nombre, string dirDestino)
{
    string dirActual = Environment.CurrentDirectory;
    Environment.CurrentDirectory = dirOutput;
    try
    {
        // Inside the try so the working directory is restored even if listing fails.
        string[] fileNames = Directory.GetFiles(".", "*.*", SearchOption.AllDirectories);

        Crc32 objCrc32 = new Crc32();
        nombre = Path.Combine(dirDestino, nombre + ".zip");

        // `using` closes the archive (and its file handle) even when adding an entry throws.
        using (ZipOutputStream strmZipOutputStream = new ZipOutputStream(File.Create(nombre)))
        {
            strmZipOutputStream.SetLevel(6);
            foreach (string aux in fileNames)
            {
                // Drop the leading ".\" so entry names are plain relative paths.
                string strFile = aux.StartsWith(".\\", StringComparison.Ordinal) ? aux.Substring(2) : aux;

                // ReadAllBytes always returns the complete file; a single Stream.Read
                // call is allowed to return fewer bytes than requested.
                byte[] abyBuffer = File.ReadAllBytes(strFile);

                ZipEntry theEntry = new ZipEntry(strFile);
                theEntry.DateTime = new FileInfo(strFile).LastWriteTime;
                theEntry.Size = abyBuffer.Length;

                // NOTE(review): size and CRC are preset as in the original; the author
                // reports that archives become unreadable with 0.86 unless the CRC
                // code is removed - verify against the library's documentation.
                objCrc32.Reset();
                objCrc32.Update(abyBuffer);
                theEntry.Crc = objCrc32.Value;

                strmZipOutputStream.PutNextEntry(theEntry);
                strmZipOutputStream.Write(abyBuffer, 0, abyBuffer.Length);
            }
            strmZipOutputStream.Finish();
        }
    }
    finally
    {
        Environment.CurrentDirectory = dirActual;
    }
}
Perhaps the error is related to the CRC, I think.
Any ideas about it? Are any changes needed in my code?
Edit: if I delete the CRC code, it works - but why?

Categories

Resources