Unresponsive UI using background worker, processing files are very slow - c#

Unresponsive UI using background worker, processing files are very slow.
Two different issues encountered here. GDpicture SDK is used for image processing. CPU Utilization is bare minimum, how can I maximize performance, ultimately have responsive and fast wpf application.
namespace OCR
{
public partial class MainWindow : Window
{
BackgroundWorker bw;/*= new BackgroundWorker();*/
private SynchronizationContext threadSyn = null;
string log_cap = string.Empty;
List<string> log_list = new List<string>();
string value = "Merged";
public MainWindow()
{
try
{
InitializeComponent();
InitializeBackgroundWorker();
File_process();
string configpath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, #"path.txt");
string[] configfile = File.ReadAllLines(configpath);
if (configfile.Length > 1)
{
ip.Text = configfile[0];
op.Text = configfile[1];
ex_tb.Text = configfile[2];
Protb.Text = configfile[3];
}
cbPDFConform.Items.Clear();
for (int i = 0; i < Enum.GetNames(typeof(PdfConformance)).Length - 1; i++)
{
ComboBoxItem cbi = new ComboBoxItem();
cbi.Content = Enum.GetName(typeof(PdfConformance), (PdfConformance)i);
PdfConformance test = (PdfConformance)i;
cbi.Tag = (PdfConformance)i;
cbPDFConform.Items.Add(cbi);
}
cbPDFConform.SelectedIndex = 0;
cbProcessorCount.Items.Clear();
for (int i = 1; i <= Environment.ProcessorCount; i++)
{
cbProcessorCount.Items.Add(i.ToString());
if (Environment.ProcessorCount / 2 == i) { cbProcessorCount.SelectedIndex = i - 1; }
}
LicenseManager oLicenseManager = new LicenseManager();
oLicenseManager.RegisterKEY("");
configpath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, #"path.txt");
configfile = File.ReadAllLines(configpath);
if (configfile.Length > 1)
{
ip.Text = configfile[0];
op.Text = configfile[1];
ex_tb.Text = configfile[2];
Protb.Text = configfile[3];
}
GrantAccess(ip.Text);
GrantAccess(op.Text);
GrantAccess(ex_tb.Text);
GrantAccess(Protb.Text);
threadSyn = SynchronizationContext.Current;
}
catch (Exception e1)
{ MessageBox.Show("e1" + e1.Message); }
}
private void InitializeBackgroundWorker()
{
bw = new BackgroundWorker();
bw.DoWork += Bw_DoWork;
bw.WorkerSupportsCancellation = true;
}
public async void File_process()
{
await Task.Run(() => converttiffpdfreducer());
}
private void Bw_DoWork(object sender, DoWorkEventArgs e)
{
this.Dispatcher.Invoke(() =>
{
try
{
using (StreamWriter sw = new StreamWriter(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, #"path.txt"), false))
{
sw.WriteLine(ip.Text);
sw.WriteLine(op.Text);
sw.WriteLine(ex_tb.Text);
sw.WriteLine(Protb.Text);
sw.Close();
}
ParallelOptions parallelOptions = new ParallelOptions();
parallelOptions.MaxDegreeOfParallelism = int.Parse(cbProcessorCount.SelectedItem.ToString());
var watch1 = new System.Diagnostics.Stopwatch();
watch1.Start();
converttiffpdfreducer();
//deletenew();
watch1.Stop();
TimeSpan ts1 = watch1.Elapsed;
ts1.ToString("mm\\:ss");
if (MergeChk.IsChecked == false)
{
value = "OCRed";
}
WriteLn("All documents have been successfully " + value + " " + ts1 + " " + DateTime.Now +" "+Environment.UserName);
}
catch (Exception DOwork)
{ MessageBox.Show("e2 " + DOwork.Message); }
});
}
private void GrantAccess(string fullPath)
{
DirectoryInfo dInfo = new DirectoryInfo(fullPath);
DirectorySecurity dSecurity = dInfo.GetAccessControl();
dSecurity.AddAccessRule(new FileSystemAccessRule(new SecurityIdentifier(WellKnownSidType.WorldSid, null), FileSystemRights.FullControl, InheritanceFlags.ObjectInherit | InheritanceFlags.ContainerInherit, PropagationFlags.NoPropagateInherit, AccessControlType.Allow));
dInfo.SetAccessControl(dSecurity);
}
private string[] mutliocr(string[] arr)
{
string box = string.Empty;
string box1 = string.Empty;
try
{
string filepath = string.Empty;
string outpath = ex_tb.Text;
if (MergeChk.IsChecked == true)
{ filepath = op.Text; }
else if (MergeChk.IsChecked == false)
{ filepath = Protb.Text; }
System.Windows.Threading.Dispatcher.CurrentDispatcher.Invoke((Action)(() =>
{
Thread.CurrentThread.IsBackground = true;
var watch2 = new System.Diagnostics.Stopwatch();
watch2.Start();
string[] getfilearray = arr;
for (int f = 0; f < getfilearray.Length; f++)
{
string dirName = Directory.GetParent(getfilearray[f]).FullName;
string folder = Directory.GetParent(getfilearray[f]).FullName;
box = Path.GetDirectoryName(getfilearray[f]);
box1 = Path.GetDirectoryName(box);
string getextension = Path.GetExtension(getfilearray[f]);
string[] newF = Directory.EnumerateFiles(dirName, "*.*", SearchOption.AllDirectories).ToArray();
string FN = Directory.GetParent(getfilearray[f]).Name;
string ocrfolder = (new FileInfo(getfilearray[f]).Directory.FullName);
string filen = Path.Combine(ocrfolder, folder, FN + "-ocr" + getextension);
string dict = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Redist", "OCR");
if (!Directory.Exists(ocrfolder))
{
Directory.CreateDirectory(ocrfolder);
}
GrantAccess(ocrfolder);
GdPicturePDF oGdPicturePDF = new GdPicturePDF();
oGdPicturePDF.OcrPagesDone += OcrPagesDone;
void OcrPagesDone(GdPictureStatus status1)
{
if (oGdPicturePDF.SaveToFile(filen) == GdPictureStatus.OK)
{ }
else
MessageBox.Show("PDF: The OCR-ed file has failed to save. Status: " + oGdPicturePDF.GetStat().ToString());
}
GdPictureStatus status = GdPictureStatus.OK;
if (oGdPicturePDF.LoadFromFile(getfilearray[f], false) == GdPictureStatus.OK)
if (status == GdPictureStatus.OK)
{
if (oGdPicturePDF.OcrPages_4("*", 0, "eng", dict, "", 300, OCRMode.FavorSpeed, 1, true) == GdPictureStatus.OK)
if (status == GdPictureStatus.OK)
{ }
else
{ MessageBox.Show("PDF: The OCR process has failed. Status: " + status.ToString()); }
}
else
{ MessageBox.Show("PDF: The PDF file has failed to load. Status: " + status.ToString()); }
oGdPicturePDF.Dispose();
GrantAccess(getfilearray[f]);
File.Delete(getfilearray[f]);
watch2.Stop();
TimeSpan ts2 = watch2.Elapsed;
ts2.ToString("mm\\:ss");
WriteLn(" OCR pages " + filen.Replace(op.Text, "") + " " + ts2 + " " + DateTime.Now);
}
if (MergeChk.IsChecked == true)
{
foreach (string str in Directory.EnumerateFiles(op.Text, "*.pdf", SearchOption.AllDirectories).ToArray())
{
if (Path.GetFileNameWithoutExtension(str).EndsWith("-ocr"))
File.Move(str, Path.Combine(Path.GetDirectoryName(str), Path.GetFileNameWithoutExtension(str).Substring(0, Path.GetFileNameWithoutExtension(str).Length - 4) + ".pdf"));
}
}
if (MergeChk.IsChecked == false)
{
FileSystem.MoveDirectory(Protb.Text, op.Text, UIOption.AllDialogs);
Directory.CreateDirectory(Protb.Text);
string FF = string.Empty;
foreach (string str in Directory.EnumerateFiles(op.Text, "*.pdf", SearchOption.AllDirectories))
{
if (Path.GetFileNameWithoutExtension(str).EndsWith("-ocr"))
File.Move(str, Path.Combine(Path.GetDirectoryName(str), Path.GetFileNameWithoutExtension(str).Substring(0, Path.GetFileNameWithoutExtension(str).Length - 4) + ".pdf"));
}
}
}));
}
catch (Exception mul)
{
}
return arr;
}
public static string browseFolder()
{
Microsoft.Win32.OpenFileDialog dlg = new Microsoft.Win32.OpenFileDialog();
System.Windows.Forms.FolderBrowserDialog fbd = new System.Windows.Forms.FolderBrowserDialog();
System.Windows.Forms.DialogResult result = fbd.ShowDialog();
string path = string.Empty;
if (result == (System.Windows.Forms.DialogResult)MessageBoxResult.OK)
{
path = fbd.SelectedPath;
if (path[path.Length - 1] != '\\')
{
path = path + "\\";
}
}
return path;
}
private string[] converttiffpdfreducer()
{
string[] dir = null;
string box = string.Empty;
string box1 = string.Empty;
string[] gg = null;
try
{
string filepath = ip.Text;
string outpath = Protb.Text;
System.Windows.Threading.Dispatcher.CurrentDispatcher.Invoke((Action)(() =>
{
PdfConformance optPDFConform = PdfConformance.Unknown;
dir = Directory.EnumerateDirectories(filepath, "*.*", SearchOption.AllDirectories).Where(l => l.Length != 0).OrderBy(f => f).ToArray();
for (int ad = 0; ad < dir.Length; ad++)
{ string[] getfilearray = Directory.EnumerateFiles(dir[ad], "*.*", SearchOption.AllDirectories).ToArray();
if (getfilearray.Length == 0)
break;
if (getfilearray.Length != 0)
for (int f = 0; f < getfilearray.Length; f++)
{
string getext = Path.GetExtension(getfilearray[f]);
string fd = Path.GetDirectoryName(getfilearray[f]);
string op_path = fd.Replace(filepath, Protb.Text);
string getextension = Path.GetExtension(getfilearray[f]);
string dict = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Redist", "OCR");
string outputPath = fd.Replace(filepath, outpath);
string FNAME = Path.GetFileNameWithoutExtension(getfilearray[f]);
string fn = Path.GetDirectoryName(getfilearray[f]).Replace(filepath, outpath);
string filen = Path.Combine(outputPath, fn, FNAME + ".pdf");
string savefile = Path.Combine(op_path, filen);
string pathString = getfilearray[f];
box = Path.GetDirectoryName(getfilearray[f]);
box1 = Path.GetDirectoryName(box);
using (GdPictureDocumentConverter oConverter = new GdPictureDocumentConverter()) {
GdPictureStatus status = new GdPictureStatus();
if (Path.GetExtension(getfilearray[f]).ToUpper() == ".PDF")
{
status = oConverter.LoadFromFile(getfilearray[f], GdPicture14.DocumentFormat.DocumentFormatPDF);
}
else if (Path.GetExtension(getfilearray[f]).ToUpper() == ".TIF" || Path.GetExtension(getfilearray[f]).ToUpper() == ".TIFF")
{
status = oConverter.LoadFromFile(getfilearray[f], GdPicture14.DocumentFormat.DocumentFormatTIFF);
}
else if (Path.GetExtension(getfilearray[f]).ToUpper() == ".JPG")
{
status = oConverter.LoadFromFile(getfilearray[f], GdPicture14.DocumentFormat.DocumentFormatJPEG);
}
if (status == GdPictureStatus.OK)
{
if (!Directory.Exists(op_path))
{
Directory.CreateDirectory(op_path);
}
GrantAccess(op_path);
optPDFConform = (PdfConformance)((ComboBoxItem)cbPDFConform.SelectedItem).Tag;
status = oConverter.SaveAsPDF(savefile, optPDFConform);
if (status == GdPictureStatus.OK)
{ }
else
{ }
}
else
{ }
}
}
string BOXX = box.Replace(ip.Text, Protb.Text);
string[] Arr = Directory.EnumerateFiles(BOXX, "*.pdf", SearchOption.AllDirectories).ToArray();
if (MergeChk.IsChecked == true)
{ merge(Arr); }
else if (MergeChk.IsChecked == false)
{
mutliocr(Arr);
}
}
}));
}
catch (Exception ee)
{ }
return dir;
}
private string[] merge(string[] arr)
{
string box = string.Empty;
string box1 = string.Empty; string[] gg = null;
System.Windows.Threading.Dispatcher.CurrentDispatcher.Invoke((Action)(() =>
{
box = Path.GetDirectoryName(arr[0]);
box1 = Path.GetDirectoryName(box);
string dirName = Directory.GetParent(arr[0]).FullName;
string BOXFILES = Path.GetDirectoryName(dirName);
string folder = Directory.GetParent(arr[0]).FullName.Replace(Protb.Text, op.Text);
string ocrfolder = (new FileInfo(arr[0]).Directory.FullName).Replace(Protb.Text, op.Text);
string fn = Directory.GetParent(arr[0]).Name;
string filen = Path.Combine(ocrfolder, folder, fn + ".pdf");
if (!Directory.Exists(ocrfolder))
{
Directory.CreateDirectory(ocrfolder);
}
GrantAccess(ocrfolder);
using (GdPicturePDF oGdPicturePDF = new GdPicturePDF())
{
GdPictureStatus status = oGdPicturePDF.MergeDocuments(ref arr, filen);
if (status == GdPictureStatus.OK)
{ }
else
{ }
oGdPicturePDF.Dispose();
}
Directory.Delete(box, true);
string BOXX = box.Replace(Protb.Text, op.Text);//op
string[] files = Directory.EnumerateFiles(BOXX, "*.pdf", SearchOption.AllDirectories).ToArray();
if (MergeChk.IsChecked == true)
{ mutliocr(files); }
}));
return gg;
}
private void inbtn_Click(object sender, RoutedEventArgs e)
{
try
{ ip.Text = browseFolder(); }
catch (Exception e7)
{ MessageBox.Show("e7" + e7.Message); }
}
private void obtn_Click(object sender, RoutedEventArgs e)
{
try
{ op.Text = browseFolder(); }
catch (Exception e8)
{ MessageBox.Show("e8" + e8.Message); }
}
private void start_btn_Click(object sender, RoutedEventArgs e)
{
if (!bw.IsBusy)
{
// Cancel the asynchronous operation.
this.bw.CancelAsync();
// Disable the Cancel button.
bw.RunWorkerAsync();
start_btn.Content = "Stop";
//this.Status.Content = "Running....";
}
else
{
bw.CancelAsync();
start_btn.Content = "Start";
//this.Status.Content = "Stopped....";
}
}
private void pro_btn_Click(object sender, RoutedEventArgs e)
{
try
{ Protb.Text = browseFolder(); }
catch (Exception e10)
{ MessageBox.Show("e10" + e10.Message); }
}
private void excep_Click(object sender, RoutedEventArgs e)
{
try
{ ex_tb.Text = browseFolder(); }
catch (Exception e11)
{ MessageBox.Show("e111" + e11.Message); }
}
private void WriteLn(string text)
{
logtb.Dispatcher.BeginInvoke(new Action(() =>
{
logtb.Text += text + Environment.NewLine;
}));
log_list.Add(text);
log_cap = text + Environment.NewLine + log_cap;
using (StreamWriter sw = new StreamWriter(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, #"FileProcessing_log.txt"), false))
{
foreach (string l in log_list)
sw.WriteLine(l);
sw.Close();
}
}
}
}

The point of a BackgroundWorker is to push things off the UI thread. You do that, but then in Bw_DoWork you immediately push things back the UI thread, with this.Dispatcher.Invoke, which blocks the worker thread until the work now running on the UI thread completes.
Basically: remove that this.Dispatcher.Invoke call, and just run the code on the worker. If you need to touch the UI, then you'll need to be on the UI thread for those bits, but only those buts.
Likewise, I doubt that mutliocr should be using dispatch invoke, and it certainly shouldn't change the UI thread to being a background thread (Thread.CurrentThread.IsBackground = true;).

Your code is very very smelly and ugly. You seem to follow absolutely no naming convention at all. Locals and class members are camelCase and PascalCase and some use no casing at all and others use underscores. You should really review you code carefully with many aspects in mind and clean it up. There are some redundancies too.
A really bad habbit of yours is the excessive use of the Dispatcher. For example, you create a background thread and post the complete! work of this thread back to the Dispatcher/UI thread. A huge waste of resources and elimination of any multithreading benefit.
You don't want to put all your work on the Dispatcher. You want to offload CPU intensive eork to a background thread. You want to make use of asynchronous APIs where possible. Because you want to keep the UI responsive. Dispatcher means UI thread.
Some highly important points of interest
GrantAccess implementation is a severe security violation of user rights. Don't modify access rules. Rather filter and ignore resources where the current user is not authorized for access. What is especially critical, is that you never revert the access rights to the original state.
As a general rule: don't execute IO related code (e.g., database, HTTP streams, file IO) on a background thread. Use async APIs instead. Use threading only for CPU bound work (e.g. computations, conversions).
File has an async API you should always use. For example File.OpenRead returns a FileStream which exposes a FileStream.ReadAsnyc member. if you need more convenience when handling files like read line by line, then wrap the FileStream into a StreamReader/StreamWriter and use members like StreamReader.ReadLineAsync
To avoid Dispatcher calls, pass the required UI values to the concurent methods as argument. Better use data binding (which won't eliminate the cross threading issues writing, but would make your code more readable and eliminates Dispatcher invocation to read the values). Take alook at the refactored File_process method below. It shows how to pass UI values as argument to the converttiffpdfreducer method which is then executed on a background thread.
Consider to add cancellation support to to the longrunning converttiffpdfreducer()
Avoid calling ToArray or ToList on an IEnumerable. Those methods are finalizers that immediately execute the originally deferred LINQ queries.
Don't call ToArray or ToList on the result of EnumerateFiles and its likes. Those methods are used to improve performance as they return the filesystem objects item by item. This is especially important if you recursively iterate over the complete filesystem structure. If the filesystem tree is very deep and broad, calling ToArray will force the itereation to complete and then to return all results at once. ToArray on EnumerateFiles is like using GetFiles. You should review your complete code and refactor it properly. You always use EnumerateXYZ wrong!
"The EnumerateFiles and GetFiles methods differ as follows: When you
use EnumerateFiles, you can start enumerating the collection of names
before the whole collection is returned. When you use GetFiles, you
must wait for the whole array of names to be returned before you can
access the array. Therefore, when you are working with many files and
directories, EnumerateFiles can be more efficient."
Use data binding instead of directly accessing UI elements. This allows you to read the properties on the background thread without using the Dispatcher.
Never execute long running operation from the constructor
Never call async code from the constructor
Always keep object instantiation cheep and fast and without hidden performance/resource costs
Never catch Exception. Always catch a specialized exception type.
Don't use empty catch blocks. Either handle the exception if you can or laet it crash your application to give you a chance to fix bugs. When you swallow exceptions bugs will silently creep into your application. You will have a really hard time to discover them. Logging exceptions is not considered handling - rethrow in such case.
You don't have to close a resource explicitly if you declare the resource using the using statement. The implicit call to Dispose once the instruction pointer leaves the using scope will close the resource automatically.
Implementing all the suggestion will significantly speed up your application.
I have refactored only some of your code to show how to properly use async APIs and Task.Run instead of the BackgroundWorker. I have removed every Dispatcher invocation. Instead of direct access to UI elements in order to read their values from the background thread, I have extracted those values before invoking the concurent method and passed those prefetched values as method arguments. If you would use data binding you could read the property values directly and therefore ommit the method parameters.
The MainWindow should be shown manually from App.xaml.cs to allow asynchronous and longrunning initialization of the instance. For this pattern let the class that requires such initialization implement a public InitializeAsync method that can be awaited from the caller's context. Alternatively use Lazy<T> to defer initialization when required e.g., when initialization is depending on explicit access to members.
Although the refactored code will significantly improve the applicatoin's performance, you will have do to do some important refactoring yourself (following the pattern of the already refactored code sections).
Take a look at
InitializeAsync and WriteLnAsync to learn how to use the async file IO API.
converttiffpdfreducer to learn how to use the EnumerateFiles and EnumerateDirectories methods properly in order to significantly improve the performance.
mutliocr, merge and converttiffpdfreducer to learn how pass UI element values as argument in order to avoid Dispatcher invocations.
start_btn_Click and converttiffpdfreducer to learn how to implemnent cancellation and to guard your API against calls during an uninitialized state
App.xaml
<Application Startup="Application_Startup">
</Application>
App.xaml.cs
class App : Application
{
private async void Application_Startup(object sender, StartupEventArgs e)
{
var mainWindow = new MainWindow();
// Because InitializeAsync depends on UI elements,
// we have to wait until the Ui is loaded.
mainWindow.Loaded += OnMainWindowLoaded;
// Either call Show() before initialization or after.
// If before, ensure access to uninitialized members and resources is denied
// e.g. by querying the MainWindow.IsInitialized property in public members and event handlers.
mainWindow.Show();
}
private async void OnMainWindowLoaded(object sender, EventArgs args)
=> await mainWindow.InitializeAsync();
}
MainWindow.xaml.cs
public partial class MainWindow : Window
{
public bool IsInitialized { get; private set; }
private bool IsBusy { get; set; }
private CancellationTokenSource CancellationTokenSource { get; set; }
public MainWindow()
{
InitializeComponent();
CancellationTokenSource = new CancellationTokenSource();
}
// Execute blocking initialization routines asynchronously
public async Task InitializeAsync()
{
if (IsInitialized)
{
return;
}
// Will execute the intesive CPU bound work on a background thread.
await File_process(Cancellationtoken.None);
string configpath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, #"path.txt");
// Use async API to read/write from/to files and other IO resources
using (FileStream configfile = File.OpenRead(configpath))
{
using (var fileReader = new StreamReader(configfile))
{
var configFileContent = new List<string>();
while (!fileReader.EndOfStream)
{
string lineOfFile = await fileReader.ReadLineAsync();
configFileContent.Add(lineOfFile);
}
if (configFileContent.Any())
{
ip.Text = configFileContent[0];
GrantAccess(configFileContent[0]);
op.Text = configFileContent[1];
GrantAccess(configFileContent[1]);
ex_tb.Text = configFileContent[2];
GrantAccess(configFileContent[2]);
Protb.Text = configFileContent[3];
GrantAccess(configFileContent[3]);
}
}
}
cbPDFConform.Items.Clear();
for (int i = 0; i < Enum.GetNames(typeof(PdfConformance)).Length - 1; i++)
{
ComboBoxItem cbi = new ComboBoxItem();
cbi.Content = Enum.GetName(typeof(PdfConformance), (PdfConformance)i);
PdfConformance test = (PdfConformance)i;
cbi.Tag = (PdfConformance)i;
cbPDFConform.Items.Add(cbi);
}
cbPDFConform.SelectedIndex = 0;
cbProcessorCount.Items.Clear();
for (int i = 1; i <= Environment.ProcessorCount; i++)
{
cbProcessorCount.Items.Add(i.ToString());
if (Environment.ProcessorCount / 2 == i) { cbProcessorCount.SelectedIndex = i - 1; }
}
LicenseManager oLicenseManager = new LicenseManager();
oLicenseManager.RegisterKEY("");
threadSyn = SynchronizationContext.Current;
IsInitialiezd = true;
}
public async Task File_process(CancellationToken cancellationToken)
{
// Read UI values to avoid Dispatcher calls from the background thread
string ipText = ip.Text;
string protbText = Protb.Text;
string opText = op.Text;
// Execute the intesive CPU bound work on a background thread.
await Task.Run(() => converttiffpdfreducer(ipText, protbText, opText, cancellationToken), cancellationToken);
}
private async Task DoWorkAsync(CancellationToken cancellationToken)
{
IsBusy = true;
using (var sw = new StreamWriter(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, #"path.txt"), false))
{
await sw.WriteLineAsync(ip.Text);
await sw.WriteLineAsync(op.Text);
await sw.WriteLineAsync(ex_tb.Text);
await sw.WriteLineAsync(Protb.Text);
}
try
{
cancellationToken.ThrowIfCancellationRequested();
var watch1 = new System.Diagnostics.Stopwatch();
watch1.Start();
// Consider to add cancellation support to File_process
await File_process(cancellationToken);
watch1.Stop();
TimeSpan ts1 = watch1.Elapsed;
ts1.ToString("mm\\:ss");
if (MergeChk.IsChecked == false)
{
value = "OCRed";
}
await WriteLnAsync("All documents have been successfully " + value + " " + ts1 + " " + DateTime.Now + " " + Environment.UserName, cancellationToken);
IsBusy = false;
}
catch (OperationCanceledException)
{
IsBusy = false;
throw;
}
}
private async Task WriteLnAsync(string text, CancellationToken cancellationToken)
{
logtb.Text += text + Environment.NewLine;
log_list.Add(text);
log_cap = text + Environment.NewLine + log_cap;
using (var sw = new StreamWriter(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, #"FileProcessing_log.txt"), false))
{
foreach (string l in log_list)
{
cancellationToken.ThrowIfCancellationRequested();
await sw.WriteLineAsync(l);
}
}
}
private async void start_btn_Click(object sender, RoutedEventArgs e)
{
if (!IsInitialized)
{
return;
}
if (IsBusy)
{
// Cancel the longrunning operation.
this.CancellationTokenSource.Cancel();
}
start_btn.Content = "Start";
try
{
await DoWorkAsync(CancellationTokenSource.Token);
}
catch (OperationCanceledException)
{
CancellationTokenSource?.Dispose();
CancellationTokenSource = new CancellationTokenSource();
}
}
private void converttiffpdfreducer(
string ipText,
string protbText,
string opText,
CancellationToken cancellationToken)
{
string[] dir = null;
string box = string.Empty;
string box1 = string.Empty;
string[] gg = null;
PdfConformance optPDFConform = PdfConformance.Unknown;
foreach (var directoryPath in Directory.EnumerateDirectories(ipText, "*.*", SearchOption.AllDirectories).Where(l => l.Length != 0))
{
cancellationToken.ThrowIfCancellationRequested();
foreach (var filePath in Directory.EnumerateFiles(directoryPath, "*.*", SearchOption.AllDirectories))
{
cancellationToken.ThrowIfCancellationRequested();
string getext = Path.GetExtension(filePath);
string fd = Path.GetDirectoryName(filePath);
string op_path = fd.Replace(ipText, protbText);
string getextension = Path.GetExtension(filePath);
string dict = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Redist", "OCR");
string outputPath = fd.Replace(ipText, protbText);
string FNAME = Path.GetFileNameWithoutExtension(filePath);
string fn = Path.GetDirectoryName(filePath).Replace(ipText, protbText);
string filen = Path.Combine(outputPath, fn, FNAME + ".pdf");
string savefile = Path.Combine(op_path, filen);
box = Path.GetDirectoryName(filePath);
box1 = Path.GetDirectoryName(box);
using (GdPictureDocumentConverter oConverter = new GdPictureDocumentConverter())
{
GdPictureStatus status = new GdPictureStatus();
if (Path.GetExtension(filePath).ToUpper() == ".PDF")
{
status = oConverter.LoadFromFile(filePath, GdPicture14.DocumentFormat.DocumentFormatPDF);
}
else if (Path.GetExtension(filePath).ToUpper() == ".TIF" || Path.GetExtension(filePath).ToUpper() == ".TIFF")
{
status = oConverter.LoadFromFile(filePath, GdPicture14.DocumentFormat.DocumentFormatTIFF);
}
else if (Path.GetExtension(filePath).ToUpper() == ".JPG")
{
status = oConverter.LoadFromFile(filePath, GdPicture14.DocumentFormat.DocumentFormatJPEG);
}
if (status == GdPictureStatus.OK)
{
if (!Directory.Exists(op_path))
{
Directory.CreateDirectory(op_path);
}
GrantAccess(op_path);
optPDFConform = (PdfConformance)((ComboBoxItem)cbPDFConform.SelectedItem).Tag;
status = oConverter.SaveAsPDF(savefile, optPDFConform);
if (status == GdPictureStatus.OK)
{ }
else
{ }
}
else
{ }
}
}
}
string BOXX = box.Replace(ipText, protbText);
// TODO::Refactor 'merge' and replace 'ToArray' with 'foreach'
string[] Arr = Directory.EnumerateFiles(BOXX, "*.pdf", SearchOption.AllDirectories).ToArray();
if (MergeChk.IsChecked == true)
{ merge(Arr, protbText); }
else if (MergeChk.IsChecked == false)
{
mutliocr(Arr);
}
}
private string[] merge(string[] arr, string protbText, string opText)
{
string box = string.Empty;
string box1 = string.Empty; string[] gg = null;
System.Windows.Threading.Dispatcher.CurrentDispatcher.Invoke((Action)(() =>
{
box = Path.GetDirectoryName(arr[0]);
box1 = Path.GetDirectoryName(box);
string dirName = Directory.GetParent(arr[0]).FullName;
string BOXFILES = Path.GetDirectoryName(dirName);
string folder = Directory.GetParent(arr[0]).FullName.Replace(protbText, opText);
string ocrfolder = (new FileInfo(arr[0]).Directory.FullName).Replace(protbText, opText);
string fn = Directory.GetParent(arr[0]).Name;
string filen = Path.Combine(ocrfolder, folder, fn + ".pdf");
if (!Directory.Exists(ocrfolder))
{
Directory.CreateDirectory(ocrfolder);
}
GrantAccess(ocrfolder);
using (GdPicturePDF oGdPicturePDF = new GdPicturePDF())
{
GdPictureStatus status = oGdPicturePDF.MergeDocuments(ref arr, filen);
if (status == GdPictureStatus.OK)
{ }
else
{ }
oGdPicturePDF.Dispose();
}
Directory.Delete(box, true);
string BOXX = box.Replace(protbText, opText);//op
string[] files = Directory.EnumerateFiles(BOXX, "*.pdf", SearchOption.AllDirectories).ToArray();
if (MergeChk.IsChecked == true)
{ mutliocr(files, protbText, opText); }
}));
return gg;
}
private string[] mutliocr(string[] arr, string protbText, string opText)
{
string box = string.Empty;
string box1 = string.Empty;
try
{
string filepath = string.Empty;
if (MergeChk.IsChecked == true)
{ filepath = opText; }
else if (MergeChk.IsChecked == false)
{ filepath = protbText; }
System.Windows.Threading.Dispatcher.CurrentDispatcher.Invoke((Action)(() =>
{
Thread.CurrentThread.IsBackground = true;
var watch2 = new System.Diagnostics.Stopwatch();
watch2.Start();
string[] getfilearray = arr;
for (int f = 0; f < getfilearray.Length; f++)
{
string dirName = Directory.GetParent(getfilearray[f]).FullName;
string folder = Directory.GetParent(getfilearray[f]).FullName;
box = Path.GetDirectoryName(getfilearray[f]);
box1 = Path.GetDirectoryName(box);
string getextension = Path.GetExtension(getfilearray[f]);
string[] newF = Directory.EnumerateFiles(dirName, "*.*", SearchOption.AllDirectories).ToArray();
string FN = Directory.GetParent(getfilearray[f]).Name;
string ocrfolder = (new FileInfo(getfilearray[f]).Directory.FullName);
string filen = Path.Combine(ocrfolder, folder, FN + "-ocr" + getextension);
string dict = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Redist", "OCR");
if (!Directory.Exists(ocrfolder))
{
Directory.CreateDirectory(ocrfolder);
}
GrantAccess(ocrfolder);
GdPicturePDF oGdPicturePDF = new GdPicturePDF();
oGdPicturePDF.OcrPagesDone += OcrPagesDone;
void OcrPagesDone(GdPictureStatus status1)
{
if (oGdPicturePDF.SaveToFile(filen) == GdPictureStatus.OK)
{ }
else
MessageBox.Show("PDF: The OCR-ed file has failed to save. Status: " + oGdPicturePDF.GetStat().ToString());
}
GdPictureStatus status = GdPictureStatus.OK;
if (oGdPicturePDF.LoadFromFile(getfilearray[f], false) == GdPictureStatus.OK)
if (status == GdPictureStatus.OK)
{
if (oGdPicturePDF.OcrPages_4("*", 0, "eng", dict, "", 300, OCRMode.FavorSpeed, 1, true) == GdPictureStatus.OK)
if (status == GdPictureStatus.OK)
{ }
else
{ MessageBox.Show("PDF: The OCR process has failed. Status: " + status.ToString()); }
}
else
{ MessageBox.Show("PDF: The PDF file has failed to load. Status: " + status.ToString()); }
oGdPicturePDF.Dispose();
GrantAccess(getfilearray[f]);
File.Delete(getfilearray[f]);
watch2.Stop();
TimeSpan ts2 = watch2.Elapsed;
ts2.ToString("mm\\:ss");
WriteLn(" OCR pages " + filen.Replace(opText, "") + " " + ts2 + " " + DateTime.Now);
}
if (MergeChk.IsChecked == true)
{
foreach (string str in Directory.EnumerateFiles(opText, "*.pdf", SearchOption.AllDirectories).ToArray())
{
if (Path.GetFileNameWithoutExtension(str).EndsWith("-ocr"))
File.Move(str, Path.Combine(Path.GetDirectoryName(str), Path.GetFileNameWithoutExtension(str).Substring(0, Path.GetFileNameWithoutExtension(str).Length - 4) + ".pdf"));
}
}
if (MergeChk.IsChecked == false)
{
FileSystem.MoveDirectory(protbText, opText, UIOption.AllDialogs);
Directory.CreateDirectory(protbText);
string FF = string.Empty;
foreach (string str in Directory.EnumerateFiles(opText, "*.pdf", SearchOption.AllDirectories))
{
if (Path.GetFileNameWithoutExtension(str).EndsWith("-ocr"))
File.Move(str, Path.Combine(Path.GetDirectoryName(str), Path.GetFileNameWithoutExtension(str).Substring(0, Path.GetFileNameWithoutExtension(str).Length - 4) + ".pdf"));
}
}
}));
}
catch (Exception mul)
{
}
return arr;
}
}

Related

C# and ArcMaps: How do i pass ui inputs to background thread without getting SERVERFAULT?

I'm helping a colleague debug a tool he is creating for ArcGis. It is used to create Replicas, and is done through selecting some different inputs from dropdowns in a winform.
The issue is that when running all the code on the UI-thread, our ui freezes. This is what he wants me to solve. The code causing this is the following, the relevant code is just the button1_Click_1() method, but i provided the second class for contex:
public partial class FrmReplicaAdmin : Form
{
private void button1_Click_1(object sender, EventArgs e)
{
DataConnectionConfig selectedDatabase = cmboxDatabase.SelectedItem as DataConnectionConfig;
if (selectedDatabase.PlWorkspace == null)
{
statusLabel.Text = "Could not open GeoDatabase (PL)";
return;
}
if (selectedDatabase.DataWorkspace == null)
{
statusLabel.Text = "Could not open GeoDatabase (NIS)";
return;
}
int scaleBand = int.Parse(cmboxScale.SelectedItem.ToString());
string gridName = cmboxGridNr.SelectedItem as string;
IGeometry shapeOfSelectedAOI = getSelectedPolygon(gridName, scaleBand, selectedDatabase);
(ArcMap.Application as IMxApplication2).PauseDrawing = true;
replica.CheckOutReplica(selectedDatabase.PlWorkspace, selectedDatabase.DataWorkspace, selectedDatabase.TemplateGdb, gridName, scaleBand, shapeOfSelectedAOI, selectedDatabase.DatasetName);
(ArcMap.Application as IMxApplication2).PauseDrawing = false;
}
}
public class Replica
{
public void CheckOutReplica(IWorkspace plWorkspace, IWorkspace parentWorkspace, string pathToDatabaseTemplate, string gridName, int scaleBand, IGeometry shapeOfSelectedAOI, string datasetName = "NIS.Nautical")
{
try
{
string replicaName = string.Format("{0}_{1}_r", Environment.UserName, gridName);
string versionName = string.Format("{0}_{1}", Environment.UserName, gridName);
string pathToLocalChildDatabase = System.IO.Path.Combine(ReplicationPath, $"{replicaName}.gdb");
Directory.CreateDirectory(pathToLocalChildDatabase);
foreach (string newPath in Directory.GetFiles(pathToDatabaseTemplate, "*.*", SearchOption.AllDirectories))
File.Copy(newPath, newPath.Replace(pathToDatabaseTemplate, pathToLocalChildDatabase), true);
IWorkspace childWorkspace = OpenWorkspace(pathToLocalChildDatabase);
// Create Version in ParentDatabase
IEnumVersionInfo versionEnum = (parentWorkspace as IVersionedWorkspace).Versions;
versionEnum.Reset();
for (IVersionInfo versionInfo = versionEnum.Next(); versionInfo != null; versionInfo = versionEnum.Next())
{
if (versionInfo.VersionName.EndsWith(versionName))
{
System.Windows.Forms.MessageBox.Show("A version named '" + versionName + "' has already been created", "map...", System.Windows.Forms.MessageBoxButtons.OK);
return;
}
}
Marshal.ReleaseComObject(versionEnum);
IVersion newVersion = (parentWorkspace as IVersionedWorkspace).DefaultVersion.CreateVersion(versionName);
newVersion.Access = esriVersionAccess.esriVersionAccessPublic;
string defQuery = "((IS_CONFLATE=1 AND PLTS_COMP_SCALE >= " + scaleBand + ") OR ((IS_CONFLATE=0 OR IS_CONFLATE IS NULL) AND PLTS_COMP_SCALE = " + scaleBand + "))";
ReplicateData(parentWorkspace, newVersion.VersionInfo, replicaName, shapeOfSelectedAOI, childWorkspace, defQuery, datasetName);
// Update map. Show replica data
ILayerFile nauticalLyrFile = new LayerFileClass();
nauticalLyrFile.Open(ReplicationPath + #"\Nautical.lyr");
AddDataToMap((ArcMap.Application.Document as IMxDocument), childWorkspace as IFeatureWorkspace, nauticalLyrFile, gridName, datasetName);
(ArcMap.Application.Document as IMxDocument).ActiveView.Extent = shapeOfSelectedAOI.Envelope;
Marshal.ReleaseComObject(childWorkspace);
}
catch (Exception err)
{
System.Windows.Forms.MessageBox.Show($"Unexpected error. {Environment.NewLine}{err.Message}", "map...", System.Windows.Forms.MessageBoxButtons.OK);
}
}
}
private void ReplicateData(IWorkspace parentWorkspace, IVersionInfo versionInfo, string replicaName, IGeometry area, IWorkspace childWorkspace, string definitionQuery, string featureDataset)
{
if (childWorkspace == null)
throw new ArgumentNullException("Child workspace is null.");
if (parentWorkspace == null)
throw new ArgumentNullException("Parent workspace is null.");
if (versionInfo == null)
throw new ArgumentNullException("Version name is null.");
if (string.IsNullOrEmpty(replicaName))
throw new ArgumentNullException("Replica name is null.");
if (area == null)
throw new ArgumentNullException("Area geometry is null.");
IVersion oVersion = (parentWorkspace as IVersionedWorkspace).FindVersion(versionInfo.VersionName);
IWorkspace sdeVersionWorkspace = oVersion as IWorkspace;
IGeoDataServer parentGds = InitGeoDataServer(sdeVersionWorkspace),
childGds = InitGeoDataServer(childWorkspace);
CreateFeatureDatasetReplica(parentGds, childGds, versionInfo, replicaName, parentWorkspace, childWorkspace, area, definitionQuery, featureDataset);
Marshal.ReleaseComObject(parentGds);
Marshal.ReleaseComObject(childGds);
}
//Function to create the replica, based on this link http://help.arcgis.com/en/sdk/10.0/arcobjects_net/conceptualhelp/index.html#//0001000003r5000000
private void CreateFeatureDatasetReplica(IGeoDataServer parentGDS, IGeoDataServer childGDS, IVersionInfo versionInfo, string replicaName, IWorkspace parentWorkspace, IWorkspace childWorkspace, IGeometry geometry, string definitionQuery, string featureDatasetName)
{
IList<string> existingReplicas = ReadExistingReplicas(parentGDS);
if (existingReplicas.Contains(replicaName.ToUpper()))
{
throw new Exception("A replica with the following name has already been created: " + replicaName);
}
IEnumDataset datasets = null;
if (!string.IsNullOrEmpty(featureDatasetName))
{
IEnumDataset featureDatasets = parentWorkspace.get_Datasets(esriDatasetType.esriDTFeatureDataset);
IFeatureDataset featureDataset;
while ((featureDataset = featureDatasets.Next() as IFeatureDataset) != null)
{
if (featureDataset.Name == featureDatasetName)
{
datasets = featureDataset.Subsets;
break;
}
}
if (datasets == null)
throw new Exception("Didn't find FeatureDataset " + featureDatasetName + " in the db");
}
else
{
datasets = parentWorkspace.get_Datasets(esriDatasetType.esriDTFeatureClass);
}
IGPReplicaDatasets gpReplicaDatasets = new GPReplicaDatasetsClass();
IDataset dataset;
while ((dataset = datasets.Next()) != null)
{
//temporary workaround to not include a view that is on the feature classes :^)
if (dataset.Name.Contains("VW_") || dataset.Name.Contains("_EVW"))
continue;
if (m_ListExcludedTables.Contains(dataset.Name.Substring(dataset.Name.LastIndexOf(".") + 1).ToUpper()))
continue;
if (!(childWorkspace as IWorkspace2).NameExists[dataset.Type, dataset.Name.Substring(dataset.Name.LastIndexOf(".") + 1)])
continue;
IGPReplicaDataset gpReplicaDataset = new GPReplicaDatasetClass();
gpReplicaDataset.DatasetType = dataset.Type;
gpReplicaDataset.Name = dataset.Name.ToUpper();
gpReplicaDataset.IsPrivate = false;
gpReplicaDataset.UseGeometry = true;
gpReplicaDataset.RowsType = esriRowsType.esriRowsTypeFilter;
if ((dataset as ITable).Fields.FindField("PLTS_COMP_SCALE") != -1)
gpReplicaDataset.DefQuery = definitionQuery; //DefQuery here
else
gpReplicaDataset.DefQuery = "";
gpReplicaDatasets.Add(gpReplicaDataset);
}
IGPReplicaDescription gpReplicaDesc = new GPReplicaDescriptionClass();
gpReplicaDesc.QueryGeometry = geometry;
gpReplicaDesc.SpatialRelation = esriSpatialRelEnum.esriSpatialRelIntersects;
gpReplicaDesc.ModelType = esriReplicaModelType.esriModelTypeSimple;
gpReplicaDesc.SingleGeneration = true;
gpReplicaDesc.ReplicaDatasets = gpReplicaDatasets;
IGPReplicaOptions2 replicaOptions = new GPReplicaOptionsClass();
replicaOptions.AccessType = esriReplicaAccessType.esriReplicaAccessNone;
replicaOptions.RegisterReplicaOnly = true;
ExtractData(datasets, childWorkspace, geometry, definitionQuery);
IReplicationAgent replicationAgent = new ReplicationAgentClass();
replicationAgent.CreateReplica(versionInfo.VersionName, parentGDS, childGDS, replicaName, gpReplicaDesc, replicaOptions);
}
}
For fixing the UI freeze i made the following changes to the button1_Click_1() method:
public partial class FrmReplicaAdmin : Form
{
private void button1_Click_1(object sender, EventArgs e)
{
DataConnectionConfig selectedDatabase = cmboxDatabase.SelectedItem as DataConnectionConfig;
if (selectedDatabase.PlWorkspace == null)
{
statusLabel.Text = "Could not open GeoDatabase (PL)";
return;
}
if (selectedDatabase.DataWorkspace == null)
{
statusLabel.Text = "Could not open GeoDatabase (NIS)";
return;
}
int scaleBand = int.Parse(cmboxScale.SelectedItem.ToString());
string gridName = cmboxGridNr.SelectedItem as string;
IGeometry shapeOfSelectedAOI = getSelectedPolygon(gridName, scaleBand, selectedDatabase);
// adding inputs to list i can pass onto the backgroundWorker
List<object> arguments = new List<object>();
arguments.Add(selectedDatabase.PlWorkspace);
arguments.Add(selectedDatabase.DataWorkspace);
arguments.Add(selectedDatabase.TemplateGdb);
arguments.Add(gridName);
arguments.Add(scaleBand);
arguments.Add(shapeOfSelectedAOI);
arguments.Add(selectedDatabase.DatasetName);
backgroundWorker1.RunWorkerAsync(arguments);
// starting progress bar
progressBarReplica.Visible = true;
lblReplica.Text = "Checking out replica...";
}
private void backgroundWorker1_DoWork(object sender, DoWorkEventArgs e)
{
List<object> genericlist = e.Argument as List<object>;
IWorkspace ws = (IWorkspace)genericlist[0];
IWorkspace pWs = (IWorkspace)genericlist[1];
string pathToDbTemplate = genericlist[2].ToString();
string gName = genericlist[3].ToString();
int sBand = (int)genericlist[4];
IGeometry shape = (IGeometry)genericlist[5];
string dsName = genericlist[6].ToString();
(ArcMap.Application as IMxApplication2).PauseDrawing = true;
replica.CheckOutReplica(ws, pWs, pathToDbTemplate, gName, sBand, shape, dsName);
(ArcMap.Application as IMxApplication2).PauseDrawing = false;
}
}
This is causing a: "RPC_E_SERVERFAULT(0X80010105)", but the UI isn't freezing anymore. My guess is that it's because i'm initiating the database in the first thread, and then using it in the second. I also sort of get that i can't use a backgroundWorker due to the entire STA and COM-object things with ArcGis, but i'm still not getting all this 100%.
Any help of a possible solution to making my UI responsive, or at least showing some sort of progressbar while the task is running would be nice. The entire process can take a few minutes at times, and the program feels like it's crashed meanwhile due to the freeze.
Edit: I'm referring to ArcMap, forgot to mention that.

Exception Info: System.OutOfMemoryException

I am getting the following error:
Exception Info: System.OutOfMemoryException
Stack:
at System.Threading.ExecutionContext.CreateCopy()
at System.Threading.Tasks.Task.CopyExecutionContext(System.Threading.ExecutionContext)
at System.Threading.Tasks.Task.ExecuteWithThreadLocal(System.Threading.Tasks.Task ByRef)
at System.Threading.Tasks.Task.ExecuteEntry(Boolean)
at System.Threading.Tasks.Task.System.Threading.IThreadPoolWorkItem.ExecuteWorkItem()
at System.Threading.ThreadPoolWorkQueue.Dispatch()
at System.Threading._ThreadPoolWaitCallback.PerformWaitCallback()
The application is a c# windows service (using TopShelf). The app is using HanFire intitiate RunScan() method.
I do not see where in the code, but I suspect it is add to the Blocking Collection.
Code:
public void RunScan(IJobCancellationToken cancellationToken, string path, int pathId)
{
SmartScanDAO scDAO = new SmartScanDAO();
PathInfo RootPathInfo = scDAO.ScanStarted(pathId);
try
{
if (RootPathInfo == null)
{
ErrorLogger.LogEvent(RootPathInfo.Id, string.Format("Path ({1}): {0} is null", path, pathId), EventLogEntryType.Error);
return;
}
if (RootPathInfo.Status == ScanStatus.Processing)
{
ErrorLogger.LogEvent(RootPathInfo.Id, string.Format("Path {0} is currently being scanned", path), EventLogEntryType.Information);
return;
}
RootPathInfo.Status = ScanStatus.Processing;
scDAO.ScanStatus(RootPathInfo);
ErrorLogger.LogEvent(string.Format("Scanning {0}", path), EventLogEntryType.Information);
if (!Directory.Exists(path))
{
scDAO.DisableIsilonScanPathById(RootPathInfo.Id);
ErrorLogger.LogEvent(RootPathInfo.Id, "The Path does not exists: " + path, EventLogEntryType.Error);
return;
}
// Get Directories to Skip
skipPaths = scDAO.GetDuplicateIsilonScanPaths(RootPathInfo.Path);
DirectoryInfo di = new DirectoryInfo(path);
SplunkExport.DeleteFiles(path, new List<string>() { "acl", "path" }, RootPathInfo.Id);
DirectoryType = (DirectoryType)Enum.Parse(typeof(DirectoryType), RootPathInfo.DirectoryType);
RootPathInfo.Path = di.FullName.ToLower();
RootPathInfo.Owner = GetAcl(RootPathInfo.Id, RootPathInfo.Path, DirectoryType, true, false, true);// SecurityUtils.GetOwner(di.FullName, (DirectoryType)Enum.Parse(typeof(DirectoryType), RootPathInfo.DirectoryType));
RootPathInfo.Files = 0;
RootPathInfo.Size = 0;
Interlocked.Add(ref FileCount, di.GetFiles().Length);
Interlocked.Add(ref DirectorySize, (int)di.GetFiles().Sum(f => f.Length));
Task<List<string>> outputMetaDataTask = Task.Factory.StartNew(() => WriteOutput(RootPathInfo.Path, SplunkFileType.MetaData, MetaDataQueue), TaskCreationOptions.LongRunning);
Task<List<string>> outputACLTask = Task.Factory.StartNew(() => WriteOutput(RootPathInfo.Path, SplunkFileType.ACL, ACLQueue), TaskCreationOptions.LongRunning);
Action action = (() => UpdateStats(RootPathInfo.Id, MetaDataQueue, ACLQueue));
CancellationTokenSource UpdateStatsToken = new CancellationTokenSource();
IObservable<long> observable = Observable.Interval(TimeSpan.FromMinutes(10));
// Subscribe the obserable to the task on execution.
observable.Subscribe(x =>
{
Task task = new Task(action); task.Start();
// task.ContinueWith(c => resumeAction());
}, UpdateStatsToken.Token);
MetaDataQueue.Add(string.Format("\"{0}\",\"{1}\",\"{2}\",\"{3}\",\"{4}\",\"{5}\",\"{6}\",\"{7}\",\"{8}\"", DateTime.UtcNow + " UTC", di.FullName.ToLower(), 1, ((DirectoryInfo)di).GetFiles().Length, string.Format("{0:0.0}", ((DirectoryInfo)di).GetFiles().Sum(f => f.Length) / 1024 / 1024), RootPathInfo.Owner, di.LastAccessTimeUtc, di.CreationTimeUtc, di.LastWriteTimeUtc, ""));
//
// Traverse the path
GetSystemObjects(cancellationToken, di, RootPathInfo.Id, DirectoryType);
// Complete adding
MetaDataQueue.CompleteAdding();
ACLQueue.CompleteAdding();
// wait for
outputMetaDataTask.Wait();
outputACLTask.Wait();
//Send Files to Splunk
SplunkExport.CopyFilesToSplunk(outputMetaDataTask.Result, outputACLTask.Result, RootPathInfo.Id);
SmartScanDAO dao = new SmartScanDAO();
RootPathInfo.Size = DirectorySize;
}
catch (OperationCanceledException cex)
{
RootPathInfo.Status = ScanStatus.Cancelled;
if (scDAO == null)
scDAO = new SmartScanDAO();
scDAO.ScanStatus(RootPathInfo);
ErrorLogger.LogEvent(cex, RootPathInfo.Id);
}
catch (Exception ex)
{
if (RootPathInfo == null)
{
RootPathInfo = new PathInfo();
RootPathInfo.Id = pathId;
}
ErrorLogger.LogEvent(ex, RootPathInfo.Id);
RootPathInfo.Status = ScanStatus.Error;
if (scDAO == null)
scDAO = new SmartScanDAO();
scDAO.ScanStatus(RootPathInfo);
}
}
List<string> WriteOutput(string path, SplunkFileType fileType, BlockingCollection<string> queue)
{
var fileList = new List<string>();
int filecount = 1;
int linecount = 0;
int maxlinecount = 200000;
string header = (fileType.ToString() == SplunkFileType.ACL.ToString()?aclHeader:metaDataHeader);
var filepattern = SplunkExport.GetPaths(path, fileType.ToString());
string filename = string.Format(filepattern, filecount);
fileList.Add(filename);
while (true)
{
using (var strm = File.AppendText(filename))
{
foreach (var s in queue.GetConsumingEnumerable())
{
if (linecount == 0)
strm.WriteLine(header);
strm.WriteLine(s);
// if you want to make sure it's written to disk immediately,
// call Flush. This will slow performance, however.
strm.Flush();
linecount++;
if (linecount > maxlinecount)
{
linecount = 0;
filecount++;
break;
}
}
}
if (queue.IsCompleted)
break;
filename = string.Format(filepattern, filecount);
fileList.Add(filename);
}
return fileList;
}
private void GetSystemObjects(IJobCancellationToken cancellationToken, DirectoryInfo di, int pathid, DirectoryType directorytype = DirectoryType.Share)
{
long files = 0;
long size = 0;
int mb = 1024 * 1024;
try
{
Parallel.ForEach<FileSystemInfo>(di.EnumerateFileSystemInfos("*", System.IO.SearchOption.TopDirectoryOnly).Where(r => !r.FullName.Contains(#"\~snapshot") ), (FileSystemInfo fso) =>
{
if (skipPaths.Contains(fso.FullName))
return;
if (cancellationToken != null)
cancellationToken.ThrowIfCancellationRequested();
bool isDirectory = fso.EntryInfo.IsDirectory;
string owner = "";
owner = GetAcl(pathid, fso.FullName, directorytype, isDirectory, false);
try
{
if (isDirectory)
{
DirectoryInfo dis = new DirectoryInfo(fso.FullName);
lock (lckObject)
{
files = ((DirectoryInfo)fso).GetFiles().Length;
size = ((DirectoryInfo)fso).GetFiles().Sum(f => f.Length);
}
Interlocked.Add(ref FileCount, files);
Interlocked.Add(ref DirectorySize, size);
ErrorLogger.LogEvent(pathid, string.Format("Scan Directory\t{0}\t{1}", fso.FullName, files));
}
else
{
size = ((FileInfo)fso).Length;
files = 0;
}
MetaDataQueue.Add(string.Format("\"{0}\",\"{1}\",\"{2}\",\"{3}\",\"{4}\",\"{5}\",\"{6}\",\"{7}\",\"{8}\"", DateTime.UtcNow + " UTC", fso.FullName.ToLower(), (fso.EntryInfo.IsDirectory ? 1 : 0), files, string.Format("{0:0.0}", size / mb), owner, fso.LastAccessTimeUtc, fso.CreationTimeUtc, fso.LastWriteTimeUtc));
}
catch (Exception ex)
{
ErrorLogger.LogEvent(ex, pathid);
}
if (isDirectory)
GetSystemObjects(cancellationToken, (DirectoryInfo)fso, pathid, directorytype);
fso = null;
}); // end of ForEach
}
catch (Exception ex)
{
ErrorLogger.LogEvent(ex, pathid);
}
}
Any suggestions where the error may be occurring or how to get closer to the root cause. In the exception, I don't see any indication where it is.

Database async query and processing

Let me rephrase.
I have a method that generates strings(paths) after given a start string(path)
IF those paths are for a directory I want to enqueue that in the input of the method.
After processing the path synchronously, I want to get the Data and clone it async into multiple paths of a pipeline, were each path needs to get the datablock. So the Broadcastblock is out of the question (it cant send a blocking signal to the blocks before itself),
The joinblock, joining the results is relatively straight forward.
So to sum up
Is there a Block in Dataflow block, where i can access the inputqueue from the delegate, if when, how?
Is there a construct that acts like the broadcastblock but can block the blocks that came before it?
I tried doing it via almighty google:
class subversion
{
private static string repo;
private static string user;
private static string pw;
private static DateTime start;
private static DateTime end;
private static List<parserObject> output;
public static List<parserObject> svnOutputList
{
get {return output; }
}
private static List<string> extension_whitelist;
public async void run(string link, string i_user, string i_pw, DateTime i_start, DateTime i_end)
{
repo = link;
user = i_user;
pw = i_pw;
start = i_start;
end = i_end;
output = new List<parserObject>();
BufferBlock<string> crawler_que = new BufferBlock<string>();
BufferBlock<svnFile> parser_que = new BufferBlock<svnFile>();
var svn = crawl(crawler_que, parser_que);
var broadcaster = new ActionBlock<svnFile>(async file =>
{//tried to addapt the code from this ensure always send broadcastblock -> see link below
List<Task> todo = new List<Task>();
todo.Add(mLoc);//error cannot convert methodgroup to task
foreach (var task in todo)//error: Only assignment, call, increment, decrement, await, and new object expressions can be used as a statement?
{
task.SendAsync(file);//error cannot convert task to targetblock
}
await Task.WhenAll(todo.ToArray());
});
parser_que.LinkTo(broadcaster);
await Task.WhenAll(broadcaster, svn);//error cannot convert actionblock to task
}
private static async Task crawl(BufferBlock<string> in_queue, BufferBlock<svnFile> out_queue)
{
SvnClient client = new SvnClient();
client.Authentication.ForceCredentials(user, pw);
SvnListArgs arg = new SvnListArgs
{
Depth = SvnDepth.Children,
RetrieveEntries = SvnDirEntryItems.AllFieldsV15
};
while (await in_queue.OutputAvailableAsync())
{
string buffer_author = null;
string prev_author = null;
System.Collections.ObjectModel.Collection<SvnListEventArgs> contents;
string link = await in_queue.ReceiveAsync();
if (client.GetList(new Uri(link), arg, out contents))
{
foreach (SvnListEventArgs item in contents)
{
if (item.Entry.NodeKind == SvnNodeKind.Directory)
{
in_queue.Post(item.Path);
}
else if (item.Entry.NodeKind == SvnNodeKind.File)
{
try
{
int length = item.Name.LastIndexOf(".");
if (length <= 0)
{
continue;
}
string ext = item.Name.Substring(length);
if (extension_whitelist.Contains(ext))
{
Uri target = new Uri((repo + link));
SvnRevisionRange range;
SvnBlameArgs args = new SvnBlameArgs
{
Start = start.AddDays(-1),
End = end
};
try
{
svnFile file_instance = new svnFile();
client.Blame(target, args, delegate(object sender3, SvnBlameEventArgs e)
{
if (e.Author != null)
{
buffer_author = e.Author;
prev_author = e.Author;
}
else
{
buffer_author = prev_author;
}
file_instance.lines.Add(new svnLine(buffer_author, e.Line));
});
out_queue.Post(file_instance);
}
catch (Exception a) { Console.WriteLine("exception:" + a.Message);}
}
}
catch (Exception a)
{
}
}
}
}
}
}
private static async Task mLoc(svnFile file)
{
List<parserPart> parts = new List<parserPart>();
int find;
foreach (svnLine line in file.lines)
{
if ((find = parts.FindIndex(x => x.uploader_id == line.author)) > 0)
{
parts[find].count += 1;
}
else
{
parts.Add(new parserPart(line.author));
}
find = 0;
}
parserObject ret = new parserObject(parts, "mLoc");
await output.Add(ret);
return;
}
}
broadcastblock answer: Alternate to Dataflow BroadcastBlock with guaranteed delivery

Multithreading issue ,Maybe a DeadLock using Foreach

Parallel.ForEach keeps on running and my program does not end. I am unable to trace where it goes after the first iteration. My guess is that gets a deadlock and keeps on doing context switching.
private void ReadInputFile()
{
var collection = new ConcurrentBag<PropertyRecord>();
var lines = System.IO.File.ReadLines(InputFileName);
int i = 0;
int RecordsCount = lines.Count();
Parallel.ForEach(lines, line =>
{
if (string.IsNullOrWhiteSpace(line))
{
return;
}
var tokens = line.Split(',');
var postalCode = tokens[0];
var country = tokens.Length > 1 ? tokens[1] : "england";
SetLabelNotifyTwoText(
string.Format(
"Reading PostCode {0} out of {1}"
i,
lines.Length));
var tempRecord = GetAllAddesses(postalCode, country);
if (tempRecord != null)
{
foreach (PropertyRecord r in tempRecord)
{
collection.Add(r);
}
}
});
}
private List<PropertyRecord> GetAllAddesses(
string postalCode,
string country = "england")
{
SetLabelNotifyText("");
progressBar1.Value = 0;
progressBar1.Update();
var records = new List<PropertyRecord>();
using (WebClient w = new WebClient())
{
var url = CreateUrl(postalCode, country);
var document = w.DownloadString(url);
var pagesCount = GetPagesCount(document);
if (pagesCount == null)
{
return null;
}
for (int i = 0; i < pagesCount; i++)
{
SetLabelNotifyText(
string.Format(
"Reading Page {0} out of {1}",
i,
pagesCount - 1));
url = CreateUrl(postalcode,country, i);
document = w.DownloadString(url);
var collection = Regex.Matches(
document,
"<div class=\"soldDetails\">(.|\\n|\\r)*?class=" +
"\"soldAddress\".*?>(?<address>.*?)(</a>|</div>)" +
"(.|\\n|\\r)*?class=\\\"noBed\\\">(?<noBed>.*?)" +
"</td>|</tbody>");
foreach (var match in collection)
{
var r = new PropertyRecord();
var bedroomCount = match.Groups["noBed"].Value;
if(!string.IsNullOrEmpty(bedroomCount))
{
r.BedroomCount = bedroomCount;
}
else
{
r.BedroomCount = "-1";
}
r.address = match.Groups["address"].Value;
var line = string.Format(
"\"{0}\",{1}",
r.address
r.BedroomCount);
OutputLines.Add(line);
Records.Add(r);
}
}
}
return Records;
}
It runs fine without Parallel.ForEach, but using Parallel.ForEach is in requirements.
I have debugged it and after returning from GetAllAdresses-method first time, Step Next button halts and it just keep on debugging in the background. It doesn't come back on any bookmark I have placed.
As you said in comments, your SetLabelNotifyText and SetLabelNotifyTwoText methods calls Control.Invoke.
For Control.Invoke to work, Main thread has to be free, but in your case you seem to block the main thread by invoking Parallel.ForEach in it.
Here is a minimal reproduction:
private void button1_Click(object sender, EventArgs e)
{
Parallel.ForEach(Enumerable.Range(1, 100), (i) =>
{
Thread.Sleep(10);//Simulate some work
this.Invoke(new Action(() => SetText(i)));
});
}
private void SetText(int i)
{
textBox1.Text = i.ToString();
}
Main thread waits for Parallel.ForEach and worker threads waits for Main thread, and thus results in deadlock.
How to fix: Don't use Invoke simply use BeginInvoke or don't block the MainThread.
If this isn't the case post sscce, that will be helpful for us
Change your code like this, to use async and await. This is the modern alternative to using BeginInvoke and other asynchronous code models.
private async Task ReadInputFile()
{
var collection = new ConcurrentBag<PropertyRecord>();
var lines = System.IO.File.ReadLines(InputFileName);
int i = 0;
int RecordsCount = lines.Count();
Parallel.ForEach(lines, line =>
{
if (string.IsNullOrWhiteSpace(line))
{
return;
}
var tokens = line.Split(',');
var postalCode = tokens[0];
var country = tokens.Length > 1 ? tokens[1] : "england";
SetLabelNotifyTwoText(
string.Format(
"Reading PostCode {0} out of {1}"
i,
lines.Length));
var tempRecord = await GetAllAddesses(postalCode, country);
if (tempRecord != null)
{
foreach (PropertyRecord r in tempRecord)
{
collection.Add(r);
}
}
});
}
private async Task<List<PropertyRecord>> GetAllAddesses(
string postalCode,
string country = "england")
{
SetLabelNotifyText("");
progressBar1.Value = 0;
progressBar1.Update();
var records = new List<PropertyRecord>();
using (WebClient w = new WebClient())
{
var url = CreateUrl(postalCode, country);
var document = await w.DownloadStringTaskAsync(url);
var pagesCount = GetPagesCount(document);
if (pagesCount == null)
{
return null;
}
for (int i = 0; i < pagesCount; i++)
{
SetLabelNotifyText(
string.Format(
"Reading Page {0} out of {1}",
i,
pagesCount - 1));
url = CreateUrl(postalcode,country, i);
document = await w.DownloadStringTaskAsync(url);
var collection = Regex.Matches(
document,
"<div class=\"soldDetails\">(.|\\n|\\r)*?class=" +
"\"soldAddress\".*?>(?<address>.*?)(</a>|</div>)" +
"(.|\\n|\\r)*?class=\\\"noBed\\\">(?<noBed>.*?)" +
"</td>|</tbody>");
foreach (var match in collection)
{
var r = new PropertyRecord();
var bedroomCount = match.Groups["noBed"].Value;
if(!string.IsNullOrEmpty(bedroomCount))
{
r.BedroomCount = bedroomCount;
}
else
{
r.BedroomCount = "-1";
}
r.address = match.Groups["address"].Value;
var line = string.Format(
"\"{0}\",{1}",
r.address
r.BedroomCount);
OutputLines.Add(line);
Records.Add(r);
}
}
}
return Records;
}
Then call it like this
ReadInputFile.Wait();
or, even better, is the caller is async,
await ReadInputFile();

How to handle continue statement in threading

Below code is used to move the data from local system to external hard drives. Client's requirement is to use threading for the same so as to speed up the copy of pages / minute.
I am facing one issue. I used continue statement under while loop which is not acceptable under thread. Can someone please suggest alternative of the same.
Thanks.
private void PromoteMain(TextWriter streamWriter)
{
string driveLetterString;
driveLetterString = GetExternalDrive();
if (!string.IsNullOrEmpty(driveLetterString))
{
DataSet dataSet;
DateTime startSnooze;
DataRow row;
string imageIdString;
string reelIdString;
string destImagePathString;
string[] strArrays;
int i;
string fileName;
string[] strArrays3;
string[] strArrays4;
bool flag;
string[] strArrays5;
bool flag1;
string[] strArrays6;
bool flag2;
string[] files;
TimeSpan timeSpan1;
IEnumerator iEnumerator1;
string[] stringArray1;
IDisposable iDisposable1;
dataSet = this.GetImagesFromDatabase();
if (((dataSet != null) && (dataSet.Tables.Count != 0)) && (dataSet.Tables[0].Rows.Count != 0))
{
iEnumerator1 = dataSet.Tables[0].Rows.GetEnumerator();
try
{
#region goto L_05A9
//goto L_05A9;
while (iEnumerator1.MoveNext())
{
driveLetterString = GetExternalDrive();
new Thread(() =>
{
if (!string.IsNullOrEmpty(driveLetterString))
{
row = ((DataRow)iEnumerator1.Current);
imageIdString = row["imageid"].ToString();
reelIdString = row["reelid"].ToString();
destImagePathString = row["destimagepath"].ToString();
if (!Directory.Exists(destImagePathString))
{
if (this.stringBuilderFail.Length > 0)
{
StringBuilder stringBuilder7 = this.stringBuilderFail.Append(",");
}
StringBuilder stringBuilder8 = this.stringBuilderFail.Append(imageIdString);
this._lblFailed.Text = Convert.ToString(((int)(Convert.ToInt32(this._lblFailed.Text) + 1)));
if (this.stringBuilderFail.Length > 7000)
{
this.UpdateImagesInDatabase(this.stringBuilderFail.ToString(), "5");
StringBuilder stringBuilder9 = this.stringBuilderFail.Remove(0, this.stringBuilderFail.Length);
}
this.RefreshFormView();
continue;
}
if (!Directory.Exists((driveLetterString + "newspaper\\" + reelIdString + "\\")))
{
DirectoryInfo directoryInfo1 = Directory.CreateDirectory((driveLetterString + "newspaper\\" + reelIdString + "\\"));
}
strArrays = new string[] { ".pdf", ".html", "_clean.html", "_lx.jpg" };
for (i = 0; (i < strArrays.Length); i++)
{
fileName = strArrays[i];
try
{
if (i != 2)
{
strArrays3 = new string[] { driveLetterString, "newspaper\\", reelIdString, "\\", imageIdString, fileName };
File.Copy((destImagePathString + imageIdString + fileName), string.Concat(strArrays3));
}
}
catch
{
}
}
strArrays4 = new string[] { driveLetterString, "newspaper\\", reelIdString, "\\", imageIdString, ".pdf" };
flag = File.Exists(string.Concat(strArrays4));
strArrays5 = new string[] { driveLetterString, "newspaper\\", reelIdString, "\\", imageIdString, "_lx.jpg" };
flag1 = File.Exists(string.Concat(strArrays5));
strArrays6 = new string[] { driveLetterString, "newspaper\\", reelIdString, "\\", imageIdString, ".html" };
flag2 = File.Exists(string.Concat(strArrays6));
if ((flag && flag1) && flag2)
{
streamWriter.WriteLine(imageIdString);
if (this.stringBuilderSuccess.Length > 0)
{
StringBuilder stringBuilder3 = this.stringBuilderSuccess.Append(",");
}
StringBuilder stringBuilder4 = this.stringBuilderSuccess.Append(imageIdString);
this._lblPromoted.Text = Convert.ToString(((int)(Convert.ToInt32(this._lblPromoted.Text) + 1)));
#region goto L_0453
L_0453();
continue;
#endregion
}
files = Directory.GetFiles((driveLetterString + "newspaper\\" + reelIdString + "\\"), (imageIdString + "*.*"));
stringArray1 = files;
foreach (string str6 in stringArray1)
{
try
{
File.Delete(str6);
}
catch
{
}
}
if (this.stringBuilderFail.Length > 0)
{
StringBuilder stringBuilder1 = this.stringBuilderFail.Append(",");
}
StringBuilder stringBuilder2 = this.stringBuilderFail.Append(imageIdString);
this._lblFailed.Text = Convert.ToString(((int)(Convert.ToInt32(this._lblFailed.Text) + 1)));
#region goto L_0453
L_0453();
continue;
#endregion
}
else
{
break;
};
}).Start();
}
#endregion
}
finally
{
iDisposable1 = (iEnumerator1 as IDisposable);
if (iDisposable1 != null)
{
iDisposable1.Dispose();
}
}
if (this.stringBuilderSuccess.Length > 0)
{
this.UpdateImagesInDatabase(this.stringBuilderSuccess.ToString(), "3");
StringBuilder stringBuilder10 = this.stringBuilderSuccess.Remove(0, this.stringBuilderSuccess.Length);
}
if (this.stringBuilderFail.Length > 0)
{
this.UpdateImagesInDatabase(this.stringBuilderFail.ToString(), "5");
StringBuilder stringBuilder11 = this.stringBuilderFail.Remove(0, this.stringBuilderFail.Length);
}
this.RefreshFormView();
#region goto L_0671
PromoteMain(streamWriter);
#endregion
}
else
{
startSnooze = DateTime.Now;
timeSpan1 = DateTime.Now.Subtract(startSnooze);
while (timeSpan1.TotalSeconds < 300D)
{
timeSpan1 = DateTime.Now.Subtract(startSnooze);
Thread.Sleep(125);
Application.DoEvents();
}
PromoteMain(streamWriter);
}
}
//else
//{
// MessageBox.Show("There is no space in selected drives");
// //there is no space in any external drive
//}
}
private void L_0453()
{
if (this.stringBuilderSuccess.Length > 7000)
{
this.UpdateImagesInDatabase(this.stringBuilderSuccess.ToString(), "3");
StringBuilder stringBuilder5 = this.stringBuilderSuccess.Remove(0, this.stringBuilderSuccess.Length);
}
if (this.stringBuilderFail.Length > 7000)
{
this.UpdateImagesInDatabase(this.stringBuilderFail.ToString(), "5");
StringBuilder stringBuilder6 = this.stringBuilderFail.Remove(0, this.stringBuilderFail.Length);
}
this.RefreshFormView();
}
Replace the continue statement with a return. This will complete the execution of the thread. Now I don't know how this continue statement ended up there but you can't actually continue since the operations are running in parallel. You should also extract this line
row = ((DataRow)iEnumerator1.Current);
or you risk the enumerator to change from another thread before it executes. You may also want to try Parallel.Foreach to reduce possible errors and get more effective execution. Even better would be to use async methods with async IO (as opposed to methods that use threads) and use Task.WaitAll to wait for them to complete.

Categories

Resources