FileSystemProvider for Umbraco v6.1 - C#

I'm trying to create a new MediaFileSystemProvider which seamlessly stores Media in Azure Blob Storage.
I copied the MediaFileSystem class from the Umbraco v6.1 source as my starting point.
I then edited the /config/FileSystemProviders.config file, inserting my new class details.
When I restart Umbraco, the new class is called but I get the error:
"Could not find constructor for type 'mysite.core.umbracoExtensions.FileSystemProviders.AzureBlobStorageProvider, mysite.core' which accepts 0 parameters"
Here's my class:
...
[FileSystemProvider("media")]
public class AzureBlobStorageProvider : FileSystemWrapper
{
private string rootUrl;
CloudStorageAccount storageAccount;
CloudBlobClient blobClient;
CloudBlobContainer container;
public AzureBlobStorageProvider(IFileSystem wrapped)
: base(wrapped)
{
var constring = ConfigurationManager.ConnectionStrings["StorageConnectionString"].ConnectionString;
// Retrieve storage account from connection string.
storageAccount = CloudStorageAccount.Parse(constring);
// Create the blob client.
blobClient = storageAccount.CreateCloudBlobClient();
// Retrieve reference to a previously created container.
container = blobClient.GetContainerReference("mymedia");
//container.CreateIfNotExists();
//container.SetPermissions(new BlobContainerPermissions { PublicAccess = BlobContainerPublicAccessType.Blob });
rootUrl = "https://mysite.blob.core.windows.net/media";
}
...
methods
Any idea what I'm doing wrong here?
Cheers

I couldn't leave this half done considering all the help I've received from folks here :)
I set up a fresh Umbraco v6.1.6 and confirmed that the MediaService.Deleted event definitely does not fire for me with the code below. I'll go find out how to submit a bug...
For anyone else interested in storing Umbraco Media Items in Azure Storage, below is how I did it. You can turn the CDN on/off for displaying image content with the "CDNEnabled" key and turn uploading on/off with the "AzureCDNUploadEnabled" key without having to touch your Views each time.
FYI, the actual CDN feature of Azure Blob Storage is not available at the moment; it was, currently isn't, and apparently will be again some day.
You can limit your data usage and speed up image delivery by setting the "AzureCDNCacheControlHeader" value to update the Cache-Control header as you upload. The value below sets images to expire after seven days (max-age=604800) and then revalidate.
Add this to your web.config appsettings node:
<!-- cdn config -->
<!-- used for razor rendering -->
<add key="CDNPath" value="https://utest.blob.core.windows.net"/>
<add key="CDNEnabled" value="true"/>
<!-- used for media uploads -->
<add key="AzureCDNStorageConnectionString" value="DefaultEndpointsProtocol=https;AccountName={yourAccount};AccountKey={yourKey}"/>
<add key="AzureCDNStorageAccountName" value="{yourStorageAccount}"/>
<add key="AzureCDNBlobContainerName" value="media"/>
<add key="AzureCDNRootUrl" value="https://{yourAccount}.blob.core.windows.net"/>
<add key="AzureCDNUploadEnabled" value="true"/>
<add key="AzureCDNCacheControlHeader" value="must-revalidate, public, max-age=604800"/> <!-- change to whatever suits you -->
<!-- end cdn -->
This is the EventHandler:
using Microsoft.WindowsAzure.Storage;
using Microsoft.WindowsAzure.Storage.Blob;
using System;
using System.Configuration;
using System.Web;
using System.Linq;
using System.IO;
using Umbraco.Core;
using Umbraco.Core.Events;
using Umbraco.Core.Models;
using Umbraco.Core.Services;
namespace utest1.umbracoExtensions.events
{
public class SaveMediaToAzure : ApplicationEventHandler
{
/* either add your own logging class or remove this and all calls to 'log' */
private log4net.ILog log = log4net.LogManager.GetLogger(typeof(utest1.logging.PublicLogger));
CloudStorageAccount storageAccount;
private string blobContainerName;
CloudBlobClient blobClient;
CloudBlobContainer container;
string cacheControlHeader;
private bool uploadEnabled;
public SaveMediaToAzure()
{
try
{
storageAccount = CloudStorageAccount.Parse(ConfigurationManager.AppSettings["AzureCDNStorageConnectionString"]);
blobContainerName = ConfigurationManager.AppSettings["AzureCDNBlobContainerName"];
blobClient = storageAccount.CreateCloudBlobClient();
container = blobClient.GetContainerReference(blobContainerName);
uploadEnabled = Convert.ToBoolean(ConfigurationManager.AppSettings["AzureCDNUploadEnabled"]);
cacheControlHeader = ConfigurationManager.AppSettings["AzureCDNCacheControlHeader"];
MediaService.Saved += MediaServiceSaved;
MediaService.Trashed += MediaServiceTrashed;
MediaService.Deleted += MediaServiceDeleted; // not firing
}
catch (Exception x)
{
log.Error("SaveMediaToAzure Config Error", x);
}
}
void MediaServiceSaved(IMediaService sender, SaveEventArgs<IMedia> e)
{
if (uploadEnabled)
{
foreach (var fileItem in e.SavedEntities)
{
try
{
log.Info("Saving media to Azure:" + e.SavedEntities.First().Name);
var path = fileItem.GetValue("umbracoFile").ToString();
var filePath = HttpContext.Current.Server.MapPath(path);
UploadToAzure(filePath, path);
if (fileItem.GetType() == typeof(Umbraco.Core.Models.Media))
{
UploadThumbToAzure(filePath, path);
}
}
catch (Exception x)
{
log.Error("Error saving media to Azure: " + fileItem.Name, x);
}
}
}
}
/*
* Using this because MediaServiceDeleted event is not firing in v6.1.6
*
*/
void MediaServiceTrashed(IMediaService sender, MoveEventArgs<IMedia> e)
{
if (uploadEnabled)
{
try
{
log.Info("Deleting media from Azure:" + e.Entity.Name);
var path = e.Entity.GetValue("umbracoFile").ToString();
CloudBlockBlob imageBlob = container.GetBlockBlobReference(StripContainerNameFromPath(path));
imageBlob.Delete();
CloudBlockBlob thumbBlob = container.GetBlockBlobReference(StripContainerNameFromPath(GetThumbPath(path)));
thumbBlob.Delete();
}
catch (Exception x)
{
log.Error("Error deleting media from Azure: " + e.Entity.Name, x);
}
}
}
/*
* MediaServiceDeleted event not firing in v6.1.6
*
*/
void MediaServiceDeleted(IMediaService sender, DeleteEventArgs<IMedia> e)
{
//if (uploadEnabled)
//{
// try
// {
// log.Info("Deleting media from Azure:" + e.DeletedEntities.First().Name);
// var path = e.DeletedEntities.First().GetValue("umbracoFile").ToString();
// CloudBlockBlob imageBlob = container.GetBlockBlobReference(StripContainerNameFromPath(path));
// imageBlob.Delete();
// CloudBlockBlob thumbBlob = container.GetBlockBlobReference(StripContainerNameFromPath(GetThumbPath(path)));
// thumbBlob.Delete();
// }
// catch (Exception x)
// {
// log.Error("Error deleting media from Azure: " + e.DeletedEntities.First().Name, x);
// }
//}
Console.WriteLine(e.DeletedEntities.First().Name); // still not working
}
private string StripContainerNameFromPath(string path)
{
return path.Replace("/media/", String.Empty);
}
/*
* Uploads a file from the local media folder to the blob container and sets its Cache-Control header.
*/
private void UploadToAzure(string filePath, string relativePath)
{
// Read the file from disk and stream it up to the blob container.
using (Stream stream = File.OpenRead(filePath))
{
CloudBlockBlob blob = container.GetBlockBlobReference(StripContainerNameFromPath(relativePath));
blob.UploadFromStream(stream);
SetCacheControl(blob);
}
}
private void SetCacheControl(CloudBlockBlob blob)
{
blob.Properties.CacheControl = cacheControlHeader;
blob.SetProperties();
}
private void UploadThumbToAzure(string filePath, string relativePath)
{
var thumbFilePath = GetThumbPath(filePath);
var thumbRelativePath = GetThumbPath(relativePath);
UploadToAzure(thumbFilePath, thumbRelativePath);
}
private string GetThumbPath(string path)
{
var parts = path.Split('.');
var filename = parts[parts.Length - 2];
return path.Replace(filename, filename + "_thumb");
}
}
}
This is the RenderHelper:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
namespace utest1.umbracoExtensions.helpers
{
public class CDNImage
{
public static string ConvertUrlToCDN(string source)
{
if (String.IsNullOrEmpty(source))
{
return null;
}
var cdnUrl = System.Configuration.ConfigurationManager.AppSettings["CDNPath"];
var cdnOn = System.Configuration.ConfigurationManager.AppSettings["CDNEnabled"];
if (cdnOn == "true")
{
/*
* check if the url is absolute or not and whether it should be intercepted - eg. an external image url
* if it's absolute you'll need to strip out everything before /media...
*/
if (source.Contains(GetBaseUrl()))
{
source = StripBaseUrl(source);
}
}
return source;
}
private static string GetBaseUrl()
{
var url = System.Web.HttpContext.Current.Request.Url;
var baseUrl = url.Scheme + "://" + url.Host;
if (url.Port != 80 && url.Port != 443)
{
baseUrl += ":" + url.Port;
}
return baseUrl;
}
private static string StripBaseUrl(string path)
{
return path.Replace(GetBaseUrl(), String.Empty);
}
}
}
And finally, displaying it in the Razor view:
@inherits Umbraco.Web.Mvc.UmbracoTemplatePage
@{
Layout = "BasePage.cshtml";
}
@using utest1.umbracoExtensions.helpers
@{
var ms = ApplicationContext.Current.Services.MediaService;
var img = ms.GetById(int.Parse(CurrentPage.Image));
}
<h1>Umbraco on Azure is getting there!</h1>
<p>@img.Name</p>
<img alt="@img.Name" src="@CDNImage.ConvertUrlToCDN(img.GetValue("umbracoFile").ToString())" />
Suggestions for improvement are welcome.
Aaaah, it feels good to give back :)

Why is it that I spend hours trying to find the answer and then find it soon after I post?
The issue was twofold:
1) I should have been implementing IFileSystem (I took inspiration from the AmazonS3Provider source)
2) the parameter name being passed from the FileSystemProviders.config file was not included in the constructor (see the sketch below)
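For anyone hitting the same "0 parameters" error, here is a minimal sketch of the corrected shape. The key names and the connection-string handling are assumptions for illustration, not my exact final code; the important parts are implementing IFileSystem (from Umbraco.Core.IO) and giving the constructor parameters whose names match the keys declared for the provider in /config/FileSystemProviders.config.
[FileSystemProvider("media")]
public class AzureBlobStorageProvider : IFileSystem
{
    private readonly string rootUrl;
    private readonly CloudBlobContainer container;

    // The parameter names below must match the <add key="..." value="..."/> entries
    // configured for this provider in /config/FileSystemProviders.config, e.g.
    // "containerName" and "rootUrl" (hypothetical keys shown for illustration).
    public AzureBlobStorageProvider(string containerName, string rootUrl)
    {
        this.rootUrl = rootUrl;
        var connectionString = ConfigurationManager.ConnectionStrings["StorageConnectionString"].ConnectionString;
        var storageAccount = CloudStorageAccount.Parse(connectionString);
        var blobClient = storageAccount.CreateCloudBlobClient();
        container = blobClient.GetContainerReference(containerName);
    }

    // The IFileSystem members (AddFile, OpenFile, DeleteFile, FileExists, GetUrl, ...)
    // then read from and write to the blob container instead of the local /media folder.
}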

Related

Is it possible to get a batch of text content through Azure DevOps REST API?

I need to get (not download) the content from ~10,000 manifest files within a project in Azure DevOps, but I haven't managed to achieve this. I have found several ways to retrieve the content from one file at a time, but in this context that is neither an efficient nor a sustainable solution. I have managed to retrieve all files of a particular file type by checking whether the file path ends with the manifest file name and then using the TfvcHttpClientBase.GetItemsBatch method. However, this method does not return the item's content.
Program.cs
using Microsoft.TeamFoundation.SourceControl.WebApi;
AzureRest azureRest = new AzureRest();
var tfvcItems = azureRest.GetTfvcItems();
List<TfvcItemDescriptor> itemDescriptorsList = new List<TfvcItemDescriptor>();
foreach(var item in tfvcItems)
{
//Example manifest file .NET
if (item.Path.EndsWith("packages.config"))
{
var itemDescriptor = new TfvcItemDescriptor()
{
Path = item.Path,
RecursionLevel = VersionControlRecursionType.None,
Version = "",
VersionOption = TfvcVersionOption.None,
VersionType = TfvcVersionType.Latest
};
itemDescriptorsList.Add(itemDescriptor);
}
}
TfvcItemDescriptor[] itemDescriptorsArray = itemDescriptorsList.ToArray();
var itemBatch = azureRest.GetTfvcItemsBatch(itemDescriptorsArray);
foreach(var itemList in itemBatch)
{
foreach(var itemListList in itemList)
{
Console.WriteLine("Content: " + itemListList.Content); //empty/null
Console.WriteLine("ContentMetadata: " + itemListList.ContentMetadata); //not empty/null
}
}
AzureRest.cs
using Microsoft.TeamFoundation.SourceControl.WebApi;
using Microsoft.VisualStudio.Services.Common;
using Microsoft.VisualStudio.Services.WebApi;
public class AzureRest
{
const string ORG_URL = "https://org/url/url";
const string PROJECT = "Project";
const string PAT = "PersonalAccessToken";
private string GetTokenConfig()
{
return PAT;
}
private string GetProjectNameConfig()
{
return PROJECT;
}
private VssConnection Authenticate()
{
string token = GetTokenConfig();
string projectName = GetProjectNameConfig();
var credentials = new VssBasicCredential(string.Empty, token);
var connection = new VssConnection(new Uri(ORG_URL), credentials);
return connection;
}
public List<TfvcItem> GetTfvcItems()
{
var connection = Authenticate();
using (TfvcHttpClient tfvcClient = connection.GetClient<TfvcHttpClient>())
{
var tfvcItems = tfvcClient.GetItemsAsync(scopePath: "/Path", recursionLevel: VersionControlRecursionType.Full, true).Result;
return tfvcItems;
}
}
public List<List<TfvcItem>> GetTfvcItemsBatch(TfvcItemDescriptor[] itemDescriptors)
{
TfvcItemRequestData requestData = new TfvcItemRequestData()
{
IncludeContentMetadata = true,
IncludeLinks = true,
ItemDescriptors = itemDescriptors
};
var connection = Authenticate();
using (TfvcHttpClient tfvcClient = connection.GetClient<TfvcHttpClient>())
{
var tfvcItems = tfvcClient.GetItemsBatchAsync(requestData).Result;
return tfvcItems;
}
}
}
For reference:
I have tested the code you shared and, when debugging, found that there is no content specified in "itemDescriptorsList", which is why you cannot get the text content.
You should first check and add the content property to the "itemDescriptorsList".

Storing data in local folder is limited

I am trying to save data locally to my device app folder.
When I try to save collected data on an actual Android smartphone, it doesn't work properly. It is limited by name and file type, as I cannot change it from test.txt, and it is limited in string length, as a maximum of twelve characters gets saved.
I have acquired the following permissions:
<uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
This is my code:
MainPage.xaml.cs
private async void ExportData(object sender, EventArgs e)
{
var items = await App.Database.GetDataAsync();
DependencyService.Get<IFileService>().CreateFile(items);
}
Interface
using System;
using System.Collections.Generic;
using System.Text;
namespace LocationApp.Interface
{
public interface IFileService
{
void CreateFile(List<LocationData> items);
}
}
Service
using Android.App;
using LocationApp.Droid;
using LocationApp.Interface;
using System.Collections.Generic;
using System.IO;
[assembly:Xamarin.Forms.Dependency(typeof(FileService))]
namespace LocationApp.Droid
{
public class FileService : IFileService
{
public string GetRootPath()
{
return Application.Context.GetExternalFilesDir(null).ToString();
}
public void CreateFile(List<LocationData> items)
{
var fileName = "test-file.txt";
var destination = Path.Combine(GetRootPath(), fileName);
string[] text = new string[items.Count];
for (int i = 0; i < text.Length; i++)
{
text[i] = $"{items[i].Latitude},{items[i].Longitude},{items[i].Day},{items[i].Time}";
}
File.WriteAllLines(destination, text);
}
}
}
I also attempted to see what would happen on an emulator. I used a Pixel 2 with Android 9.0 (API 28), where I got the following error:
[ContextImpl] Failed to ensure /storage/120E-0B1B/Android/data/com.companyname.locationapp/files: java.lang.IllegalStateException: Failed to prepare /storage/120E-0B1B/Android/data/com.companyname.locationapp/files/: android.os.ServiceSpecificException: (code -13)
In the end, I only care about putting all my data in a single file. I provided the filename and the emulator error in case the problem stems from them; if not, I do not care whether they are fixed or fixable.
Based on your code, I created a simple demo, and it works on my Android emulator (Android 11). It writes to the app's internal files directory (Environment.SpecialFolder.Personal), so no external-storage permissions are needed.
You can test it on your side.
The code is:
public void CreateFile(List<LocationData> items)
{
var fileName = "test-file.txt";
var documentsPath = Environment.GetFolderPath(Environment.SpecialFolder.Personal);
var destination = Path.Combine(documentsPath, fileName);
string[] text = new string[items.Count];
for (int i = 0; i < text.Length; i++)
{
text[i] = $"{items[i].Latitude},{items[i].Longitude}";
}
File.WriteAllLines(destination, text);
}
After saving the data, I could read it back with the following code (the filename is test-file.txt):
public string ReadData(string filename)
{
var documentsPath = Environment.GetFolderPath(Environment.SpecialFolder.Personal);
var filePath = Path.Combine(documentsPath, filename);
return File.ReadAllText(filePath);
}
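If you want to call that read method from your shared project, here is a small sketch of one way to wire it up; the ReadFile method and the ImportData handler are hypothetical additions, mirroring the DependencyService pattern already used in the question.
// Hypothetical extension of the IFileService interface from the question.
public interface IFileService
{
    void CreateFile(List<LocationData> items);
    string ReadFile(string filename);
}

// Shared-project usage, mirroring the ExportData handler above.
private void ImportData(object sender, EventArgs e)
{
    var text = DependencyService.Get<IFileService>().ReadFile("test-file.txt");
    // parse or display the CSV lines here
}
The Android implementation of ReadFile would simply be the ReadData method shown above, renamed to match the interface.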

Parse WebCacheV01.dat in C#

I'm looking to parse the WebCacheV01.dat file using C# to find the last file location for upload in an Internet browser.
%LocalAppData%\Microsoft\Windows\WebCache\WebCacheV01.dat
I'm using the Managed Esent NuGet package.
Esent.Isam
Esent.Interop
When I try to run the code below, it fails at:
Api.JetGetDatabaseFileInfo(filePath, out pageSize, JET_DbInfo.PageSize);
Or if I use
Api.JetSetSystemParameter(instance, JET_SESID.Nil, JET_param.CircularLog, 1, null);
at
Api.JetAttachDatabase(sesid, filePath, AttachDatabaseGrbit.ReadOnly);
I get the following error:
An unhandled exception of type
'Microsoft.Isam.Esent.Interop.EsentFileAccessDeniedException' occurred
in Esent.Interop.dll
Additional information: Cannot access file, the file is locked or in use
string localAppDataPath = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData);
string filePathExtra = @"\Microsoft\Windows\WebCache\WebCacheV01.dat";
string filePath = string.Format("{0}{1}", localAppDataPath, filePathExtra);
JET_INSTANCE instance;
JET_SESID sesid;
JET_DBID dbid;
JET_TABLEID tableid;
String connect = "";
JET_SNP snp;
JET_SNT snt;
object data;
int numInstance = 0;
JET_INSTANCE_INFO [] instances;
int pageSize;
JET_COLUMNDEF columndef = new JET_COLUMNDEF();
JET_COLUMNID columnid;
Api.JetCreateInstance(out instance, "instance");
Api.JetGetDatabaseFileInfo(filePath, out pageSize, JET_DbInfo.PageSize);
Api.JetSetSystemParameter(JET_INSTANCE.Nil, JET_SESID.Nil, JET_param.DatabasePageSize, pageSize, null);
//Api.JetSetSystemParameter(instance, JET_SESID.Nil, JET_param.CircularLog, 1, null);
Api.JetInit(ref instance);
Api.JetBeginSession(instance, out sesid, null, null);
//Do stuff in db
Api.JetEndSession(sesid, EndSessionGrbit.None);
Api.JetTerm(instance);
Is it not possible to read this without making modifications?
Viewer
http://www.nirsoft.net/utils/ese_database_view.html
Python
https://jon.glass/attempts-to-parse-webcachev01-dat/
libesedb
impacket
Issue:
The file is probably in use.
Solution:
In order to free the locked file, stop the scheduled task \Microsoft\Windows\Wininet\CacheTask.
The Code
public override IEnumerable<string> GetBrowsingHistoryUrls(FileInfo fileInfo)
{
var fileName = fileInfo.FullName;
var results = new List<string>();
try
{
int pageSize;
Api.JetGetDatabaseFileInfo(fileName, out pageSize, JET_DbInfo.PageSize);
SystemParameters.DatabasePageSize = pageSize;
using (var instance = new Instance("Browsing History"))
{
var param = new InstanceParameters(instance);
param.Recovery = false;
instance.Init();
using (var session = new Session(instance))
{
Api.JetAttachDatabase(session, fileName, AttachDatabaseGrbit.ReadOnly);
JET_DBID dbid;
Api.JetOpenDatabase(session, fileName, null, out dbid, OpenDatabaseGrbit.ReadOnly);
using (var tableContainers = new Table(session, dbid, "Containers", OpenTableGrbit.ReadOnly))
{
IDictionary<string, JET_COLUMNID> containerColumns = Api.GetColumnDictionary(session, tableContainers);
if (Api.TryMoveFirst(session, tableContainers))
{
do
{
var retrieveColumnAsInt32 = Api.RetrieveColumnAsInt32(session, tableContainers, containerColumns["ContainerId"]);
if (retrieveColumnAsInt32 != null)
{
var containerId = (int)retrieveColumnAsInt32;
using (var table = new Table(session, dbid, "Container_" + containerId, OpenTableGrbit.ReadOnly))
{
var tableColumns = Api.GetColumnDictionary(session, table);
if (Api.TryMoveFirst(session, table))
{
do
{
var url = Api.RetrieveColumnAsString(
session,
table,
tableColumns["Url"],
Encoding.Unicode);
var downloadedFileName = Api.RetrieveColumnAsString(
session,
table,
columnIds2["Filename"]);
if(string.IsNullOrEmpty(downloadedFileName)) // check for download history only.
continue;
// Order by access Time to find the last uploaded file.
var accessedTime = Api.RetrieveColumnAsInt64(
session,
table,
columnIds2["AccessedTime"]);
var lastVisitTime = accessedTime.HasValue ? DateTime.FromFileTimeUtc(accessedTime.Value) : DateTime.MinValue;
results.Add(url);
}
while (Api.TryMoveNext(session, table.JetTableid));
}
}
}
} while (Api.TryMoveNext(session, tableContainers));
}
}
}
}
}
catch (Exception ex)
{
// log goes here....
}
return results;
}
Utils
Task Scheduler Wrapper
You can use the Microsoft.Win32.TaskScheduler.TaskService wrapper to stop it from C#; just add this NuGet package: https://taskscheduler.codeplex.com/
Usage
public static FileInfo CopyLockedFileRtl(DirectoryInfo directory, FileInfo fileInfo, string remoteEndPoint)
{
FileInfo copiedFileInfo = null;
using (var ts = new TaskService(string.Format(@"\\{0}", remoteEndPoint)))
{
var task = ts.GetTask(@"\Microsoft\Windows\Wininet\CacheTask");
task.Stop();
task.Enabled = false;
var byteArray = FileHelper.ReadOnlyAllBytes(fileInfo);
var filePath = Path.Combine(directory.FullName, "unlockedfile.dat");
File.WriteAllBytes(filePath, byteArray);
copiedFileInfo = new FileInfo(filePath);
task.Enabled = true;
task.Run();
task.Dispose();
}
return copiedFileInfo;
}
I was not able to get Adam's answer to work. What worked for me was making a copy with AlphaVSS (a .NET class library that has a managed API for the Volume Shadow Copy Service). The file was in "Dirty Shutdown" state, so I additionally wrote this to handle the exception it threw when I opened it:
catch (EsentErrorException ex)
{ // Usually after the database is copied, it's in Dirty Shutdown state
// This can be verified by running "esentutl.exe /Mh WebCacheV01.dat"
logger.Info(ex.Message);
switch (ex.Error)
{
case JET_err.SecondaryIndexCorrupted:
logger.Info("Secondary Index Corrupted detected, exiting...");
Api.JetTerm2(instance, TermGrbit.Complete);
return false;
case JET_err.DatabaseDirtyShutdown:
logger.Info("Dirty shutdown detected, attempting to recover...");
try
{
Api.JetTerm2(instance, TermGrbit.Complete);
Process.Start("esentutl.exe", "/p /o " + newPath);
Thread.Sleep(5000);
Api.JetInit(ref instance);
Api.JetBeginSession(instance, out sessionId, null, null);
Api.JetAttachDatabase(sessionId, newPath, AttachDatabaseGrbit.None);
}
catch (Exception e2)
{
logger.Info("Could not recover database " + newPath + ", will try opening it one last time. If that doesn't work, try using other esentutl commands", e2);
}
break;
}
}
I'm thinking about using the 'Recent Items' folder, since when you select a file to upload, an entry is written here:
C:\Users\USER\AppData\Roaming\Microsoft\Windows\Recent
string recent = (Environment.GetFolderPath(Environment.SpecialFolder.Recent));
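As a minimal sketch of that idea (the .lnk filtering and ordering are my own assumptions, not a tested forensic method), you can list the shortcuts in the Recent folder and take the most recently written one:
using System;
using System.IO;
using System.Linq;

// Pick the newest shortcut in the Recent Items folder; it should correspond
// to the last file the user selected in an open/upload dialog.
string recent = Environment.GetFolderPath(Environment.SpecialFolder.Recent);
FileInfo lastUsed = new DirectoryInfo(recent)
    .GetFiles("*.lnk")
    .OrderByDescending(f => f.LastWriteTimeUtc)
    .FirstOrDefault();
if (lastUsed != null)
{
    Console.WriteLine("Most recent shortcut: " + lastUsed.Name);
}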

How to check if Azure Blob file Exists or Not

I want to check whether a particular file exists in Azure Blob Storage. Is it possible to check by specifying its file name? Each time I get a File Not Found error.
var blob = client.GetContainerReference(containerName).GetBlockBlobReference(blobFileName);
if (blob.Exists())
//do your stuff
This extension method should help you:
public static class BlobExtensions
{
public static bool Exists(this CloudBlob blob)
{
try
{
blob.FetchAttributes();
return true;
}
catch (StorageClientException e)
{
if (e.ErrorCode == StorageErrorCode.ResourceNotFound)
{
return false;
}
else
{
throw;
}
}
}
}
Usage:
static void Main(string[] args)
{
var blob = CloudStorageAccount.DevelopmentStorageAccount
.CreateCloudBlobClient().GetBlobReference(args[0]);
// or CloudStorageAccount.Parse("<your connection string>")
if (blob.Exists())
{
Console.WriteLine("The blob exists!");
}
else
{
Console.WriteLine("The blob doesn't exist.");
}
}
http://blog.smarx.com/posts/testing-existence-of-a-windows-azure-blob
With the updated SDK, once you have the CloudBlobReference you can call Exists() on your reference.
UPDATE
The relevant documentation has been moved to https://learn.microsoft.com/en-us/dotnet/api/microsoft.windowsazure.storage.blob.cloudblob.exists?view=azurestorage-8.1.3#Microsoft_WindowsAzure_Storage_Blob_CloudBlob_Exists_Microsoft_WindowsAzure_Storage_Blob_BlobRequestOptions_Microsoft_WindowsAzure_Storage_OperationContext_
My implementation using WindowsAzure.Storage v2.0.6.1
private CloudBlockBlob GetBlobReference(string filePath, bool createContainerIfMissing = true)
{
CloudBlobClient client = _account.CreateCloudBlobClient();
CloudBlobContainer container = client.GetContainerReference("my-container");
if ( createContainerIfMissing && container.CreateIfNotExists())
{
//Public blobs allow for public access to the image via the URI
//But first, make sure the blob exists
container.SetPermissions(new BlobContainerPermissions { PublicAccess = BlobContainerPublicAccessType.Blob });
}
CloudBlockBlob blob = container.GetBlockBlobReference(filePath);
return blob;
}
public bool Exists(String filepath)
{
var blob = GetBlobReference(filepath, false);
return blob.Exists();
}
Use the ExistsAsync method of CloudBlockBlob.
bool blobExists = await cloudBlobContainer.GetBlockBlobReference("<name of blob>").ExistsAsync();
Using Microsoft.WindowsAzure.Storage.Blob version 4.3.0.0, the following code should work (there are a lot of breaking changes with older versions of this assembly):
Using the container/blob name and the given API (it seems Microsoft have now actually implemented this):
return _blobClient.GetContainerReference(containerName).GetBlockBlobReference(blobName).Exists();
Using blob URI (workaround):
try
{
CloudBlockBlob cb = (CloudBlockBlob) _blobClient.GetBlobReferenceFromServer(new Uri(url));
cb.FetchAttributes();
}
catch (StorageException se)
{
if (se.Message.Contains("404") || se.Message.Contains("Not Found"))
{
return false;
}
}
return true;
(FetchAttributes will fail if the blob does not exist. Dirty, I know :)
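If you'd rather not parse the exception message, a sketch of the same workaround (still assuming the v4.x Microsoft.WindowsAzure.Storage SDK) can check the HTTP status code on the StorageException instead:
private bool BlobExistsByUri(CloudBlobClient blobClient, Uri blobUri)
{
    try
    {
        // GetBlobReferenceFromServer issues a HEAD request and throws if the blob is missing.
        CloudBlockBlob blob = (CloudBlockBlob)blobClient.GetBlobReferenceFromServer(blobUri);
        blob.FetchAttributes();
        return true;
    }
    catch (StorageException se)
    {
        // 404 means the blob (or its container) does not exist; anything else is a real error.
        if (se.RequestInformation != null && se.RequestInformation.HttpStatusCode == 404)
        {
            return false;
        }
        throw;
    }
}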
Using the new package Azure.Storage.Blobs
BlobServiceClient blobServiceClient = new BlobServiceClient("YourStorageConnectionString");
BlobContainerClient containerClient = blobServiceClient.GetBlobContainerClient("YourContainerName");
BlobClient blobClient = containerClient.GetBlobClient("YourFileName");
Then check whether it exists:
if (blobClient.Exists()){
//your code
}
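An async variant of the same check, reusing the blobClient assumed above:
bool exists = (await blobClient.ExistsAsync()).Value;
if (exists)
{
    // your code
}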
With the latest version of the SDK, you need to use the ExistsAsync method:
public async Task<bool> FileExists(string fileName)
{
return await directory.GetBlockBlobReference(fileName).ExistsAsync();
}
Here is the code sample.
This complete example is here to help.
public class TestBlobStorage
{
public bool BlobExists(string containerName, string blobName)
{
BlobServiceClient blobServiceClient = new BlobServiceClient(#"<connection string here>");
var container = blobServiceClient.GetBlobContainerClient(containerName);
var blob = container.GetBlobClient(blobName);
return blob.Exists();
}
}
Then you can test it in Main:
static void Main(string[] args)
{
TestBlobStorage t = new TestBlobStorage();
Console.WriteLine("blob exists: {0}", t.BlobExists("image-test", "AE665.jpg"));
Console.WriteLine("--done--");
Console.ReadLine();
}
Important: I found that the file names are case-sensitive.
## dbutils.widgets.get reads the key-value parameters passed to the Databricks job
storage_account_name= dbutils.widgets.get("storage_account_name")
container_name= dbutils.widgets.get("container_name")
transcripts_path_intent= dbutils.widgets.get("transcripts_path_intent")
# Read azure blob access key from dbutils
storage_account_access_key = dbutils.secrets.get(scope = "inteliserve-blob-storage-secret-scope", key = "storage-account-key")
from azure.storage.blob import BlockBlobService
block_blob_service = BlockBlobService(account_name=storage_account_name, account_key=storage_account_access_key)
def blob_exists():
container_name2 = container_name
blob_name = transcripts_path_intent
exists=(block_blob_service.exists(container_name2, blob_name))
return exists
blobstat = blob_exists()
print(blobstat)

How to implement and do OCR in a C# project?

I've been searching for a while, and all I've seen are requests for OCR libraries. I would like to know how to implement the purest, easiest-to-install-and-use OCR library, with detailed installation info, in a C# project.
If possible, I just want to reference it like a usual DLL...
Example:
using org.pdfbox.pdmodel;
using org.pdfbox.util;
Also a little OCR code example would be nice, such as:
public string OCRFromBitmap(Bitmap Bmp)
{
Bmp.Save(temppath, System.Drawing.Imaging.ImageFormat.Tiff);
string OcrResult = Analyze(temppath);
File.Delete(temppath);
return OcrResult;
}
So please consider that I'm not familiar with OCR projects, and give me an answer as if you were talking to a dummy.
Edit:
I guess people misunderstood my request. I wanted to know how to add those open-source OCR libraries to a C# project and how to use them. The link given as a duplicate does not answer what I asked at all.
If anyone is looking into this, I've been trying different options and the following approach yields very good results. The following are the steps to get a working example:
Add the .NET wrapper for Tesseract to your project. It can be added via the NuGet package: Install-Package Tesseract (https://github.com/charlesw/tesseract).
Go to the Downloads section of the official Tesseract project (https://code.google.com/p/tesseract-ocr/ EDIT: It's now located here: https://github.com/tesseract-ocr/langdata).
Download the preferred language data, for example tesseract-ocr-3.02.eng.tar.gz (English language data for Tesseract 3.02).
Create a tessdata directory in your project and place the language data files in it.
Go to Properties of the newly added files and set them to copy on build.
Add a reference to System.Drawing.
From the .NET wrapper repository's Samples directory, copy the sample phototest.tif file into your project directory and set it to copy on build.
Create the following two files in your project (just to get started):
Program.cs
using System;
using Tesseract;
using System.Diagnostics;
namespace ConsoleApplication
{
class Program
{
public static void Main(string[] args)
{
var testImagePath = "./phototest.tif";
if (args.Length > 0)
{
testImagePath = args[0];
}
try
{
var logger = new FormattedConsoleLogger();
var resultPrinter = new ResultPrinter(logger);
using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
{
using (var img = Pix.LoadFromFile(testImagePath))
{
using (logger.Begin("Process image"))
{
var i = 1;
using (var page = engine.Process(img))
{
var text = page.GetText();
logger.Log("Text: {0}", text);
logger.Log("Mean confidence: {0}", page.GetMeanConfidence());
using (var iter = page.GetIterator())
{
iter.Begin();
do
{
if (i % 2 == 0)
{
using (logger.Begin("Line {0}", i))
{
do
{
using (logger.Begin("Word Iteration"))
{
if (iter.IsAtBeginningOf(PageIteratorLevel.Block))
{
logger.Log("New block");
}
if (iter.IsAtBeginningOf(PageIteratorLevel.Para))
{
logger.Log("New paragraph");
}
if (iter.IsAtBeginningOf(PageIteratorLevel.TextLine))
{
logger.Log("New line");
}
logger.Log("word: " + iter.GetText(PageIteratorLevel.Word));
}
} while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));
}
}
i++;
} while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
}
}
}
}
}
}
catch (Exception e)
{
Trace.TraceError(e.ToString());
Console.WriteLine("Unexpected Error: " + e.Message);
Console.WriteLine("Details: ");
Console.WriteLine(e.ToString());
}
Console.Write("Press any key to continue . . . ");
Console.ReadKey(true);
}
private class ResultPrinter
{
readonly FormattedConsoleLogger logger;
public ResultPrinter(FormattedConsoleLogger logger)
{
this.logger = logger;
}
public void Print(ResultIterator iter)
{
logger.Log("Is beginning of block: {0}", iter.IsAtBeginningOf(PageIteratorLevel.Block));
logger.Log("Is beginning of para: {0}", iter.IsAtBeginningOf(PageIteratorLevel.Para));
logger.Log("Is beginning of text line: {0}", iter.IsAtBeginningOf(PageIteratorLevel.TextLine));
logger.Log("Is beginning of word: {0}", iter.IsAtBeginningOf(PageIteratorLevel.Word));
logger.Log("Is beginning of symbol: {0}", iter.IsAtBeginningOf(PageIteratorLevel.Symbol));
logger.Log("Block text: \"{0}\"", iter.GetText(PageIteratorLevel.Block));
logger.Log("Para text: \"{0}\"", iter.GetText(PageIteratorLevel.Para));
logger.Log("TextLine text: \"{0}\"", iter.GetText(PageIteratorLevel.TextLine));
logger.Log("Word text: \"{0}\"", iter.GetText(PageIteratorLevel.Word));
logger.Log("Symbol text: \"{0}\"", iter.GetText(PageIteratorLevel.Symbol));
}
}
}
}
FormattedConsoleLogger.cs
using System;
using System.Collections.Generic;
using System.Text;
using Tesseract;
namespace ConsoleApplication
{
public class FormattedConsoleLogger
{
const string Tab = " ";
private class Scope : DisposableBase
{
private int indentLevel;
private string indent;
private FormattedConsoleLogger container;
public Scope(FormattedConsoleLogger container, int indentLevel)
{
this.container = container;
this.indentLevel = indentLevel;
StringBuilder indent = new StringBuilder();
for (int i = 0; i < indentLevel; i++)
{
indent.Append(Tab);
}
this.indent = indent.ToString();
}
public void Log(string format, object[] args)
{
var message = String.Format(format, args);
StringBuilder indentedMessage = new StringBuilder(message.Length + indent.Length * 10);
int i = 0;
bool isNewLine = true;
while (i < message.Length)
{
if (i + 1 < message.Length && message[i] == '\r' && message[i + 1] == '\n')
{
indentedMessage.AppendLine();
isNewLine = true;
i += 2;
}
else if (message[i] == '\r' || message[i] == '\n')
{
indentedMessage.AppendLine();
isNewLine = true;
i++;
}
else
{
if (isNewLine)
{
indentedMessage.Append(indent);
isNewLine = false;
}
indentedMessage.Append(message[i]);
i++;
}
}
Console.WriteLine(indentedMessage.ToString());
}
public Scope Begin()
{
return new Scope(container, indentLevel + 1);
}
protected override void Dispose(bool disposing)
{
if (disposing)
{
var scope = container.scopes.Pop();
if (scope != this)
{
throw new InvalidOperationException("Format scope removed out of order.");
}
}
}
}
private Stack<Scope> scopes = new Stack<Scope>();
public IDisposable Begin(string title = "", params object[] args)
{
Log(title, args);
Scope scope;
if (scopes.Count == 0)
{
scope = new Scope(this, 1);
}
else
{
scope = ActiveScope.Begin();
}
scopes.Push(scope);
return scope;
}
public void Log(string format, params object[] args)
{
if (scopes.Count > 0)
{
ActiveScope.Log(format, args);
}
else
{
Console.WriteLine(String.Format(format, args));
}
}
private Scope ActiveScope
{
get
{
var top = scopes.Peek();
if (top == null) throw new InvalidOperationException("No current scope");
return top;
}
}
}
}
Here's one: (check out http://hongouru.blogspot.ie/2011/09/c-ocr-optical-character-recognition.html or http://www.codeproject.com/Articles/41709/How-To-Use-Office-2007-OCR-Using-C for more info)
using MODI;
static void Main(string[] args)
{
DocumentClass myDoc = new DocumentClass();
myDoc.Create(@"theDocumentName.tiff"); //we work with the .tiff extension
myDoc.OCR(MiLANGUAGES.miLANG_ENGLISH, true, true);
foreach (Image anImage in myDoc.Images)
{
Console.WriteLine(anImage.Layout.Text); //here we cout to the console.
}
}
I'm using tesseract OCR engine with TessNet2 (a C# wrapper - http://www.pixel-technology.com/freeware/tessnet2/).
Some basic code:
using tessnet2;
...
Bitmap image = new Bitmap(@"u:\user files\bwalker\2849257.tif");
tessnet2.Tesseract ocr = new tessnet2.Tesseract();
ocr.SetVariable("tessedit_char_whitelist", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,$-/#&=()\"':?"); // Accepted characters
ocr.Init(@"C:\Users\bwalker\Documents\Visual Studio 2010\Projects\tessnetWinForms\tessnetWinForms\bin\Release\", "eng", false); // Directory of your tessdata folder
List<tessnet2.Word> result = ocr.DoOCR(image, System.Drawing.Rectangle.Empty);
string Results = "";
foreach (tessnet2.Word word in result)
{
Results += word.Confidence + ", " + word.Text + ", " + word.Left + ", " + word.Top + ", " + word.Bottom + ", " + word.Right + "\n";
}
Some online APIs work pretty well: OCR.space and Google Cloud Vision. Both of these are free, as long as you do fewer than 1,000 OCRs per month. You can drag and drop an image to do a quick manual test of how they perform on your images.
I find OCR.space easier to use (no messing around with NuGet libraries), but for my purposes Google Cloud Vision provided slightly better results than OCR.space.
Google Cloud Vision example:
GoogleCredential cred = GoogleCredential.FromJson(json);
Channel channel = new Channel(ImageAnnotatorClient.DefaultEndpoint.Host, ImageAnnotatorClient.DefaultEndpoint.Port, cred.ToChannelCredentials());
ImageAnnotatorClient client = ImageAnnotatorClient.Create(channel);
Image image = Image.FromStream(stream);
EntityAnnotation googleOcrText = client.DetectText(image).First();
Console.Write(googleOcrText.Description);
OCR.space example:
string uri = $"https://api.ocr.space/parse/imageurl?apikey=helloworld&url={imageUri}";
string responseString = WebUtilities.DoGetRequest(uri);
OcrSpaceResult result = JsonConvert.DeserializeObject<OcrSpaceResult>(responseString);
if ((!result.IsErroredOnProcessing) && !String.IsNullOrEmpty(result.ParsedResults[0].ParsedText))
return result.ParsedResults[0].ParsedText;
A new API is OcrEngine.RecognizeAsync from WinRT/UWP. It can also be used in WinForms:
...
//for AsBuffer
using System.Runtime.InteropServices.WindowsRuntime;
...
async private void button5_Click(object sender, EventArgs e)
{
OcrEngine ocrEngine = null;
ocrEngine = OcrEngine.TryCreateFromUserProfileLanguages();
if (ocrEngine == null) return;
//convert the image to BGRA8 format which is needed by SoftwareBitmap
//is there a better method for this?
Bitmap img = new Bitmap(@"1.png");
byte[] ba = new byte[img.Width * img.Height * 4];
int o = 0;
for (int y = 0; y < img.Height; y++)
{
for (int x = 0; x < img.Width; x++)
{
var p = img.GetPixel(x, y);
ba[o++] = p.B;
ba[o++] = p.G;
ba[o++] = p.R;
ba[o++] = p.A;
}
}
var buffer = ba.AsBuffer();
var outputBitmap = SoftwareBitmap.CreateCopyFromBuffer(
buffer,
BitmapPixelFormat.Bgra8,
img.Width,
img.Height);
var ocrResult = await ocrEngine.RecognizeAsync(outputBitmap);
}
To use the WinRT/UWP API in WinForms, add the NuGet package "Microsoft.Windows.SDK.Contracts" (version 10.0.17134.100 for the Win10 1803 SDK was tested here) as described here.
