I'm trying to convert screenshots taken on an HDR display via IDXGIOutput6.DuplicateOutput1() into SDR GDI+ bitmaps. The color space of the display is DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020 and the pixel format is DXGI_FORMAT_R16G16B16A16_FLOAT.
I can't find a series of Direct2D transforms that actually gives me a reasonable result though. I've tried many different combinations of effects and settings, but the output continues to either come out too bright, washed out, low contrast, or oversaturated.
The gist of what I'm doing is:
Grab HDR frame from the output
Bring it into a D2D bitmap
Calculate max content light level
Change color space to scRGB
Apply HDR tone mapping
Change color space to sRGB
Use WIC to convert to GUID_WICPixelFormat32bppPBGRA
Copy raw pixel data to a GDI+ bitmap with format PixelFormat.Format32bppPArgb
Currently I'm seeing the issue of the output being too bright, but I have no idea why. Any help in figuring out what the transform chain actually should be would be appreciated.
private const float NominalRefWhite = 80.0f;
private const int HistNumBins = 400;
private const float HistGamma = 0.1f;
private const int HistMaxNits = 1000000;
private const float OutputNits = 80.0f;
private Bitmap CaptureDxgiOutputFrame(IComObject<IDXGIOutput6> output, IComObject<ID3D11Device> device, IComObject<ID3D11DeviceContext> deviceCtx)
{
output.Object.GetDesc1(out var outputDesc).ThrowOnError();
Debug.WriteLine($"CaptureDxgiOutputFrame - OutputDesc: {ToDebugString(outputDesc)}");
var (frameInfo, frameTex) = DuplicateOutputFrame(output, device, deviceCtx);
using (var d2dDev = D2DFactoryInstance.Object.CreateDevice<ID2D1Device6>(device.As<IDXGIDevice>(true)))
using (var d2dCtx = d2dDev.Object.CreateDeviceContext<ID2D1DeviceContext6>())
{
var frameSurface = frameTex.AsNonOwned<IDXGISurface>();
frameTex.Object.GetDesc(out var frameDesc);
Debug.WriteLine($"CaptureDxgiOutputFrame - FrameDesc: {ToDebugString(frameDesc)}");
using (var inputColorContext = d2dCtx.CreateColorContextFromDxgiColorSpace(outputDesc.ColorSpace))
using (var scRgbColorContext = d2dCtx.CreateColorContext(D2D1_COLOR_SPACE.D2D1_COLOR_SPACE_SCRGB))
using (var sRgbColorContext = d2dCtx.CreateColorContext(D2D1_COLOR_SPACE.D2D1_COLOR_SPACE_SRGB))
using (var frameBmp = d2dCtx.CreateBitmapFromDxgiSurface(frameSurface, frameDesc.Format, inputColorContext))
using (var targetBmp = d2dCtx.CreateBitmap(frameDesc.Width, frameDesc.Height,
DXGI_FORMAT.DXGI_FORMAT_R16G16B16A16_FLOAT, sRgbColorContext,
D2D1_BITMAP_OPTIONS.D2D1_BITMAP_OPTIONS_TARGET | D2D1_BITMAP_OPTIONS.D2D1_BITMAP_OPTIONS_CANNOT_DRAW))
using (var inputToScRgbEffect = new D2DColorManagementEffect(d2dCtx))
using (var scRgbToSRgbEffect = new D2DColorManagementEffect(d2dCtx))
using (var hdrToneMapEffect = new D2DHdrToneMapEffect(d2dCtx))
{
d2dCtx.SetTarget(targetBmp);
var maxCll = CalculateMaxCll(d2dCtx, frameBmp);
Debug.WriteLine($"CaptureDxgiOutputFrame - Max CLL: {maxCll}");
inputToScRgbEffect.Quality = D2D1_COLORMANAGEMENT_QUALITY.D2D1_COLORMANAGEMENT_QUALITY_BEST;
inputToScRgbEffect.InputColorContext = inputColorContext;
inputToScRgbEffect.OutputColorContext = scRgbColorContext;
scRgbToSRgbEffect.Quality = D2D1_COLORMANAGEMENT_QUALITY.D2D1_COLORMANAGEMENT_QUALITY_BEST;
scRgbToSRgbEffect.InputColorContext = scRgbColorContext;
scRgbToSRgbEffect.OutputColorContext = sRgbColorContext;
hdrToneMapEffect.InputMaxLuminance = maxCll;
hdrToneMapEffect.OutputMaxLuminance = OutputNits;
hdrToneMapEffect.DisplayMode = D2D1_HDRTONEMAP_DISPLAY_MODE.D2D1_HDRTONEMAP_DISPLAY_MODE_SDR;
inputToScRgbEffect.SetInput(frameBmp);
hdrToneMapEffect.SetInput(inputToScRgbEffect);
scRgbToSRgbEffect.SetInput(hdrToneMapEffect);
d2dCtx.BeginDraw();
d2dCtx.DrawImage(scRgbToSRgbEffect.Effect);
d2dCtx.EndDraw();
var gdiBmp = To32BppGdiBitmap(d2dCtx, targetBmp);
gdiBmp.Save(DebugDir + "gdi.bmp", ImageFormat.Bmp);
return gdiBmp;
}
}
}
private static Bitmap To32BppGdiBitmap(IComObject<ID2D1DeviceContext6> d2dCtx, IComObject<ID2D1Bitmap> srcBmp)
{
var srcSize = srcBmp.GetSize();
var srcD2DPixelFormat = srcBmp.GetPixelFormat();
Debug.WriteLine($"To32BppGdiBitmap - Source D2D format: {Enum.GetName(typeof(DXGI_FORMAT), srcD2DPixelFormat.format)}, {Enum.GetName(typeof(D2D1_ALPHA_MODE), srcD2DPixelFormat.alphaMode)}");
using (var colorContext = d2dCtx.CreateColorContext(D2D1_COLOR_SPACE.D2D1_COLOR_SPACE_SRGB))
using (var srcBmpCpu = d2dCtx.CreateBitmap((uint)srcSize.width, (uint)srcSize.height, srcD2DPixelFormat.format,
colorContext, D2D1_BITMAP_OPTIONS.D2D1_BITMAP_OPTIONS_CPU_READ | D2D1_BITMAP_OPTIONS.D2D1_BITMAP_OPTIONS_CANNOT_DRAW,
srcD2DPixelFormat.alphaMode))
{
srcBmpCpu.Object.CopyFromBitmap(IntPtr.Zero, srcBmp.Object, IntPtr.Zero).ThrowOnError();
var srcWicPixelFormat = D2DPixelFormatToWicPixelFormat(srcD2DPixelFormat);
Debug.WriteLine($"To32BppGdiBitmap - Source WIC format: {WICPixelFormat.GuidToName[srcWicPixelFormat]}");
using (var mappedData = srcBmpCpu.Map(D2D1_MAP_OPTIONS.D2D1_MAP_OPTIONS_READ))
using (var wicBmp = WicFactoryInstance.CreateBitmapFromMemory((uint)srcSize.width,
(uint)srcSize.height,
srcWicPixelFormat, mappedData.Pitch,
(int)mappedData.Pitch * (int)srcSize.height, mappedData.Bits))
using (var formatConverter = WicFactoryInstance.CreateFormatConverter())
{
var targetWicPixelFormat = WICPixelFormat.Format32bppPBGRA;
formatConverter.Object.Initialize(wicBmp.Object, ref targetWicPixelFormat,
WICBitmapDitherType.WICBitmapDitherTypeNone,
null, 0.0, WICBitmapPaletteType.WICBitmapPaletteTypeCustom);
return ToGdiBitmap(formatConverter);
}
}
}
private static float CalculateMaxCll(IComObject<ID2D1DeviceContext6> d2dCtx, IComObject<ID2D1Bitmap> bmp)
{
using (var histogramMatrixEffect = new D2DColorMatrixEffect(d2dCtx))
using (var histogramGammaEffect = new D2DGammaTransferEffect(d2dCtx))
using (var histogramEffect = new D2DHistogramEffect(d2dCtx))
{
var scale = HistMaxNits / NominalRefWhite;
histogramMatrixEffect.ColorMatrix = new D2D_MATRIX_5X4_F
{
_11 = 0.2126f / scale,
_21 = 0.7152f / scale,
_31 = 0.0722f / scale,
_44 = 1.0f,
};
histogramGammaEffect.RedExponent = HistGamma;
histogramGammaEffect.GreenDisable = true;
histogramGammaEffect.BlueDisable = true;
histogramGammaEffect.AlphaDisable = true;
histogramEffect.NumBins = HistNumBins;
histogramMatrixEffect.SetInput(bmp);
histogramGammaEffect.SetInput(histogramMatrixEffect);
histogramEffect.SetInput(histogramGammaEffect);
d2dCtx.BeginDraw();
d2dCtx.DrawImage(histogramEffect.Effect);
d2dCtx.EndDraw();
var buckets = histogramEffect.ReadOutput();
int maxCllBin = 0;
var runningSum = 0.0;
for (int i = buckets.Length - 1; i >= 0; i--)
{
runningSum += buckets[i];
maxCllBin = i;
if (runningSum >= 1.0 - 0.9999)
break;
}
var binNorm = (double)maxCllBin / HistNumBins;
var maxCll = Math.Pow(binNorm, 1 / HistGamma) * HistMaxNits;
return (float)maxCll;
}
}
private static Bitmap To32BppGdiBitmap(IComObject<ID2D1DeviceContext6> d2dCtx, IComObject<ID2D1Bitmap> srcBmp)
{
var srcSize = srcBmp.GetSize();
var srcD2DPixelFormat = srcBmp.GetPixelFormat();
Debug.WriteLine($"To32BppGdiBitmap - Source D2D format: {Enum.GetName(typeof(DXGI_FORMAT), srcD2DPixelFormat.format)}, {Enum.GetName(typeof(D2D1_ALPHA_MODE), srcD2DPixelFormat.alphaMode)}");
using (var colorContext = d2dCtx.CreateColorContext(D2D1_COLOR_SPACE.D2D1_COLOR_SPACE_SRGB))
using (var srcBmpCpu = d2dCtx.CreateBitmap((uint)srcSize.width, (uint)srcSize.height, srcD2DPixelFormat.format,
colorContext, D2D1_BITMAP_OPTIONS.D2D1_BITMAP_OPTIONS_CPU_READ | D2D1_BITMAP_OPTIONS.D2D1_BITMAP_OPTIONS_CANNOT_DRAW,
srcD2DPixelFormat.alphaMode))
{
srcBmpCpu.Object.CopyFromBitmap(IntPtr.Zero, srcBmp.Object, IntPtr.Zero).ThrowOnError();
var srcWicPixelFormat = D2DPixelFormatToWicPixelFormat(srcD2DPixelFormat);
Debug.WriteLine($"To32BppGdiBitmap - Source WIC format: {WICPixelFormat.GuidToName[srcWicPixelFormat]}");
using (var mappedData = srcBmpCpu.Map(D2D1_MAP_OPTIONS.D2D1_MAP_OPTIONS_READ))
using (var wicBmp = WicFactoryInstance.CreateBitmapFromMemory((uint)srcSize.width,
(uint)srcSize.height,
srcWicPixelFormat, mappedData.Pitch,
(int)mappedData.Pitch * (int)srcSize.height, mappedData.Bits))
using (var formatConverter = WicFactoryInstance.CreateFormatConverter(wicBmp, WICPixelFormat.Format32bppPBGRA,
WICBitmapDitherType.WICBitmapDitherTypeNone, null, 0.0, WICBitmapPaletteType.WICBitmapPaletteTypeCustom))
{
return ToGdiBitmap(formatConverter);
}
}
}
private static Bitmap ToGdiBitmap(IComObject<IWICBitmapSource> srcBmp)
{
if (srcBmp.GetPixelFormat() != WICPixelFormat.Format32bppPBGRA)
throw new ArgumentException("Input must be 32bpp BGRA with pre-multiplied alpha.");
var sz = srcBmp.GetSize();
var stride = (int)sz.width * 4;
var gdiBmp = new Bitmap((int)sz.width, (int)sz.height, PixelFormat.Format32bppPArgb);
var buf = new byte[(int)sz.height * stride];
srcBmp.Object.CopyPixels(IntPtr.Zero, (uint)stride, buf.Length, buf).ThrowOnError();
var gdiBmpData = gdiBmp.LockBits(new Rectangle(0, 0, (int)sz.width, (int)sz.height), ImageLockMode.WriteOnly,
PixelFormat.Format32bppPArgb);
try
{
unsafe
{
fixed (byte* src = buf)
{
Buffer.MemoryCopy(src, (void*)gdiBmpData.Scan0, buf.Length, buf.Length);
}
}
}
finally
{
gdiBmp.UnlockBits(gdiBmpData);
}
return gdiBmp;
}
Related
using System.Drawing;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms.Image;
namespace OnnxTest;
public static class Program
{
public static void Main(string[] args)
{
var tags = File.ReadLines(#"C:\Users\da3ds\Downloads\deepdanbooru-v3-20211112-sgd-e28\tags.txt");
var imageLocation = #"C:\Users\da3ds\Pictures\image.jpg";
var modelLocation = #"C:\Users\da3ds\Downloads\deepdanbooru-v3-20211112-sgd-e28\model-resnet-custom_v3.onnx";
MLContext mlContext = new MLContext();
Console.WriteLine("Read model");
Console.WriteLine($"Model location: {modelLocation}");
Console.WriteLine(
$"Default parameters: image size=({InputModel.imageWidth},{InputModel.imageHeight})");
Console.WriteLine($"Images location: {imageLocation}");
Console.WriteLine("");
Console.WriteLine("=====Identify the objects in the images=====");
Console.WriteLine("");
// Create IDataView from empty list to obtain input data schema
var data = new InputModel { ImagePath = imageLocation };
// Define scoring pipeline
var predictionEngine = GetPredictionEngine(mlContext, modelLocation);
var outputs = predictionEngine.Predict(data);
var outputMapped = tags.Zip(outputs.Scores).Select(t => new { Tag = t.First, f = t.Second })
.ToDictionary(a => a.Tag, a => a.f);
var outputTags = outputMapped.Where(a => Math.Abs(a.Value - 1) < 0.00001f).Select(a => a.Key).OrderBy(a => a)
.ToList();
}
private static PredictionEngine<InputModel, OutputModel> GetPredictionEngine(MLContext mlContext, string modelLocation)
{
var estimator = mlContext.Transforms.LoadImages(InputModel.ModelInput, "", nameof(InputModel.ImagePath))
.Append(mlContext.Transforms.ResizeImages(InputModel.ModelInput, InputModel.imageWidth,
InputModel.imageHeight, InputModel.ModelInput, ImageResizingEstimator.ResizingKind.IsoPad))
.Append(mlContext.Transforms.ExtractPixels(InputModel.ModelInput, InputModel.ModelInput))
.Append(mlContext.Transforms.ApplyOnnxModel(OutputModel.ModelOutput, InputModel.ModelInput,
modelLocation));
var transformer = estimator.Fit(mlContext.Data.LoadFromEnumerable(Array.Empty<InputModel>()));
// Fit scoring pipeline
var predictionEngine = mlContext.Model.CreatePredictionEngine<InputModel, OutputModel>(transformer);
return predictionEngine;
}
class InputModel
{
public const int imageHeight = 512;
public const int imageWidth = 512;
// input tensor name
public const string ModelInput = "input_1:0";
public string ImagePath { get; set; }
[ColumnName(ModelInput)]
[ImageType(imageHeight, imageWidth)]
public Bitmap Image { get; set; }
}
class OutputModel
{
// output tensor name
public const string ModelOutput = "Identity:0";
[ColumnName(ModelOutput)]
public float[] Scores { get; set; }
}
}
I wrote up a very simple test program to try to get an output that matches a python project, only in C# so I could efficiently use it in an ASP.Net api (also just prefer C#). The original Python works, even after I modified it to use onnxruntime instead of keras, which is where the model originated. It gives a float[9176] of scores 0-1, which matches a list of tags in tags.txt, for whether that tag should apply to a given image.
It's a multi-classification problem with TensorFlow. I used the object detection sample to get here, and it returns a result, and the result is...correct, but not. It's rounding for whatever reason.
I'm new to ML and ML.Net has very little out there, so I figured I'd use my first question in a long time and hoping someone can shed some light on this for me.
Ok, new day. I traced the code path of the python project and made an MVP. In doing so, I have very few things to look at the difference of.
import os
import onnxruntime
import skimage.transform
import tensorflow as tf
def main():
# disable CUDA acceleration for simplicity in running the test
# you need drivers, an nvidia gpu, etc. for that
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
image_path = 'C:\\Users\\da3ds\\Pictures\\image.jpg'
model_path = 'C:\\Users\\da3ds\\Downloads\\deepdanbooru-v3-20211112-sgd-e28\\model-resnet-custom_v3.onnx'
# load tags
tags_path = 'C:\\Users\\da3ds\\Downloads\\deepdanbooru-v3-20211112-sgd-e28\\tags.txt'
with open(tags_path, 'r') as tags_stream:
tags = [tag for tag in (tag.strip() for tag in tags_stream) if tag]
# create inference session
model = onnxruntime.InferenceSession(model_path, providers=['CPUExecutionProvider'])
width = model.get_inputs()[0].shape[1] # 512
height = model.get_inputs()[0].shape[2] # 512
image_raw = tf.io.read_file(image_path)
image = tf.io.decode_png(image_raw, channels=3)
image = tf.image.resize(image, size=(width, height), method=tf.image.ResizeMethod.AREA, preserve_aspect_ratio=True)
image = image.numpy() # EagerTensor to np.array
image_width = image.shape[0]
image_height = image.shape[1]
t = skimage.transform.AffineTransform(translation=(-image_width * 0.5, -image_height * 0.5))
t += skimage.transform.AffineTransform(translation=(width * 0.5, height * 0.5))
image = skimage.transform.warp(image, t.inverse, output_shape=(width, height), order=1, mode='edge')
# at this point all widths and heights are probably 512
# normalize the image
image = image / 255.0
image_shape = image.shape
# build the input shape of Vector<1, 512, 512, 3>
image = image.reshape((1, image_shape[0], image_shape[1], image_shape[2]))
onnx_result = model.run(None, {'input_1:0': image})
# onnx_result is 2 arrays deep for reason
# 1 would make sense, as it can handle batches
onnx_result = onnx_result[0][0]
# print a nice result
for i, tag in enumerate(tags):
print(f'({onnx_result[i]:05.3f}) {tag}')
if __name__ == '__main__':
main()
Conveniently, in doing so, I made a mistake in a default value that yielded the same result as the ML.Net results: (not) Normalizing the Image. I couldn't figure out how to do that in the ML.Net pipeline, so I made the array with Magick.Net and fed it to ML.Net directly.
Here's the final code:
using ImageMagick;
using Microsoft.ML;
using Microsoft.ML.Data;
namespace OnnxTest;
public static class Program
{
public static void Main(string[] args)
{
var tags = File.ReadLines(#"C:\Users\da3ds\Downloads\deepdanbooru-v3-20211112-sgd-e28\tags.txt");
var imageLocation = #"C:\Users\da3ds\Pictures\image.jpg";
var modelLocation = #"C:\Users\da3ds\Downloads\deepdanbooru-v3-20211112-sgd-e28\model-resnet-custom_v3.onnx";
MLContext mlContext = new MLContext(seed: 0);
Console.WriteLine("Read model");
Console.WriteLine($"Model location: {modelLocation}");
Console.WriteLine(
$"Default parameters: image size=({InputModel.Width},{InputModel.Height})");
Console.WriteLine($"Images location: {imageLocation}");
Console.WriteLine("");
Console.WriteLine("=====Identify the objects in the images=====");
Console.WriteLine("");
// Create IDataView from empty list to obtain input data schema
var data = new InputModel { Data = GetImage(imageLocation) };
// Define scoring pipeline
var predictionEngine = GetPredictionEngine(mlContext, modelLocation);
var output = predictionEngine.Predict(data);
var outputMapped = tags.Zip(output.Scores).Select(t => new { Tag = t.First, f = t.Second })
.ToDictionary(a => a.Tag, a => a.f);
var outputTags = outputMapped.Where(a => a.Value > 0.80f).Select(a => (Tag: a.Key, Score: a.Value))
.ToList();
foreach (var tag in outputTags)
{
Console.WriteLine($"({tag.Score:P1}) {tag.Tag}");
}
}
private static PredictionEngine<InputModel, OutputModel> GetPredictionEngine(MLContext mlContext, string modelLocation)
{
var transformer = GetBasicTransformer(mlContext, modelLocation);
// Fit scoring pipeline
var predictionEngine = mlContext.Model.CreatePredictionEngine<InputModel, OutputModel>(transformer);
return predictionEngine;
}
private static ITransformer GetBasicTransformer(MLContext mlContext, string modelLocation)
{
var estimator = mlContext.Transforms.ApplyOnnxModel(OutputModel.ModelOutput, InputModel.ModelInput,
modelLocation);
var transformer = estimator.Fit(mlContext.Data.LoadFromEnumerable(Array.Empty<InputModel>()));
return transformer;
}
public static float[] GetImage(string imagePath)
{
using var mImage = new MagickImage(imagePath);
mImage.Quality = 100;
mImage.BackgroundColor = new MagickColor(0, 0, 0);
mImage.HasAlpha = false;
mImage.Resize(new MagickGeometry($"{InputModel.Width}>x{InputModel.Height}>"));
mImage.Extent(InputModel.Width, InputModel.Height, Gravity.Center, new MagickColor(0,0,0));
var pixels = mImage.GetPixels();
var array = pixels.ToArray();
var data = new float[InputModel.Width * InputModel.Height * InputModel.Channels];
for (var index = 0; index < array.Length; index++)
{
data[index] = array[index] / 255.0f;
}
return data;
}
class InputModel
{
public const int Width = 512;
public const int Height = 512;
public const int Channels = 3;
public const string ModelInput = "input_1:0";
[ColumnName(ModelInput)]
[VectorType(1, Width, Height, Channels)]
public float[] Data { get; set; }
}
class OutputModel
{
// output tensor name
public const string ModelOutput = "Identity:0";
[ColumnName(ModelOutput)]
public float[] Scores { get; set; }
}
}
Obviously, the final...final code will be less of an MVP, but this was a test. I leave this as a trail of my efforts in case someone else hits a similar issue. At the very least, it gives my debugging steps and some sample code. Thanks for being my rubber ducks.
I want to replace the pixel data of a DICOM file with another one. I used this code:
public bool ImportImage(string imageFile, string newFilePah, string oldDicomFile)
{
try
{
Bitmap bitmap = new Bitmap(imageFile);
bitmap = GetValidImage(bitmap);
int rows, columns;
byte[] pixels = GetPixels(bitmap, out rows, out columns);
MemoryByteBuffer buffer = new MemoryByteBuffer(pixels);
DicomDataset dataset = new DicomDataset();
var df = DicomFile.Open(oldDicomFile);
FillDataset(ref dataset, df);
DicomTransferSyntax dicomTransfer = df.Dataset.Get<DicomTransferSyntax>(DicomTag.TransferSyntaxUID, DicomTransferSyntax.JPEGProcess14);
dataset.AddOrUpdate(DicomTag.PhotometricInterpretation, PhotometricInterpretation.Rgb.Value);
dataset.AddOrUpdate(DicomTag.Rows, (ushort)rows);
dataset.AddOrUpdate(DicomTag.Columns, (ushort)columns);
dataset.AddOrUpdate(DicomTag.BitsAllocated, (ushort)8);
DicomPixelData pixelData = DicomPixelData.Create(dataset, true);
pixelData.BitsStored = 8;
pixelData.SamplesPerPixel = 3;
pixelData.HighBit = 7;
pixelData.PixelRepresentation = 0;
pixelData.PlanarConfiguration = 0;
pixelData.AddFrame(buffer);
DicomFile dicomfile = new DicomFile(dataset.Clone(dicomTransfer));
dicomfile.Save(newFilePah);
return true;
}
catch(Exception ddd) { return false; }
}
private void FillDataset(ref DicomDataset dataset, DicomFile df)
{
foreach(var item in df.Dataset)
{
if(!item.Tag.Group.ToString().Equals("7FE0") && !item.Tag.Group.ToString().Equals("40"))
dataset.Add(item);
}
}
The output DICOM file loses many tags which affect image display.
I referred to this answer. But the AddOrUpdatePixelData method used in that answer is deprecated in version v4.0.0-rc1 that I am using. So that answer does not help me.
Is there any other way to change the pixel data of a DICOM file using fo-DICOM?
Following code does replace the pixel data correctly.
public static bool ImportImage(string imageFile, string newFilePah, string oldDicomFile)
{
Bitmap bitmap = new Bitmap(imageFile);
int rows, columns;
byte[] pixels = GetPixels(bitmap, out rows, out columns);
MemoryByteBuffer buffer = new MemoryByteBuffer(pixels);
DicomDataset dataset = new DicomDataset();
var dicomfile = DicomFile.Open(oldDicomFile);
dataset = dicomfile.Dataset.Clone();
dataset.AddOrUpdate(DicomTag.PhotometricInterpretation, PhotometricInterpretation.Rgb.Value);
dataset.AddOrUpdate(DicomTag.Rows, (ushort)rows);
dataset.AddOrUpdate(DicomTag.Columns, (ushort)columns);
dataset.AddOrUpdate(DicomTag.BitsAllocated, (ushort)8);
DicomPixelData pixelData = DicomPixelData.Create(dataset, true);
pixelData.BitsStored = 8;
pixelData.SamplesPerPixel = 3;
pixelData.HighBit = 7;
pixelData.PhotometricInterpretation = PhotometricInterpretation.Rgb;
pixelData.PixelRepresentation = 0;
pixelData.PlanarConfiguration = 0;
pixelData.Height = (ushort)rows;
pixelData.Width = (ushort)columns;
pixelData.AddFrame(buffer);
dicomfile = new DicomFile(dataset);
dicomfile.Save(newFilePah);
return true;
}
private static byte[] GetPixels(Bitmap bitmap, out int rows, out int columns)
{
using(var stream = new MemoryStream())
{
bitmap.Save(stream, System.Drawing.Imaging.ImageFormat.Bmp);
rows = bitmap.Height;
columns = bitmap.Width;
return stream.ToArray();
}
}
You can see I have cleaned up your code much.
But the major change is using System.Drawing.Imaging.ImageFormat.Bmp instead of other formats. This depends on actual input image format. Use the format as that of input image.
For detailed insight, please refer to this source code on github.
I have an image. I read the text content using ironocr. The following code used to read text.
var Ocr = new AutoOcr();
var Result = Ocr.Read(bmpCrop);
string text = Result.Text;
return text;
But the text trims the space and I couldn't get the exact copy of the text as in the image. Is there way or any other ocr libraries that reads text as an exact copy from the image. Please find the image attached, that I have used to read using ocr.
I have tried the following methode specified in the below url also, This also not working for me.
How to preserve document structure in tesseract
I have found that the latest IronOCR has a detailed document object model of pages, blocks, paragraphs, lines, words and characters
https://ironsoftware.com/csharp/ocr/examples/results-objects/
using IronOcr;
using System.Drawing; //for image export
// We can delve deep into OCR results as an object model of
// Pages, Barcodes, Paragraphs, Lines, Words and Characters
// This allows us to explore, export and draw OCR content using other APIs/
var Ocr = new IronTesseract();
Ocr.Configuration.EngineMode = TesseractEngineMode.TesseractAndLstm;
Ocr.Configuration.ReadBarCodes = true;
using (var Input = new OcrInput(#"example.tiff"))
{
OcrResult Result = Ocr.Read(Input);
foreach (var Page in Result.Pages)
{
// Page object
int PageNumber = Page.PageNumber;
string PageText = Page.Text;
int PageWordCount = Page.WordCount;
// null if we dont set Ocr.Configuration.ReadBarCodes = true;
OcrResult.Barcode[] Barcodes = Page.Barcodes;
System.Drawing.Bitmap PageImage = Page.ToBitmap(Input);
int PageWidth = Page.Width;
int PageHeight = Page.Height;
foreach (var Paragraph in Page.Paragraphs)
{
// Pages -> Paragraphs
int ParagraphNumber = Paragraph.ParagraphNumber;
String ParagraphText = Paragraph.Text;
System.Drawing.Bitmap ParagraphImage = Paragraph.ToBitmap(Input);
int ParagraphX_location = Paragraph.X;
int ParagraphY_location = Paragraph.Y;
int ParagraphWidth = Paragraph.Width;
int ParagraphHeight = Paragraph.Height;
double ParagraphOcrAccuracy = Paragraph.Confidence;
OcrResult.TextFlow paragrapthText_direction = Paragraph.TextDirection;
foreach (var Line in Paragraph.Lines)
{
// Pages -> Paragraphs -> Lines
int LineNumber = Line.LineNumber;
String LineText = Line.Text;
System.Drawing.Bitmap LineImage = Line.ToBitmap(Input); ;
int LineX_location = Line.X;
int LineY_location = Line.Y;
int LineWidth = Line.Width;
int LineHeight = Line.Height;
double LineOcrAccuracy = Line.Confidence;
double LineSkew = Line.BaselineAngle;
double LineOffset = Line.BaselineOffset;
foreach (var Word in Line.Words)
{
// Pages -> Paragraphs -> Lines -> Words
int WordNumber = Word.WordNumber;
String WordText = Word.Text;
System.Drawing.Image WordImage = Word.ToBitmap(Input);
int WordX_location = Word.X;
int WordY_location = Word.Y;
int WordWidth = Word.Width;
int WordHeight = Word.Height;
double WordOcrAccuracy = Word.Confidence;
if (Word.Font != null)
{
// Word.Font is only set when using Tesseract Engine Modes rather than LTSM
String FontName = Word.Font.FontName;
double FontSize = Word.Font.FontSize;
bool IsBold = Word.Font.IsBold;
bool IsFixedWidth = Word.Font.IsFixedWidth;
bool IsItalic = Word.Font.IsItalic;
bool IsSerif = Word.Font.IsSerif;
bool IsUnderLined = Word.Font.IsUnderlined;
bool IsFancy = Word.Font.IsCaligraphic;
}
foreach (var Character in Word.Characters)
{
// Pages -> Paragraphs -> Lines -> Words -> Characters
int CharacterNumber = Character.CharacterNumber;
String CharacterText = Character.Text;
System.Drawing.Bitmap CharacterImage = Character.ToBitmap(Input);
int CharacterX_location = Character.X;
int CharacterY_location = Character.Y;
int CharacterWidth = Character.Width;
int CharacterHeight = Character.Height;
double CharacterOcrAccuracy = Character.Confidence;
// Output alternative symbols choices and their probability.
// Very useful for spellchecking
OcrResult.Choice[] Choices = Character.Choices;
}
}
}
}
}
}
I am trying to save my kinect raw depth-data and i dont want to use the Kinect Studio, because i need the raw-data for further calculations. I am using the kinectv2 and kinect sdk!
My problem is that i just get low FPS for the saved data. Its about 15-17FPS.
Here my Framereader ( in further steps i want to save colorstream also):
frameReader = kinectSensor.OpenMultiSourceFrameReader(FrameSourceTypes.Depth);
frameReader.MultiSourceFrameArrived += Reader_MultiSourceFrameArrived;
Here the Event:
void Reader_MultiSourceFrameArrived(object sender, MultiSourceFrameArrivedEventArgs e)
{
var reference = e.FrameReference.AcquireFrame();
saveFrameTest(reference);
frame_num++;
}
Here the saving-function:
private unsafe void saveFrameTest(Object reference)
{
MultiSourceFrame mSF = (MultiSourceFrame)reference;
using (var frame = mSF.DepthFrameReference.AcquireFrame())
{
if (frame != null)
{
using (Microsoft.Kinect.KinectBuffer depthBuffer = frame.LockImageBuffer())
{
if ((frame.FrameDescription.Width * frame.FrameDescription.Height) == (depthBuffer.Size / frame.FrameDescription.BytesPerPixel))
{
ushort* frameData = (ushort*)depthBuffer.UnderlyingBuffer;
byte[] rawDataConverted = new byte[(int)(depthBuffer.Size / 2)];
for (int i = 0; i < (int)(depthBuffer.Size / 2); ++i)
{
ushort depth = frameData[i];
rawDataConverted[i] = (byte)(depth >= frame.DepthMinReliableDistance && depth <= frame.DepthMaxReliableDistance ? (depth) : 0);
}
String date = string.Format("{0:hh-mm-ss}", DateTime.Now);
String filePath = System.IO.Directory.GetCurrentDirectory() + "/test/" +date+".raw";
File.WriteAllBytes(filePath, rawDataConverted);
rawDataConverted = null;
}
}
}
}
}
Further Infomration:
I included my code in a simple Console-Application on a Intel® Xeon® Processor E5-1620 3,7GHz with 16 GB RAM.
i think the for-loop is taking to much time:
for (int i = 0; i < (int)(depthBuffer.Size / 2); ++i)
{
ushort depth = frameData[i];
rawDataConverted[i] = (byte)(depth >= frame.DepthMinReliableDistance && depth <= frame.DepthMaxReliableDistance ? (depth) : 0);
}
I could improved my framerate. Now, i am accessing the kinectbuffer directly and resign the for-loop.
Microsoft.Kinect.KinectBuffer depthBuffer = frame.LockImageBuffer();
Marshal.Copy(depthBuffer.UnderlyingBuffer, rawData_depth, 0, (depthImageSize));
depthBuffer.Dispose();
frame.Dispose();
However i couldnt get the 30FPS-rate. Now it is about 25 FPS.
You could try something like this to get your array.
It's what I normally use.
var frame = frameReference.AcquireFrame();
var frameDescription = frame.FrameDescription;
ushort[] frameData = new ushort[frameDescription.Width * frameDescription.Height];
frame.CopyFrameDataToArray(frameData);
I want to create a gif file with several frames.
I want to use the Method which Microsoft support--Image.SaveAdd
But I don't know how to set the EncoderParameters Paramater to make up a gif file.
I can't find documents to refer. So how to create a gif file with Image.SaveAdd
Probably, too late to be useful for the original poster, but I managed to create a proper gif using just System.Drawing. The code below is based on jschroedl's answer, but also sets the frame delays and number of animation loops.
// Gdi+ constants absent from System.Drawing.
const int PropertyTagFrameDelay = 0x5100;
const int PropertyTagLoopCount = 0x5101;
const short PropertyTagTypeLong = 4;
const short PropertyTagTypeShort = 3;
const inr UintBytes = 4;
//...
var gifEncoder = GetEncoder(ImageFormat.Gif);
// Params of the first frame.
var encoderParams1 = new EncoderParameters(1);
encoderParams1.Param[0] = new EncoderParameter(Encoder.SaveFlag, (long)EncoderValue.MultiFrame);
// Params of other frames.
var encoderParamsN = new EncoderParameters(1);
encoderParamsN.Param[0] = new EncoderParameter(Encoder.SaveFlag, (long)EncoderValue.FrameDimensionTime);
// Params for the finalizing call.
var encoderParamsFlush = new EncoderParameters(1);
encoderParamsFlush.Param[0] = new EncoderParameter(Encoder.SaveFlag, (long)EncoderValue.Flush);
// PropertyItem for the frame delay (apparently, no other way to create a fresh instance).
var frameDelay = (PropertyItem)FormatterServices.GetUninitializedObject(typeof(PropertyItem));
frameDelay.Id = PropertyTagFrameDelay;
frameDelay.Type = PropertyTagTypeLong;
// Length of the value in bytes.
frameDelay.Len = Bitmaps.Count * UintBytes;
// The value is an array of 4-byte entries: one per frame.
// Every entry is the frame delay in 1/100-s of a second, in little endian.
frameDelay.Value = new byte[Bitmaps.Count * UintBytes];
// E.g., here, we're setting the delay of every frame to 1 second.
var frameDelayBytes = BitConverter.GetBytes((uint)100);
for (int j = 0; j < Bitmaps.Count; ++j)
Array.Copy(frameDelayBytes, 0, frameDelay.Value, j * UintBytes, UintBytes);
// PropertyItem for the number of animation loops.
var loopPropertyItem = (PropertyItem)FormatterServices.GetUninitializedObject(typeof(PropertyItem));
loopPropertyItem.Id = PropertyTagLoopCount;
loopPropertyItem.Type = PropertyTagTypeShort;
loopPropertyItem.Len = 1;
// 0 means to animate forever.
loopPropertyItem.Value = BitConverter.GetBytes((ushort)0);
using (var stream = new FileStream("animation.gif", FileMode.Create))
{
bool first = true;
Bitmap firstBitmap = null;
// Bitmaps is a collection of Bitmap instances that'll become gif frames.
foreach (var bitmap in Bitmaps)
{
if (first)
{
firstBitmap = bitmap;
firstBitmap.SetPropertyItem(frameDelay);
firstBitmap.SetPropertyItem(loopPropertyItem);
firstBitmap.Save(stream, gifEncoder, encoderParams1);
first = false;
}
else
{
firstBitmap.SaveAdd(bitmap, encoderParamsN);
}
}
firstBitmap.SaveAdd(encoderParamsFlush);
}
// ...
private ImageCodecInfo GetEncoder(ImageFormat format)
{
ImageCodecInfo[] codecs = ImageCodecInfo.GetImageDecoders();
foreach (ImageCodecInfo codec in codecs)
{
if (codec.FormatID == format.Guid)
{
return codec;
}
}
return null;
}
I had success with these parameters. img1,2,3,4... are the images I want to combine.
ULONG parameterValue;
EncoderParameters encoderParameters;
encoderParameters.Count = 1;
encoderParameters.Parameter[0].Guid = EncoderSaveFlag;
encoderParameters.Parameter[0].Type = EncoderParameterValueTypeLong;
encoderParameters.Parameter[0].NumberOfValues = 1;
encoderParameters.Parameter[0].Value = ¶meterValue;
// Save the first frame
parameterValue = EncoderValueMultiFrame;
rc = img1->Save(L"Output.gif", &encoderClsid, &encoderParameters);
assert(rc == Ok);
// Add the second frame
parameterValue = EncoderValueFrameDimensionTime;
rc = img1->SaveAdd(img2, &encoderParameters);
assert(rc == Ok);
// etc...adding frames img3,4,5...
// Done...
parameterValue = EncoderValueFlush;
rc = img1->SaveAdd(&encoderParameters);
assert(rc == Ok);
Edit: I just realized that you asked for C# and I have C++ code. Hopefully the parameters still apply.
If you want create gif with many picture you can use ngif. see this
//you should replace filepath
String [] imageFilePaths = new String[]{"c:\\01.png","c:\\02.png","c:\\03.png"};
String outputFilePath = "c:\\test.gif";
AnimatedGifEncoder e = new AnimatedGifEncoder();
e.Start( outputFilePath );
e.SetDelay(500);
//-1:no repeat,0:always repeat
e.SetRepeat(0);
for (int i = 0, count = imageFilePaths.Length; i < count; i++ )
{
e.AddFrame( Image.FromFile( imageFilePaths[i] ) );
}
e.Finish();
/* extract Gif */
string outputPath = "c:\\";
GifDecoder gifDecoder = new GifDecoder();
gifDecoder.Read( "c:\\test.gif" );
for ( int i = 0, count = gifDecoder.GetFrameCount(); i < count; i++ )
{
Image frame = gifDecoder.GetFrame( i ); // frame i
frame.Save( outputPath + Guid.NewGuid().ToString()
+ ".png", ImageFormat.Png );
}