I am trying to create a program that accepts an image, recursively goes through each pixel, normalizes the pixel and re-creates a NEW image that looks the same as the original, but has normalized pixels instead.
public void parseJpeg(String jpegPath)
{
var normalizedRed = 0.0;
var normalizedGreen = 0.0;
var normalizedBlue = 0.0;
Bitmap normalizedImage = null;
var image = new Bitmap(jpegPath);
normalizedImage = new Bitmap(image.Width, image.Height);
for (int x = 0; x < image.Width; ++x)
{
for (int y = 0; y < image.Height; ++y)
{
Color color = image.GetPixel(x, y);
double exponent = 2;
double redDouble = Convert.ToDouble(color.R);
double blueDouble = Convert.ToDouble(color.B);
double greenDouble = Convert.ToDouble(color.G);
double redResult = Math.Pow(redDouble, exponent);
double blueResult = Math.Pow(blueDouble, exponent);
double greenResult = Math.Pow(greenDouble, exponent);
double totalResult = redResult + blueResult + greenResult;
normalizedRed = Convert.ToDouble(color.R) / Math.Sqrt(totalResult);
normalizedGreen = Convert.ToDouble(color.G) / Math.Sqrt(totalResult);
normalizedBlue = Convert.ToDouble(color.B) / Math.Sqrt(totalResult);
Color newCol = Color.FromArgb(Convert.ToInt32(normalizedRed), Convert.ToInt32(normalizedGreen), Convert.ToInt32(normalizedBlue));
normalizedImage.SetPixel(x, y, newCol);
}
}
normalizedImage.Save("C:\\Users\\username\\Desktop\\test1.jpeg");
resultsViewBox.AppendText("Process completed.\n");
}
Using the above code produces all black pixels and I do not understand why. When it normalizes it sets RGB = 1. After normalization, how do I set pixels with the NEW normalized value?
When I perform the below code, I get a black and blue image in my preview, but when I open the file it's blank. This is better than what I was getting before, which was ALL black pixels. This only works on one image though. So I am not sure how much of a step forward it is.
public void parseJpeg(String jpegPath)
{
Bitmap normalizedImage = null;
var image = new Bitmap(jpegPath);
normalizedImage = new Bitmap(image.Width, image.Height);
for (int x = 0; x < image.Width; ++x)
{
for (int y = 0; y < image.Height; ++y)
{
Color color = image.GetPixel(x, y);
float norm = (float)System.Math.Sqrt(color.R * color.R + color.B * color.B + color.G * color.G);
Color newCol = Color.FromArgb(Convert.ToInt32(norm));
normalizedImage.SetPixel(x, y, newCol);
}
}
normalizedImage.Save("C:\\Users\\username\\Desktop\\test1.jpeg");
resultsViewBox.AppendText("Process completed.\n");
}
I found the code for what I was trying to do:
http://www.lukehorvat.com/blog/normalizing-image-brightness-in-csharp/
public void parseJpeg(String jpegPath)
{
var image = new Bitmap(jpegPath);
normalizedImage = new Bitmap(image.Width, image.Height);
for (int x = 0; x < image.Width; ++x)
{
for (int y = 0; y < image.Height; ++y)
{
float pixelBrightness = image.GetPixel(x, y).GetBrightness();
minBrightness = Math.Min(minBrightness, pixelBrightness);
maxBrightness = Math.Max(maxBrightness, pixelBrightness);
}
}
for (int x = 0; x < image.Width; x++)
{
for (int y = 0; y < image.Height; y++)
{
Color pixelColor = image.GetPixel(x, y);
float normalizedPixelBrightness = (pixelColor.GetBrightness() - minBrightness) / (maxBrightness - minBrightness);
Color normalizedPixelColor = ColorConverter.ColorFromAhsb(pixelColor.A, pixelColor.GetHue(), pixelColor.GetSaturation(), normalizedPixelBrightness);
normalizedImage.SetPixel(x, y, normalizedPixelColor);
}
}
normalizedImage.Save("C:\\Users\\username\\Desktop\\test1.jpeg");
resultsViewBox.AppendText("Process completed.\n");
}
You are creating a new Bitmap and saving over the file for every pixel in your image. Move the
normalizedImage = new Bitmap(image.Width, image.Height);
line to before your loops, and the
normalizedImage.Save("C:\\Users\\username\\Desktop\\test1.jpeg");
line to after your loops.
Your normalization algorithm does not appear to be correct. Let's say your original color was red (255,0,0) Then your totalResult will be 65025, and your normalizedRed will be 255/sqrt(65025), which is 1, giving you a new normalized color of (1,0,0), which is essentially black.
Just as a note, your code will run a bit faster if you define all the doubles once outside the look and then assign them within the loop rather than defining and deleting each of the 8 doubles each iteration
Instead of messing with the colors you should use the brightness or luminosity factor to achieve normalization. Here is a link to the already answered question that can help you. you can convert each RGB pixel to HSL and minupulate L factor:
How do I normalize an image?
The code that you shared is actually a trim down version of HSL manipulation.
Related
I am writing a .Net wrapper for Tesseract Ocr and if I use a grayscale image instead of rgb image as an input file to it then results are pretty good.
So I was searching the web for C# solution to convert a Rgb image to grayscale image and I found this code.
This performs 3 operations to increase the accuracy of tesseract.
Resize the image
then convert into grayscale image and remove noise from image
Now this converted image gives almost 90% accurate results.
//Resize
public Bitmap Resize(Bitmap bmp, int newWidth, int newHeight)
{
Bitmap temp = (Bitmap)bmp;
Bitmap bmap = new Bitmap(newWidth, newHeight, temp.PixelFormat);
double nWidthFactor = (double)temp.Width / (double)newWidth;
double nHeightFactor = (double)temp.Height / (double)newHeight;
double fx, fy, nx, ny;
int cx, cy, fr_x, fr_y;
Color color1 = new Color();
Color color2 = new Color();
Color color3 = new Color();
Color color4 = new Color();
byte nRed, nGreen, nBlue;
byte bp1, bp2;
for (int x = 0; x < bmap.Width; ++x)
{
for (int y = 0; y < bmap.Height; ++y)
{
fr_x = (int)Math.Floor(x * nWidthFactor);
fr_y = (int)Math.Floor(y * nHeightFactor);
cx = fr_x + 1;
if (cx >= temp.Width)
cx = fr_x;
cy = fr_y + 1;
if (cy >= temp.Height)
cy = fr_y;
fx = x * nWidthFactor - fr_x;
fy = y * nHeightFactor - fr_y;
nx = 1.0 - fx;
ny = 1.0 - fy;
color1 = temp.GetPixel(fr_x, fr_y);
color2 = temp.GetPixel(cx, fr_y);
color3 = temp.GetPixel(fr_x, cy);
color4 = temp.GetPixel(cx, cy);
// Blue
bp1 = (byte)(nx * color1.B + fx * color2.B);
bp2 = (byte)(nx * color3.B + fx * color4.B);
nBlue = (byte)(ny * (double)(bp1) + fy * (double)(bp2));
// Green
bp1 = (byte)(nx * color1.G + fx * color2.G);
bp2 = (byte)(nx * color3.G + fx * color4.G);
nGreen = (byte)(ny * (double)(bp1) + fy * (double)(bp2));
// Red
bp1 = (byte)(nx * color1.R + fx * color2.R);
bp2 = (byte)(nx * color3.R + fx * color4.R);
nRed = (byte)(ny * (double)(bp1) + fy * (double)(bp2));
bmap.SetPixel(x, y, System.Drawing.Color.FromArgb(255, nRed, nGreen, nBlue));
}
}
//here i included the below to functions logic without the for loop to remove repetitive use of for loop but it did not work and taking the same time.
bmap = SetGrayscale(bmap);
bmap = RemoveNoise(bmap);
return bmap;
}
//SetGrayscale
public Bitmap SetGrayscale(Bitmap img)
{
Bitmap temp = (Bitmap)img;
Bitmap bmap = (Bitmap)temp.Clone();
Color c;
for (int i = 0; i < bmap.Width; i++)
{
for (int j = 0; j < bmap.Height; j++)
{
c = bmap.GetPixel(i, j);
byte gray = (byte)(.299 * c.R + .587 * c.G + .114 * c.B);
bmap.SetPixel(i, j, Color.FromArgb(gray, gray, gray));
}
}
return (Bitmap)bmap.Clone();
}
//RemoveNoise
public Bitmap RemoveNoise(Bitmap bmap)
{
for (var x = 0; x < bmap.Width; x++)
{
for (var y = 0; y < bmap.Height; y++)
{
var pixel = bmap.GetPixel(x, y);
if (pixel.R < 162 && pixel.G < 162 && pixel.B < 162)
bmap.SetPixel(x, y, Color.Black);
}
}
for (var x = 0; x < bmap.Width; x++)
{
for (var y = 0; y < bmap.Height; y++)
{
var pixel = bmap.GetPixel(x, y);
if (pixel.R > 162 && pixel.G > 162 && pixel.B > 162)
bmap.SetPixel(x, y, Color.White);
}
}
return bmap;
}
But the problem is it takes lot of time to convert it
So I included SetGrayscale(Bitmap bmap)
RemoveNoise(Bitmap bmap) function logic inside the Resize() method to remove repetitive use of for loop
but it did not solve my problem.
The Bitmap class's GetPixel() and SetPixel() methods are notoriously slow for multiple read/writes. A much faster way to access and set individual pixels in a bitmap is to lock it first.
There's a good example here on how to do that, with a nice class LockedBitmap to wrap around the stranger Marshaling code.
Essentially what it does is use the LockBits() method in the Bitmap class, passing a rectangle for the region of the bitmap you want to lock, and then copy those pixels from its unmanaged memory location to a managed one for easier access.
Here's an example on how you would use that example class with your SetGrayscale() method:
public Bitmap SetGrayscale(Bitmap img)
{
LockedBitmap lockedBmp = new LockedBitmap(img.Clone());
lockedBmp.LockBits(); // lock the bits for faster access
Color c;
for (int i = 0; i < lockedBmp.Width; i++)
{
for (int j = 0; j < lockedBmp.Height; j++)
{
c = lockedBmp.GetPixel(i, j);
byte gray = (byte)(.299 * c.R + .587 * c.G + .114 * c.B);
lockedBmp.SetPixel(i, j, Color.FromArgb(gray, gray, gray));
}
}
lockedBmp.UnlockBits(); // remember to release resources
return lockedBmp.Bitmap; // return the bitmap (you don't need to clone it again, that's already been done).
}
This wrapper class has saved me a ridiculous amount of time in bitmap processing. Once you've implemented this in all your methods, preferably only calling LockBits() once, then I'm sure your application's performance will improve tremendously.
I also see that you're cloning the images a lot. This probably doesn't take up as much time as the SetPixel()/GetPixel() thing, but its time can still be significant especially with larger images.
The easiest way would be to redraw the image onto itself using DrawImage and passing a suitable ColorMatrix. Google for ColorMatrix and gray scale and you'll find a ton of examples, this one for example: http://www.codeproject.com/Articles/3772/ColorMatrix-Basics-Simple-Image-Color-Adjustment
After using STFT(Short-time Fourier transform) the output is a matrix that represents a 3d plot as though (A[X, Y] = M) A is the output matrix, X is the time , Y is the frequency, and the third dimension M is the amplitude illustrated by the intensity of the pixel color as in the following pictures:
Spectrogram 2
How do I draw the output matrix A with a gradient of colors like in the pictures in C#? Is there a library that contains a spectrogram control for C#?
Update:
After some modifications on the given algorithm I could draw the spectrogram, I didn't change the color palette except the first color changed to black but I don't know why it's very faded!
This one represents a sound saying
Bye Bye
Bye Bye Spectrogram
And this one of a pure sine wave so it's almost the same frequency all the time
Pure sine wave Spectrogram
The output is accepted it represents the frequencies of the input signal as expected, but i think there is a way to make the spectrogram as well illustrated as the ones in the examples, could you please take a look at my code and suggest modifications?
This is the event handler:
private void SpectrogramButton_Click(object sender, EventArgs e)
{
Complex[][] SpectrogramData = Fourier_Transform.STFT(/*signal:*/ samples, /*windowSize:*/ 512, /*hopSize:*/ 512);
SpectrogramBox.Image = Spectrogram.DrawSpectrogram(SpectrogramData, /*Interpolation Factor:*/ 1000, /*Height:*/ 256);
}
And this one is the drawing function after my modifications:
public static Bitmap DrawSpectrogram(Complex[][] Data, int InterpolationFactor, int Height)
{
// target size:
Size sz = new Size(Data.GetLength(0), Height);
Bitmap bmp = new Bitmap(sz.Width, sz.Height);
// the data array:
//double[,] data = new double[222, 222];
// step sizes:
float stepX = 1f * sz.Width / Data.GetLength(0);
float stepY = 1f * sz.Height / Data[0].GetLength(0);
// create a few stop colors:
List<Color> baseColors = new List<Color>(); // create a color list
baseColors.Add(Color.Black);
baseColors.Add(Color.LightSkyBlue);
baseColors.Add(Color.LightGreen);
baseColors.Add(Color.Yellow);
baseColors.Add(Color.Orange);
baseColors.Add(Color.Red);
// and the interpolate a larger number of grdient colors:
List<Color> colors = interpolateColors(baseColors, InterpolationFactor);
// a few boring test data
//Random rnd = new Random(1);
//for (int x = 0; x < data.GetLength(0); x++)
// for (int y = 0; y < data.GetLength(1); y++)
// {
// //data[x, y] = rnd.Next((int)(300 + Math.Sin(x * y / 999) * 200)) +
// // rnd.Next(x + y + 111);
// data[x, y] = 0;
// }
// now draw the data:
float Max = Complex.Max(Data);
using (Graphics G = Graphics.FromImage(bmp))
for (int x = 0; x < Data.GetLength(0); x++)
for (int y = 0; y < Data[0].GetLength(0); y++)
{
int Val = (int)Math.Ceiling((Data[x][y].Magnitude / Max) * (InterpolationFactor - 1));
using (SolidBrush brush = new SolidBrush(colors[(int)Val]))
G.FillRectangle(brush, x * stepX, (Data[0].GetLength(0) - y) * stepY, stepX, stepY);
}
// and display the result
return bmp;
}
I don't really understand the log thing that you are talking about in your answers, I'm sorry for my little knowledge.
Update:
This is the output after adding taking log10 to the magnitudes (negative values neglected):
This one of "Bye bye" from before:
A Shotgun Blast:
A Music Box:
I Think this output is acceptable, it is different from the examples I brought in the beginning but I think it's better.
No, there is no out of the box control I know of. There may well be outside libraries you can buy, of course, but shhh, you can't ask on SO about them..
In theory you could use, or I guess I should rather say abuse a Chart control for this. But since DataPoints are rather expensive objects, or at least more expensive than they look, this seems not advisable.
Instead you can simply draw the graph into a Bitmap yourself.
Step one is to decide on a gradient of colors. See the interpolateColors function here for an example of this!
Then you would simply do a double loop over the data using floats for the step and pixel sizes and do a Graphics.FillRectangle there.
Here is a simple example using GDI+ to create a Bitmap and a Winforms PictureBox for display. It doesn't add any axes to the graphic and fills it completely.
It first creates a few sample data and a gradient wih 1000 colors. Then it draws into a Bitmap and displays the result:
private void button6_Click(object sender, EventArgs e)
{
// target size:
Size sz = pictureBox1.ClientSize;
Bitmap bmp = new Bitmap(sz.Width, sz.Height);
// the data array:
double[,] data = new double[222, 222];
// step sizes:
float stepX = 1f * sz.Width / data.GetLength(0);
float stepY = 1f * sz.Height / data.GetLength(1);
// create a few stop colors:
List<Color> baseColors = new List<Color>(); // create a color list
baseColors.Add(Color.RoyalBlue);
baseColors.Add(Color.LightSkyBlue);
baseColors.Add(Color.LightGreen);
baseColors.Add(Color.Yellow);
baseColors.Add(Color.Orange);
baseColors.Add(Color.Red);
// and the interpolate a larger number of grdient colors:
List<Color> colors = interpolateColors(baseColors, 1000);
// a few boring test data
Random rnd = new Random(1);
for (int x = 0; x < data.GetLength(0); x++)
for (int y = 0; y < data.GetLength(1); y++)
{
data[x, y] = rnd.Next( (int) (300 + Math.Sin(x * y / 999) * 200 )) +
rnd.Next( x + y + 111);
}
// now draw the data:
using (Graphics G = Graphics.FromImage(bmp))
for (int x = 0; x < data.GetLength(0); x++)
for (int y = 0; y < data.GetLength(1); y++)
{
using (SolidBrush brush = new SolidBrush(colors[(int)data[x, y]]))
G.FillRectangle(brush, x * stepX, y * stepY, stepX, stepY);
}
// and display the result
pictureBox1.Image = bmp;
}
Here is the function from the link:
List<Color> interpolateColors(List<Color> stopColors, int count)
{
SortedDictionary<float, Color> gradient = new SortedDictionary<float, Color>();
for (int i = 0; i < stopColors.Count; i++)
gradient.Add(1f * i / (stopColors.Count - 1), stopColors[i]);
List<Color> ColorList = new List<Color>();
using (Bitmap bmp = new Bitmap(count, 1))
using (Graphics G = Graphics.FromImage(bmp))
{
Rectangle bmpCRect = new Rectangle(Point.Empty, bmp.Size);
LinearGradientBrush br = new LinearGradientBrush
(bmpCRect, Color.Empty, Color.Empty, 0, false);
ColorBlend cb = new ColorBlend();
cb.Positions = new float[gradient.Count];
for (int i = 0; i < gradient.Count; i++)
cb.Positions[i] = gradient.ElementAt(i).Key;
cb.Colors = gradient.Values.ToArray();
br.InterpolationColors = cb;
G.FillRectangle(br, bmpCRect);
for (int i = 0; i < count; i++) ColorList.Add(bmp.GetPixel(i, 0));
br.Dispose();
}
return ColorList;
}
You would probably want to draw axes with labels etc. You can use Graphics.DrawString or TextRenderer.DrawText to do so. Just leave enough space around the drawing area!
I used the data values cast to int as direct pointers into the color table.
Depending on your data you will need to scale them down or even use a log conversion. The first of your images show a logarithmic scale going from 100 to 20k, the second looks linear going from 0 to 100.
If you show us your data structure we can give you further hints how to adapt the code to use it..
You can create a bitmap as per the other answer. It's also common to use a color lookup table to convert FFT log magnitude to the color to use for each pixel or small rectangle.
I've been experimenting with the image bicubic resampling algorithm present in the AForge framework with the idea of introducing something similar into my image processing solution. See the original algorithm here and interpolation kernel here
Unfortunately I've hit a wall. It looks to me like somehow I am calculating the sample destination position incorrectly, probably due to the algorithm being designed for Format24bppRgb images where as I am using a Format32bppPArgb format.
Here's my code:
public Bitmap Resize(Bitmap source, int width, int height)
{
int sourceWidth = source.Width;
int sourceHeight = source.Height;
Bitmap destination = new Bitmap(width, height, PixelFormat.Format32bppPArgb);
destination.SetResolution(source.HorizontalResolution, source.VerticalResolution);
using (FastBitmap sourceBitmap = new FastBitmap(source))
{
using (FastBitmap destinationBitmap = new FastBitmap(destination))
{
double heightFactor = sourceWidth / (double)width;
double widthFactor = sourceHeight / (double)height;
// Coordinates of source points
double ox, oy, dx, dy, k1, k2;
int ox1, oy1, ox2, oy2;
// Width and height decreased by 1
int maxHeight = height - 1;
int maxWidth = width - 1;
for (int y = 0; y < height; y++)
{
// Y coordinates
oy = (y * widthFactor) - 0.5;
oy1 = (int)oy;
dy = oy - oy1;
for (int x = 0; x < width; x++)
{
// X coordinates
ox = (x * heightFactor) - 0.5f;
ox1 = (int)ox;
dx = ox - ox1;
// Destination color components
double r = 0;
double g = 0;
double b = 0;
double a = 0;
for (int n = -1; n < 3; n++)
{
// Get Y cooefficient
k1 = Interpolation.BiCubicKernel(dy - n);
oy2 = oy1 + n;
if (oy2 < 0)
{
oy2 = 0;
}
if (oy2 > maxHeight)
{
oy2 = maxHeight;
}
for (int m = -1; m < 3; m++)
{
// Get X cooefficient
k2 = k1 * Interpolation.BiCubicKernel(m - dx);
ox2 = ox1 + m;
if (ox2 < 0)
{
ox2 = 0;
}
if (ox2 > maxWidth)
{
ox2 = maxWidth;
}
Color color = sourceBitmap.GetPixel(ox2, oy2);
r += k2 * color.R;
g += k2 * color.G;
b += k2 * color.B;
a += k2 * color.A;
}
}
destinationBitmap.SetPixel(
x,
y,
Color.FromArgb(a.ToByte(), r.ToByte(), g.ToByte(), b.ToByte()));
}
}
}
}
source.Dispose();
return destination;
}
And the kernel which should represent the given equation on Wikipedia
public static double BiCubicKernel(double x)
{
if (x < 0)
{
x = -x;
}
double bicubicCoef = 0;
if (x <= 1)
{
bicubicCoef = (1.5 * x - 2.5) * x * x + 1;
}
else if (x < 2)
{
bicubicCoef = ((-0.5 * x + 2.5) * x - 4) * x + 2;
}
return bicubicCoef;
}
Here's the original image at 500px x 667px.
And the image resized to 400px x 543px.
Visually it appears that the image is over reduced and then the same pixels are repeatedly applied once we hit a particular point.
Can anyone give me some pointers here to solve this?
Note FastBitmap is a wrapper for Bitmap that uses LockBits to manipulate pixels in memory. It works well with everything else I apply it to.
Edit
As per request here's the methods involved in ToByte
public static byte ToByte(this double value)
{
return Convert.ToByte(ImageMaths.Clamp(value, 0, 255));
}
public static T Clamp<T>(T value, T min, T max) where T : IComparable<T>
{
if (value.CompareTo(min) < 0)
{
return min;
}
if (value.CompareTo(max) > 0)
{
return max;
}
return value;
}
You are limiting your ox2 and oy2 to destination image dimensions, instead of source dimensions.
Change this:
// Width and height decreased by 1
int maxHeight = height - 1;
int maxWidth = width - 1;
to this:
// Width and height decreased by 1
int maxHeight = sourceHeight - 1;
int maxWidth = sourceWidth - 1;
Well, I've met a very strange thing, which might be or might be not a souce of the problem.
I've started to try implementing convolution matrix by myself and encountered strange behaviour. I was testing code on a small image 4x4 pixels. The code is following:
var source = Bitmap.FromFile(#"C:\Users\Public\Pictures\Sample Pictures\Безымянный.png");
using (FastBitmap sourceBitmap = new FastBitmap(source))
{
for (int TY = 0; TY < 4; TY++)
{
for (int TX = 0; TX < 4; TX++)
{
Color color = sourceBitmap.GetPixel(TX, TY);
Console.Write(color.B.ToString().PadLeft(5));
}
Console.WriteLine();
}
}
Althought I'm printing out only blue channel value, it's still clearly incorrect.
On the other hand, your solution partitially works, what makes the thing I've found kind of irrelevant. One more guess I have: what is your system's DPI?
From what I have found helpfull, here are some links:
C++ implementation of bicubic interpolation on
matrix
C# implemetation of bicubic interpolation, lacking the part about rescaling
Thread on gamedev.net which has almost working solution
That's my answer so far, but I will try further.
In this image black colour graph is in the white background. I want to get the pixel length between the two peak waves in the graph and the average amplitude (height of the peak) of the peak waves.
I'm stuck with the logic to implement this code.can anyone help me to implement this. I'm using C#
public void black(Bitmap bmp)
{
Color col;
for (int i = 0; i < bmp.Height; i++)
{
for (int j = 0; j < bmp.Width; j++)
{
col = bmp.GetPixel(j, i);
if (col.R == 0) //check whether black pixel
{
y = i; //assign black pixel x,y positions to a variable
x = j;
}
}
}
}
my supervisor told i have to use a 2D array to store increments and decrements(start point pixel value and end point pixel value of each increment and decrement) of the line to get these values.But i haven't sufficient coding skills to apply that logic to this code.
Bitmap img = new Bitmap(pictureBox1.Image);
int width = img.Width;
int height = img.Height;
for (int y = 0; y < height; y++)
{
for (int x = 0; x < width; x++)
{
Color pixelColor = img.GetPixel(x, y);
if (pixelColor.R == 0 && pixelColor.G == 0 && pixelColor.B == 0)
//listBox1.Items.Add(String.Format("x:{0} y:{1}", x, y));
textBox1.Text = (String.Format("x:{0} y:{1}", x, y));
}
}
Suppose I have a System.Drawing.Bitmap in 32bpp ARGB mode. It's a large bitmap, but it's mostly fully transparent pixels with a relatively small image somewhere in the middle.
What is a fast algorithm to detect the borders of the "real" image, so I can crop away all the transparent pixels from around it?
Alternatively, is there a function already in .Net that I can use for this?
The basic idea is to check every pixel of the image to find the top, left, right and bottom bounds of the image. To do this efficiently, don't use the GetPixel method, which is pretty slow. Use LockBits instead.
Here's the implementation I came up with:
static Bitmap TrimBitmap(Bitmap source)
{
Rectangle srcRect = default(Rectangle);
BitmapData data = null;
try
{
data = source.LockBits(new Rectangle(0, 0, source.Width, source.Height), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
byte[] buffer = new byte[data.Height * data.Stride];
Marshal.Copy(data.Scan0, buffer, 0, buffer.Length);
int xMin = int.MaxValue;
int xMax = 0;
int yMin = int.MaxValue;
int yMax = 0;
for (int y = 0; y < data.Height; y++)
{
for (int x = 0; x < data.Width; x++)
{
byte alpha = buffer[y * data.Stride + 4 * x + 3];
if (alpha != 0)
{
if (x < xMin) xMin = x;
if (x > xMax) xMax = x;
if (y < yMin) yMin = y;
if (y > yMax) yMax = y;
}
}
}
if (xMax < xMin || yMax < yMin)
{
// Image is empty...
return null;
}
srcRect = Rectangle.FromLTRB(xMin, yMin, xMax, yMax);
}
finally
{
if (data != null)
source.UnlockBits(data);
}
Bitmap dest = new Bitmap(srcRect.Width, srcRect.Height);
Rectangle destRect = new Rectangle(0, 0, srcRect.Width, srcRect.Height);
using (Graphics graphics = Graphics.FromImage(dest))
{
graphics.DrawImage(source, destRect, srcRect, GraphicsUnit.Pixel);
}
return dest;
}
It can probably be optimized, but I'm not a GDI+ expert, so it's the best I can do without further research...
EDIT: actually, there's a simple way to optimize it, by not scanning some parts of the image :
scan left to right until you find a non-transparent pixel; store (x, y) into (xMin, yMin)
scan top to bottom until you find a non-transparent pixel (only for x >= xMin); store y into yMin
scan right to left until you find a non-transparent pixel (only for y >= yMin); store x into xMax
scan bottom to top until you find a non-transparent pixel (only for xMin <= x <= xMax); store y into yMax
EDIT2: here's an implementation of the approach above:
static Bitmap TrimBitmap(Bitmap source)
{
Rectangle srcRect = default(Rectangle);
BitmapData data = null;
try
{
data = source.LockBits(new Rectangle(0, 0, source.Width, source.Height), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
byte[] buffer = new byte[data.Height * data.Stride];
Marshal.Copy(data.Scan0, buffer, 0, buffer.Length);
int xMin = int.MaxValue,
xMax = int.MinValue,
yMin = int.MaxValue,
yMax = int.MinValue;
bool foundPixel = false;
// Find xMin
for (int x = 0; x < data.Width; x++)
{
bool stop = false;
for (int y = 0; y < data.Height; y++)
{
byte alpha = buffer[y * data.Stride + 4 * x + 3];
if (alpha != 0)
{
xMin = x;
stop = true;
foundPixel = true;
break;
}
}
if (stop)
break;
}
// Image is empty...
if (!foundPixel)
return null;
// Find yMin
for (int y = 0; y < data.Height; y++)
{
bool stop = false;
for (int x = xMin; x < data.Width; x++)
{
byte alpha = buffer[y * data.Stride + 4 * x + 3];
if (alpha != 0)
{
yMin = y;
stop = true;
break;
}
}
if (stop)
break;
}
// Find xMax
for (int x = data.Width - 1; x >= xMin; x--)
{
bool stop = false;
for (int y = yMin; y < data.Height; y++)
{
byte alpha = buffer[y * data.Stride + 4 * x + 3];
if (alpha != 0)
{
xMax = x;
stop = true;
break;
}
}
if (stop)
break;
}
// Find yMax
for (int y = data.Height - 1; y >= yMin; y--)
{
bool stop = false;
for (int x = xMin; x <= xMax; x++)
{
byte alpha = buffer[y * data.Stride + 4 * x + 3];
if (alpha != 0)
{
yMax = y;
stop = true;
break;
}
}
if (stop)
break;
}
srcRect = Rectangle.FromLTRB(xMin, yMin, xMax, yMax);
}
finally
{
if (data != null)
source.UnlockBits(data);
}
Bitmap dest = new Bitmap(srcRect.Width, srcRect.Height);
Rectangle destRect = new Rectangle(0, 0, srcRect.Width, srcRect.Height);
using (Graphics graphics = Graphics.FromImage(dest))
{
graphics.DrawImage(source, destRect, srcRect, GraphicsUnit.Pixel);
}
return dest;
}
There won't be a significant gain if the non-transparent part is small of course, since it will still scan most of the pixels. But if it's big, only the rectangles around the non-transparent part will be scanned.
I would like to suggest a divide & conquer approach:
split the image in the middle (e.g. vertically)
check if there are non-transparent pixels on the cut line (if so, remember min/max for bounding box)
split left half again vertically
if cut line contains non-transparent pixels -> update bounding box
if not, you can probably discard the leftmost half (I don't know the pictures)
continue with the left-right half (you stated that the image is somewhere in the middle) until you find the leftmost bound of the image
do the same for the right half