I am looking to create an external application that monitors the 'FPS' of a DirectX application (like FRAPS without the recording). I have read several Microsoft articles on performance measuring tools - but I am looking to get the feedback (and experience) of the community.
My question: what is the best method for obtaining the FPS of a DirectX application?
Windows has some Event Tracing for Windows (ETW) providers related to DirectX profiling. The most interesting ones are Microsoft-Windows-D3D9 and Microsoft-Windows-DXGI, which allow tracing of the frame presentation events. The simplest way to calculate FPS is to count the number of PresentStart events within a time interval and divide that by the length of the interval.
To work with ETW in C#, install Microsoft.Diagnostics.Tracing.TraceEvent package.
The following code sample displays FPS of running processes:
using System;
using System.Collections.Generic;
using System.Text;
using System.Diagnostics;
using System.Threading;
using Microsoft.Diagnostics.Tracing.Session;
namespace ConsoleApp1
{
/// <summary>
/// Helper that stores frame timestamps. Access is serialized through a private lock,
/// and only the most recent MAXNUM values are retained (the oldest is dropped first).
/// </summary>
public class TimestampCollection
{
    const int MAXNUM = 1000;

    readonly object sync = new object();
    readonly List<long> timestamps = new List<long>(MAXNUM + 1);

    /// <summary>Label for this collection; set by the caller.</summary>
    public string Name { get; set; }

    /// <summary>Appends a timestamp, evicting the oldest entry once the cap is exceeded.</summary>
    public void Add(long timestamp)
    {
        lock (sync)
        {
            timestamps.Add(timestamp);
            if (timestamps.Count <= MAXNUM)
            {
                return;
            }
            timestamps.RemoveAt(0);
        }
    }

    /// <summary>Counts the stored timestamps that fall inside the inclusive range [from, to].</summary>
    public int QueryCount(long from, long to)
    {
        lock (sync)
        {
            int matches = 0;
            for (int i = 0; i < timestamps.Count; i++)
            {
                long ts = timestamps[i];
                if (ts < from || ts > to)
                {
                    continue;
                }
                matches++;
            }
            return matches;
        }
    }
}
/// <summary>
/// Console program that subscribes to the Microsoft-Windows-D3D9 and Microsoft-Windows-DXGI
/// ETW providers, records a timestamp for every PresentStart event per process, and prints
/// the number of frames seen in the last two seconds as an FPS figure once per second.
/// </summary>
class Program
{
    //event codes (https://github.com/GameTechDev/PresentMon/blob/40ee99f437bc1061a27a2fc16a8993ee8ce4ebb5/PresentData/PresentMonTraceConsumer.cpp)
    public const int EventID_D3D9PresentStart = 1;
    public const int EventID_DxgiPresentStart = 42;

    //ETW provider codes
    public static readonly Guid DXGI_provider = Guid.Parse("{CA11C036-0102-4A2D-A6AD-F03CFED5D3C9}");
    public static readonly Guid D3D9_provider = Guid.Parse("{783ACA0A-790E-4D7F-8451-AA850511C6B9}");

    static TraceEventSession m_EtwSession;

    //per-process frame timestamps, keyed by process id; guarded by sync
    static Dictionary<int, TimestampCollection> frames = new Dictionary<int, TimestampCollection>();
    static Stopwatch watch = null;
    static object sync = new object();

    /// <summary>Thread body that pumps ETW events into the registered handler.</summary>
    static void EtwThreadProc()
    {
        //start tracing
        m_EtwSession.Source.Process();
    }

    /// <summary>Thread body that redraws the per-process FPS table once per second.</summary>
    static void OutputThreadProc()
    {
        //length of the averaging window in milliseconds
        const long dt = 2000;

        //console output loop
        while (true)
        {
            Console.Clear();
            //sample the clock once so the date part and the millisecond part agree
            DateTime now = DateTime.Now;
            Console.WriteLine(now.ToString() + "." + now.Millisecond.ToString());
            Console.WriteLine();

            lock (sync)
            {
                long t2 = watch.ElapsedMilliseconds;
                long t1 = t2 - dt;
                foreach (var x in frames.Values)
                {
                    Console.Write(x.Name + ": ");
                    //get the number of frames
                    int count = x.QueryCount(t1, t2);
                    //calculate FPS
                    Console.WriteLine("{0} FPS", (double)count / dt * 1000.0);
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key to stop tracing...");
            Thread.Sleep(1000);
        }
    }

    /// <summary>
    /// Resolves a display name for a process id, falling back to the numeric id when the
    /// process can no longer be queried.
    /// </summary>
    static string GetProcessName(int pid)
    {
        try
        {
            using (var proc = Process.GetProcessById(pid))
            {
                return proc.ProcessName;
            }
        }
        catch (ArgumentException)
        {
            //GetProcessById throws (it does not return null) when the process is not running
            return pid.ToString();
        }
        catch (InvalidOperationException)
        {
            //the process exited between the lookup and the name query
            return pid.ToString();
        }
    }

    public static void Main(string[] argv)
    {
        //create ETW session and register providers
        m_EtwSession = new TraceEventSession("mysess");
        m_EtwSession.StopOnDispose = true;
        m_EtwSession.EnableProvider("Microsoft-Windows-D3D9");
        m_EtwSession.EnableProvider("Microsoft-Windows-DXGI");

        //handle event
        m_EtwSession.Source.AllEvents += data =>
        {
            //filter out frame presentation events
            bool isD3D9Present = (int)data.ID == EventID_D3D9PresentStart && data.ProviderGuid == D3D9_provider;
            bool isDxgiPresent = (int)data.ID == EventID_DxgiPresentStart && data.ProviderGuid == DXGI_provider;
            if (!isD3D9Present && !isDxgiPresent)
            {
                return;
            }

            int pid = data.ProcessID;
            lock (sync)
            {
                long t = watch.ElapsedMilliseconds;

                //if process is not yet in Dictionary, add it
                TimestampCollection collection;
                if (!frames.TryGetValue(pid, out collection))
                {
                    collection = new TimestampCollection();
                    collection.Name = GetProcessName(pid);
                    frames[pid] = collection;
                }

                //store frame timestamp in collection
                collection.Add(t);
            }
        };

        watch = new Stopwatch();
        watch.Start();

        Thread thETW = new Thread(EtwThreadProc);
        thETW.IsBackground = true;
        thETW.Start();

        Thread thOutput = new Thread(OutputThreadProc);
        thOutput.IsBackground = true;
        thOutput.Start();

        Console.ReadKey();
        m_EtwSession.Dispose();
    }
}
}
Based on the source code of PresentMon project.
Fraps inserts a DLL into every running application and hooks specific DX calls to figure out the framerate and capture video, pretty sure that you'll have to do something similar. After a bit of poking around I found a Github project that does some basic DX hooking for doing captures and overlays, so that might be a good spot to start out with. Though I've not used it personally so I can't totally vouch for the quality.
http://spazzarama.com/2011/03/14/c-screen-capture-and-overlays-for-direct3d-9-10-and-11-using-api-hooks/
Building on https://stackoverflow.com/a/54625953/12047161:
I had more success not using the stopwatch as the event triggers seems to be asynchronous with the actual frames. I kept getting batches of 20-50 frames all at once, making the estimated FPS fluctuate between 50 and 250% of the actual value.
Instead i used TimeStampRelativeMSec
//handle event
m_EtwSession.Source.AllEvents += data =>
{
    //filter out frame presentation events
    if ((int)data.ID != EventID_DxgiPresentStart || data.ProviderGuid != DXGI_provider)
    {
        return;
    }

    int pid = data.ProcessID;

    //frames is enumerated by the output thread under this same lock, so every
    //mutation of the dictionary has to take it as well
    lock (sync)
    {
        //if process is not yet in Dictionary, add it
        TimestampCollection collection;
        if (!frames.TryGetValue(pid, out collection))
        {
            string name;
            try
            {
                using (var proc = Process.GetProcessById(pid))
                {
                    name = proc.ProcessName;
                }
            }
            catch (ArgumentException)
            {
                //GetProcessById throws (it does not return null) when the process is gone
                name = pid.ToString();
            }
            catch (InvalidOperationException)
            {
                //the process exited before its name could be read
                name = pid.ToString();
            }

            collection = new TimestampCollection();
            collection.Name = name;
            frames[pid] = collection;
        }

        //use the timestamp carried by the event itself rather than the time the
        //callback happened to run (events are delivered in batches)
        collection.Add((long)data.TimeStampRelativeMSec);
    }
};
property from the TraceEvent class, and calculate FPS by rounding the average time between an arbitrary number of past entries:
/// <summary>
/// Returns the average interval between consecutive timestamps over the most recent
/// <paramref name="count"/> intervals, in the same unit as the stored timestamps.
/// </summary>
/// <param name="count">Number of trailing intervals to average.</param>
/// <returns>
/// The mean interval, or 0 when <paramref name="count"/> is not positive or fewer than
/// <c>count + 1</c> timestamps are stored.
/// </returns>
public double GetFrameTime(int count)
{
    //a zero count would otherwise divide 0 by 0 and return NaN
    if (count <= 0)
    {
        return 0;
    }

    //Add() mutates the list under this lock from another thread
    lock (sync)
    {
        int listCount = timestamps.Count;
        if (listCount <= count)
        {
            return 0;
        }

        //the sum of consecutive differences telescopes to newest minus oldest
        long span = timestamps[listCount - 1] - timestamps[listCount - 1 - count];
        return (double)span / count;
    }
}
This method gave me far more accurate results (compared to in-game counters, where available) in the several different games I've tried.
Related
I'm dealing with c# concurrent-queue and multi-threading in socket-programming tcp/ip
First, I've already done with socket-programming itself. That means, I've already finished coding about client, server and stuffs about communication itself
basic structure is pipe-lined(producer-consumer problem) and now I'm doing with bit conversion
below is brief summary about my code
client-socket ->server-socket -> concurrent_queue_1(with type byte[65536],Thread_1 process this) -> concurrent_queue_2(with type double[40,3500], Thread_2 process this) -> display-data or other work(It can be gpu-work)
*(double[40,3500] can be changed to other size)
Till now,I've implemented putting_data into queue1(Thread1) and just dequeuing all(Thread2) and, its speed is about 700Mbps
The reason I used two concurrent_queue is, I want communication,and type conversion work to be processed in background regardless of main procedure about control things.
Here is the code about my own concurrent_queue with Blocking
/// <summary>
/// A FIFO queue whose <see cref="TryDequeue"/> blocks until an item is available or the
/// queue has been marked complete.
/// </summary>
/// <remarks>
/// A semaphore counts the queued items, so a wake-up can never be lost between the
/// consumer's emptiness check and its wait. <see cref="CompleteAdding"/> adds one extra
/// permit that is handed from consumer to consumer, so no consumer stays blocked on a
/// queue that will never receive another item.
/// An <see cref="Enqueue"/> racing with <see cref="CompleteAdding"/> on another thread may
/// still be accepted; call <see cref="CompleteAdding"/> after the last producer finished.
/// </remarks>
public class BlockingConcurrentQueue<T> : IDisposable
{
    private readonly ConcurrentQueue<T> _internalQueue;
    private readonly SemaphoreSlim _itemsAvailable;
    private long _isAddingCompleted = 0;

    public BlockingConcurrentQueue()
    {
        _internalQueue = new ConcurrentQueue<T>();
        _itemsAvailable = new SemaphoreSlim(0);
    }

    /// <summary>True once <see cref="CompleteAdding"/> has been called.</summary>
    public bool IsAddingCompleted
    {
        get
        {
            return Interlocked.Read(ref _isAddingCompleted) == 1;
        }
    }

    /// <summary>True when adding is complete and no items remain.</summary>
    public bool IsCompleted
    {
        get
        {
            return Interlocked.Read(ref _isAddingCompleted) == 1 && _internalQueue.IsEmpty;
        }
    }

    /// <summary>Marks the queue as closed for producers and wakes blocked consumers.</summary>
    public void CompleteAdding()
    {
        // Release the completion permit only on the first call.
        if (Interlocked.Exchange(ref _isAddingCompleted, 1) == 0)
        {
            _itemsAvailable.Release();
        }
    }

    public void Dispose()
    {
        _itemsAvailable.Dispose();
    }

    /// <summary>Adds an item and wakes one waiting consumer.</summary>
    /// <exception cref="InvalidOperationException">Adding has been completed.</exception>
    public void Enqueue(T item)
    {
        // Check before enqueuing so a rejected item is not left in the queue.
        if (Interlocked.Read(ref _isAddingCompleted) == 1)
            throw new InvalidOperationException("Adding Completed.");

        _internalQueue.Enqueue(item);
        _itemsAvailable.Release();
    }

    /// <summary>
    /// Removes the next item, blocking while the queue is empty and still open.
    /// </summary>
    /// <returns>
    /// True with the item in <paramref name="result"/>; false once adding is complete and
    /// the queue has been drained.
    /// </returns>
    public bool TryDequeue(out T result)
    {
        _itemsAvailable.Wait();

        if (_internalQueue.TryDequeue(out result))
        {
            return true;
        }

        // Only the completion permit can be taken while the queue is empty; pass it on
        // so that every other (and every later) consumer also returns instead of blocking.
        _itemsAvailable.Release();
        return false;
    }
}
My question is here
As I mentioned above, concurrent_queue1's type is byte[65536] and 65536 bytes = 8192 double data.
(40 * 3500=8192 * 17.08984375)
I want merge multiple 8192 double data into form of double[40,3500](size can be changed)and enqueue to concurrent_queue2 with Thread2
It's easy to do it with naive-approach(using many complex for loop) but it's slow cuz, It copys all the
data and expose to upper class or layer.
I'm searching method automatically enqueuing with matched size like foreach loop automatically iterates through 2D-array in row-major way, not yet found
Is there any fast way to merge 1D-byte array into form of 2D-double array and enqueue it?
Thanks for your help!
I try to understand your conversion rule, so I write this conversion code. Use Parallel to speed up the calculation.
// Build a 64 KiB test buffer holding a repeating 0..255 byte pattern.
int maxSize = 65536;
byte[] dim1Array = new byte[maxSize];
for (int i = 0; i < maxSize; ++i)
{
    dim1Array[i] = (byte)(i % 256);
}

int dim2Row = 40;
int dim2Column = 3500;
int byteToDoubleRatio = 8;
int toDoubleSize = maxSize / byteToDoubleRatio;
double[,] dim2Array = new double[dim2Row, dim2Column];

// A rectangular double[,] is laid out contiguously in row-major order, so the k-th
// double decoded from the byte buffer lands at [k / dim2Column, k % dim2Column] when
// the raw bytes are copied straight across. One block copy therefore replaces the
// per-element BitConverter.ToDouble loop and yields the same bit patterns.
Buffer.BlockCopy(dim1Array, 0, dim2Array, 0, toDoubleSize * byteToDoubleRatio);
I wrote this code in purpose to test multi and single threading speeds. Thanks for all the feedback! I rewrote most of it based on the great comments I received. This now functions properly (maybe has a bug here or there), tests multi threads first, and takes an average to find a more accurate speed: (Scroll to bottom for cont.)
Main method Class
using System;
namespace SingleAndMultiThreading
{
/// <summary>
/// Entry point of the benchmark: asks how many objects to create and how many test rounds
/// to run, then reports how multi-threaded creation compares with single-threaded creation.
/// </summary>
internal class Threads
{
    private static void Main(string[] args)
    {
        long numOfObjCreated = ReadPositiveNumber("Number of objects to create: ", long.MaxValue);
        int numberOfTests = (int)ReadPositiveNumber("Number of tests to run: ", int.MaxValue);

        CalculateResults(numOfObjCreated, numberOfTests);
        Console.ReadKey();
    }

    /// <summary>
    /// Prompts until the user enters a whole number in the range 1..<paramref name="maxValue"/>.
    /// Zero and negative values are rejected because a test count of zero would make the
    /// average below a 0/0 division.
    /// </summary>
    /// <exception cref="InvalidOperationException">Standard input reached end of stream.</exception>
    private static long ReadPositiveNumber(string prompt, long maxValue)
    {
        while (true)
        {
            Console.Write(prompt);
            string line = Console.ReadLine();
            if (line == null)
            {
                // ReadLine returns null at end of input; looping would never terminate.
                throw new InvalidOperationException("No input available.");
            }

            long value;
            if (long.TryParse(line, out value) && value > 0 && value <= maxValue)
            {
                return value;
            }

            Console.WriteLine("Invalid input.");
        }
    }

    /// <summary>Runs the comparison <paramref name="numberOfTests"/> times and prints the average result.</summary>
    private static void CalculateResults(long numOfObjCreated, int numberOfTests)
    {
        double totalPercentages = 0;
        for (var i = 0; i < numberOfTests; i++)
        {
            totalPercentages += CompleteTests(numOfObjCreated);
        }

        var accuracy = totalPercentages / numberOfTests;

        // Averages that truncate to zero are reported as a tie.
        if ((int)accuracy == 0)
        {
            Console.WriteLine("\nIn this case, neither single threading or multithreading is faster.\n" +
                              "They both run equally well under these conditions.\n");
            return;
        }

        if (accuracy < 0)
        {
            Console.WriteLine("\nIn this case with {0} objects being created, single threading is faster!\n",
                string.Format("{0:#,###0}", numOfObjCreated));
            return;
        }

        Console.WriteLine("\nFrom {0} test(s), {1}% was the average percentage of increased speed in multithreading.\n",
            string.Format("{0:#,###0}", numberOfTests), string.Format("{0:#,###0}", accuracy));
    }

    /// <summary>
    /// Runs one multi-threaded and one single-threaded pass and returns how much faster,
    /// in percent, the multi-threaded pass was (negative when it was slower).
    /// </summary>
    private static double CompleteTests(long numOfObjCreated)
    {
        Console.WriteLine("Computing...");

        var numOfCores = Environment.ProcessorCount;
        // The multi-threaded pass deliberately runs first.
        var timeForMultiThread = MultiThread.Run(numOfObjCreated, numOfCores);
        var timeForSingleThread = SingleThread.Run(numOfObjCreated);
        var percentFaster = ((timeForSingleThread / timeForMultiThread) * 100) - 100;

        //note: .NET does its part in assigning a certain thread to its own core
        Console.WriteLine("Using all {0} cores, creating {1} objects is {2}% faster.",
            numOfCores, string.Format("{0:#,###0}", numOfObjCreated), string.Format("{0:#,###0}", percentFaster));

        return percentFaster;
    }
}
}
Single Threading Class
using System;
using System.Diagnostics;
namespace SingleAndMultiThreading
{
/// <summary>Times object creation on the calling thread.</summary>
internal class SingleThread
{
    /// <summary>
    /// Allocates <paramref name="numOfObjCreated"/> objects in a loop, prints the elapsed
    /// stopwatch ticks and returns them.
    /// </summary>
    public static double Run(long numOfObjCreated)
    {
        Stopwatch timer = Stopwatch.StartNew();

        long created = 0;
        while (created < numOfObjCreated)
        {
            new object();
            created++;
        }

        timer.Stop();
        long elapsedTicks = timer.ElapsedTicks;

        string objectsText = string.Format("{0:#,###0}", numOfObjCreated);
        string ticksText = string.Format("{0:#,###0}", elapsedTicks);
        Console.WriteLine("The time to create {0} objects with 1 thread is: {1} ticks.",
            objectsText, ticksText);

        return elapsedTicks;
    }
}
}
Multi Threading Class
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Threading;
namespace SingleAndMultiThreading
{
/// <summary>Times object creation spread over several dedicated threads.</summary>
internal class MultiThread
{
    /// <summary>
    /// Creates <paramref name="numOfObjCreated"/> objects using <paramref name="numOfCores"/>
    /// threads, prints the elapsed stopwatch ticks and returns them.
    /// </summary>
    /// <param name="numOfObjCreated">Total number of objects to create across all threads.</param>
    /// <param name="numOfCores">Number of worker threads to start; must be positive.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="numOfCores"/> is not positive.</exception>
    public static double Run(long numOfObjCreated, int numOfCores)
    {
        if (numOfCores <= 0)
        {
            throw new ArgumentOutOfRangeException("numOfCores", "At least one thread is required.");
        }

        var watch = new Stopwatch();

        // Split the work evenly; the first `remainder` threads take one extra object so
        // that the total matches numOfObjCreated instead of being rounded down.
        long share = numOfObjCreated / numOfCores;
        long remainder = numOfObjCreated % numOfCores;

        var listOfThreads = new List<Thread>(numOfCores);
        for (int k = 0; k < numOfCores; k++)
        {
            var workerObject = new Worker(k < remainder ? share + 1 : share);
            listOfThreads.Add(new Thread(workerObject.DoWork));
        }

        watch.Start();

        foreach (var thread in listOfThreads)
        {
            thread.Start();
        }

        // Join blocks until each thread ends; polling IsAlive in a tight loop would keep
        // the measuring thread busy on a core the workers could use.
        foreach (var thread in listOfThreads)
        {
            thread.Join();
        }

        watch.Stop();
        var totalTime = watch.ElapsedTicks;

        Console.WriteLine("The time to create {0} objects utilizing all {1} cores is: {2} ticks.",
            string.Format("{0:#,###0}", numOfObjCreated), numOfCores, string.Format("{0:#,###0}", totalTime));

        return totalTime;
    }
}
}
Worker Class
namespace SingleAndMultiThreading
{
/// <summary>Unit of work that allocates a fixed number of objects.</summary>
public class Worker
{
    /// <summary>Set to true when <see cref="DoWork"/> has finished its loop.</summary>
    public bool IsDone;

    private readonly long _numOfObjToCreate;

    public Worker(long numOfObjToCreate)
    {
        _numOfObjToCreate = numOfObjToCreate;
    }

    /// <summary>Allocates the configured number of objects, then flags completion.</summary>
    public void DoWork()
    {
        long remaining = _numOfObjToCreate;
        while (remaining > 0)
        {
            new object();
            remaining--;
        }

        IsDone = true;
    }
}
}
The output of this code is a bit too long to post (I urge you to copy and paste into your own IDE, its really fascinating). I guess the accepted answer that this doesn't give the same result per every test is due to CPU scheduling, other or minor issues like ASLR and such. More than one thing is happening aside from visual studio running this program, and priorities differently. Also thank you for pointing out that running multi threading first helps because of the already-done memory allocation!
Another thing to point out, I found this while running:
The spikes are when the process of multi threading takes place.
Here's what I'm trying to do:
Get one html page from url which contains multiple links inside
Visit each link
Extract some data from visited link and create object using it
So far All i did is just simple and slow way:
/// <summary>
/// Loads the search page for <paramref name="name"/> and builds one Link per result
/// anchor found on it.
/// </summary>
/// <returns>The links found; empty when the page contains no matching anchors.</returns>
public List<Link> searchLinks(string name)
{
    List<Link> foundLinks = new List<Link>();
    // getHtmlDocument() just returns HtmlDocument using input url.
    HtmlDocument doc = getHtmlDocument(AU_SEARCH_URL + fixSpaces(name));
    // Verbatim string (@"...") with XPath attribute tests (@id).
    var link_list = doc.DocumentNode.SelectNodes(@"/html/body/div[@id='parent-container']/div[@id='main-content']/ol[@id='searchresult']/li/h2/a");
    // NOTE(review): assuming HtmlDocument is HtmlAgilityPack's, SelectNodes returns null
    // rather than an empty collection when nothing matches - confirm.
    if (link_list == null)
    {
        return foundLinks;
    }
    foreach (var link in link_list)
    {
        // TODO Threads
        string href = link.Attributes["href"].Value;
        // getObject() creates object using data gathered
        foundLinks.Add(getObject(link.InnerText, href, getLatestEpisode(href)));
    }
    return foundLinks;
}
To make it faster/efficient I need to implement threads, but I'm not sure how i should approach it, because I can't just randomly start threads, I need to wait for them to finish, thread.Join() kind of solves 'wait for threads to finish' problem, but it becomes not fast anymore i think, because threads will be launched after earlier one is finished.
The simplest way to offload the work to multiple threads would be to use Parallel.ForEach() in place of your current loop. Something like this:
Parallel.ForEach(link_list, link =>
{
    string href = link.Attributes["href"].Value;
    // Do the slow per-link work outside the lock so the iterations really overlap.
    var item = getObject(link.InnerText, href, getLatestEpisode(href));
    // List<T> does not support concurrent writers; serialize only the Add.
    lock (foundLinks)
    {
        foundLinks.Add(item);
    }
});
I'm not sure if there are other threading concerns in your overall code. (Note, for example, that this would no longer guarantee that the data would be added to foundLinks in the same order.) But as long as there's nothing explicitly preventing concurrent work from taking place then this would take advantage of threading over multiple CPU cores to process the work.
Maybe you should use Thread pool :
Example from MSDN :
using System;
using System.Threading;
/// <summary>
/// Work item that computes one Fibonacci number and signals an event when it is done.
/// </summary>
public class Fibonacci
{
    private readonly int _n;
    private readonly ManualResetEvent _doneEvent;
    private int _fibOfN;

    // Constructor.
    public Fibonacci(int n, ManualResetEvent doneEvent)
    {
        _doneEvent = doneEvent;
        _n = n;
    }

    /// <summary>The index whose Fibonacci number is computed.</summary>
    public int N { get { return _n; } }

    /// <summary>The computed value; populated by <see cref="ThreadPoolCallback"/>.</summary>
    public int FibOfN { get { return _fibOfN; } }

    // Wrapper method for use with thread pool.
    // threadContext carries the boxed int index that is echoed in the console output.
    public void ThreadPoolCallback(Object threadContext)
    {
        int workerIndex = (int)threadContext;
        Console.WriteLine("thread {0} started...", workerIndex);

        _fibOfN = Calculate(_n);

        Console.WriteLine("thread {0} result calculated...", workerIndex);
        _doneEvent.Set();
    }

    // Recursive method that calculates the Nth Fibonacci number.
    // Values of n that are 1 or less are returned unchanged.
    public int Calculate(int n)
    {
        return n <= 1
            ? n
            : Calculate(n - 1) + Calculate(n - 2);
    }
}
/// <summary>
/// Queues several Fibonacci work items on the thread pool, waits for all of them to
/// signal completion and prints their results.
/// </summary>
public class ThreadPoolExample
{
    static void Main()
    {
        const int FibonacciCalculations = 10;

        // One event is used for each Fibonacci object.
        var doneEvents = new ManualResetEvent[FibonacciCalculations];
        var fibArray = new Fibonacci[FibonacciCalculations];
        var r = new Random();

        // Configure and start threads using ThreadPool.
        Console.WriteLine("launching {0} tasks...", FibonacciCalculations);
        int slot = 0;
        while (slot < FibonacciCalculations)
        {
            var signal = new ManualResetEvent(false);
            doneEvents[slot] = signal;

            var job = new Fibonacci(r.Next(20, 40), signal);
            fibArray[slot] = job;

            // The slot index is handed to the callback as its state object.
            ThreadPool.QueueUserWorkItem(job.ThreadPoolCallback, slot);
            slot++;
        }

        // Wait for all threads in pool to calculate.
        WaitHandle.WaitAll(doneEvents);
        Console.WriteLine("All calculations are complete.");

        // Display the results.
        foreach (Fibonacci finished in fibArray)
        {
            Console.WriteLine("Fibonacci({0}) = {1}", finished.N, finished.FibOfN);
        }
    }
}
I have to count the input frequency of the parallel port at Pin no.13, comming from a 555 timer IC, the real frequency should be around 3-4 Hz (ON Pulse). I have tried several codes, several times but every time those are giving different values. I have tried the following code:
// P/Invoke binding for the "Inp32" entry point exported by inpout32.dll.
// Takes an int address and returns an int; presumably the value read from that
// I/O port (the callers in this snippet pass 889) - confirm against the DLL's docs.
[DllImport("inpout32.dll", EntryPoint = "Inp32")]
public static extern int Input(int adress);
// Click handler: waits for the value read from port 889 to change, classifies the new
// value as OFF (0) when it equals 120 and ON (1) otherwise, then starts the measurement.
private void button1_Click(object sender, EventArgs e)
{
    int currentState = Input(889);
    int State;

    // Busy-wait until the port value differs from the first reading.
    do
    {
        State = Input(889);
    }
    while (State == currentState);

    // Classify the sample that revealed the change. Reading the port a second time here
    // could return a different value than the one that ended the loop.
    int LastState = (State == 120) ? 0 : 1;

    GetFreq(LastState);
}
// Counts ON and OFF pulses seen on port 889 during one full second and shows both
// counts in a message box. LastPulse is the state the caller observed last
// (1 = ON, 0 = OFF); it is counted as the first pulse of its kind.
void GetFreq(int LastPulse)
{
    int highPulseFreq = 0;
    int lowPulseFreq = 0;
    if (LastPulse == 1)
    {
        highPulseFreq++;
    }
    if (LastPulse == 0)
    {
        lowPulseFreq++;
    }

    // Measure a fixed 1000 ms window. Comparing DateTime.Now.Second instead ends the
    // window whenever the wall-clock second rolls over, i.e. after anything between
    // 0 and 1 second depending on when the method was called.
    System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();
    while (stopwatch.ElapsedMilliseconds < 1000)
    {
        if (Input(889) != 120)// ON
        {
            // count only the OFF -> ON transition
            if (LastPulse == 0)
            {
                highPulseFreq++;
                LastPulse = 1;
            }
        }
        else
        {
            // count only the ON -> OFF transition
            if (LastPulse == 1)
            {
                lowPulseFreq++;
                LastPulse = 0;
            }
        }
    }

    MessageBox.Show("ON Pulses: " + highPulseFreq.ToString() + Environment.NewLine + "OFF Pulses: " + lowPulseFreq.ToString());
}
OUTPUT:
What should I do, to get accurate frequency? Is any thing wrong in my code?
I am using the inpout32.dll to control parallel port.
You're doing all sorts of things a bit wrong. First, you're not counting pulses for an entire second; you're counting pulses for up to a second (depending on where in the second GetFreq is called).
Second, you're counting up and down pulses, although I think the frequency should be the number of up (or down) pulses each second, not both of them (that would be double the frequency).
And finally, if you want to measure 3 or 4 Hz, measuring for one second is going to introduce rounding errors. Try measuring for 5 seconds. Use a Stopwatch to measure those 5 seconds.
Try using the following function instead:
/// <summary>
/// Polls the input port for <paramref name="time"/> milliseconds, counting transitions
/// of the masked bit, and returns the estimated frequency in Hz.
/// </summary>
/// <param name="time">Length of the measurement window in milliseconds; must be positive.</param>
/// <param name="highCount">Number of high states seen (the initial state counts if it is high).</param>
/// <param name="lowCount">Number of low states seen (the initial state counts if it is low).</param>
/// <returns>(highCount + lowCount) / 2 cycles divided by the window length in seconds.</returns>
/// <exception cref="System.ArgumentOutOfRangeException"><paramref name="time"/> is not positive.</exception>
double GetFreq(long time, out int highCount, out int lowCount)
{
    if (time <= 0)
    {
        // a non-positive window would make the final division meaningless
        throw new System.ArgumentOutOfRangeException("time", "The measurement window must be positive.");
    }

    const int ADDRESS = 0x378 + 1, MASK = 0x10;
    highCount = lowCount = 0;

    bool LastState = (Input(ADDRESS) & MASK) == MASK;
    if (LastState)
    {
        highCount++;
    }
    else
    {
        lowCount++;
    }

    System.Diagnostics.Stopwatch stopwatch = new System.Diagnostics.Stopwatch();
    stopwatch.Start();
    while (stopwatch.ElapsedMilliseconds <= time)
    {
        if ((Input(ADDRESS) & MASK) == MASK) // High
        {
            // low -> high transition
            if (!LastState)
            {
                highCount++;
                LastState = true;
            }
        }
        else
        {
            // high -> low transition: count it only when the previous state was high
            if (LastState)
            {
                lowCount++;
                LastState = false;
            }
        }
    }
    stopwatch.Stop();

    // Two transitions make one cycle and time is in ms: (n / 2) / (time / 1000) = n / time * 500.
    return ((double)(highCount + lowCount)) / time * 500;
}
And when you need to call the function, just do the following:
// Measure for 1000 ms (GetFreq compares this argument against elapsed milliseconds);
// the two counters are filled in through the out parameters.
int highCount, lowCount;
double frequenct = GetFreq(1000, out highCount, out lowCount);
In my code, I used bitwise operator AND to mask out unnecessary bits, which should be better than directly comparing against 120. Remember when the results are bitwise, never compare directly using == or != operators.
I used System.Diagnostics.Stopwatch which is a lot more precise than using DateTime.Now.Second.
You need to sample your signal at rate that is at least twice the highest frequency in your signal. If your expected highest frequency is about 4Hz, then sampling the signal anywhere from 15 - 20Hz should give good results.
Fortunately, sampling at this rate is something that can be done without too much futzing around with high precision timers on Windows (if you don't require a lot of accuracy). A 20Hz sample rate corresponds to a sample period of 50ms, so you can use a loop where you sleep for about 50ms between recording sample values. You won't get a super precise delta-T between samples (you may see variations of up to 15-30ms in the time between each sample, depending on your system), but it should be good enough for the frequencies you're dealing with.
You can record several seconds worth of samples (and associated timestamps), and then export the data to a spreadsheet. Once in the spreadsheet, you can do some analysis and graphing. Or you can find some time series analysis code to analyze the list of samples, such as using a Fourier transform (FFT) to convert a signal from the time domain to the frequency domain.
Here is an example of creating the samples. You can replace the use of DateTime.Now with a StopWatch in GetInputSamples if you really need more accuracy in the timestamps.
// P/Invoke binding for the "Inp32" entry point exported by inpout32.dll.
// Takes an int address and returns an int; presumably the value read from that
// I/O port - confirm against the DLL's docs.
[DllImport("inpout32.dll", EntryPoint = "Inp32")]
public static extern int Input(int adress);
/// <summary>One reading taken from the input port together with the time it was taken.</summary>
struct Sample
{
    /// <summary>Value returned by the port read.</summary>
    public int Value;

    /// <summary>Milliseconds elapsed since sampling started.</summary>
    public int Milliseconds;
}
// Click handler: samples port 889 every 50 ms for 5 seconds and writes the
// readings to test.csv.
private void button1_Click(object sender, EventArgs e)
{
    var samples = GetInputSamples(
        889,
        TimeSpan.FromSeconds(5),
        TimeSpan.FromMilliseconds(50));

    SaveSamplesCSV(samples, "test.csv");
}
/// <summary>
/// Reads <paramref name="inputPort"/> repeatedly for <paramref name="duration"/>, sleeping
/// <paramref name="samplePeriod"/> between reads, and returns every reading with the
/// number of milliseconds elapsed since sampling began.
/// </summary>
/// <remarks>
/// The calling thread runs at highest priority while sampling; its previous priority is
/// restored afterwards even if a read throws.
/// </remarks>
private static List<Sample> GetInputSamples(int inputPort, TimeSpan duration, TimeSpan samplePeriod)
{
    var collected = new List<Sample>();
    Thread current = Thread.CurrentThread;
    ThreadPriority savedPriority = current.Priority;
    try
    {
        current.Priority = ThreadPriority.Highest;
        DateTime began = DateTime.Now;
        for (;;)
        {
            if (DateTime.Now - began >= duration)
            {
                break;
            }

            int reading = Input(inputPort);
            // timestamp is taken right after the read
            int elapsedMs = (int)(DateTime.Now - began).TotalMilliseconds;
            collected.Add(new Sample { Value = reading, Milliseconds = elapsedMs });

            Thread.Sleep(samplePeriod);
        }
    }
    finally
    {
        current.Priority = savedPriority;
    }
    return collected;
}
/// <summary>
/// Writes the samples to <paramref name="fileName"/> as text: a header line followed by
/// one "value, milliseconds" line per sample. An existing file is overwritten.
/// </summary>
private static void SaveSamplesCSV(List<Sample> samples, string fileName)
{
    using (StreamWriter output = File.CreateText(fileName))
    {
        output.WriteLine("Sample, Time (ms)");
        for (int i = 0; i < samples.Count; i++)
        {
            Sample current = samples[i];
            output.WriteLine("{0}, {1}", current.Value, current.Milliseconds);
        }
    }
}
I am new to multithreading in C# . I have a 3D array of size (x)(y)(z) and say i want to calculate the average of all the z samples for every (x,y) values. I wish to do that using multithreading (say 2 threads) where i will send half the array of size (x/2)*y*z for processing to thread1 and the other half to thread2.
How to do it? How do I pass and retrieve arguments from individual threads? A code example will be helpful.
Regards
I would recommend using PLINQ for this instead of threading this yourself.
It will let you run your query using LINQ syntax, but parallelize it (across all of your cores) automatically.
There are many reasons why it makes sense to use something PLINQ (as mentioned by Reed) or Parallel.For as implementing a low-overhead scheduler for distributing jobs over several cpus is a bit challenging.
So if I understood you correctly maybe this could get you started (on my 4 core machine the parallel version is 3 times faster than the single core version):
using System;
using System.Diagnostics;
using System.Linq;
using System.Threading.Tasks;
/// <summary>
/// Benchmark that repeatedly averages the z samples of every (x, y) cell of a flattened
/// 3D array and reports the total elapsed time.
/// </summary>
class Program
{
    /// <summary>
    /// For each of the x*y cells, averages its z consecutive values in
    /// <paramref name="input"/> and stores the mean in <paramref name="result"/>.
    /// </summary>
    /// <param name="input">Flattened samples; cell i occupies indices [i*z, i*z + z).</param>
    /// <param name="result">Receives one average per cell; length x*y.</param>
    /// <param name="x">Size of the first dimension.</param>
    /// <param name="y">Size of the second dimension.</param>
    /// <param name="z">Number of samples per cell.</param>
    static void AverageOfZ (
        double[] input,
        double[] result,
        int x,
        int y,
        int z
        )
    {
        Debug.Assert(input.Length == x*y*z);
        Debug.Assert(result.Length == x*y);

        //Replace Parallel with Sequential to compare with non-parallel loop
        //Sequential.For(
        Parallel.For(
            0,
            x*y,
            i =>
            {
                // each iteration writes only result[i], so iterations are independent
                var begin = i*z;
                var end = begin + z;
                var sum = 0.0;
                for (var iter = begin; iter < end; ++iter)
                {
                    sum += input[iter];
                }
                result[i] = sum/z;
            });
    }

    static void Main(string[] args)
    {
        const int X = 64;
        const int Y = 64;
        const int Z = 64;
        const int Repetitions = 40000;

        // fixed seed, so every run processes the same samples
        var random = new Random(19740531);
        var samples = Enumerable.Range(0, X*Y*Z).Select(x => random.NextDouble()).ToArray();
        var result = new double[X*Y];

        // Stopwatch is the intended tool for measuring elapsed time; DateTime.Now follows
        // the wall clock, which is coarser and can be adjusted while the benchmark runs.
        var stopwatch = Stopwatch.StartNew();
        for (var iter = 0; iter < Repetitions; ++iter)
        {
            AverageOfZ(samples, result, X, Y, Z);
        }
        stopwatch.Stop();

        Console.WriteLine(
            "{0} samples processed {1} times in {2} seconds",
            samples.Length,
            Repetitions,
            stopwatch.Elapsed.TotalSeconds
            );
    }
}
/// <summary>Single-threaded stand-in with the same call shape as Parallel.For.</summary>
static class Sequential
{
    /// <summary>
    /// Invokes <paramref name="action"/> once for each integer from <paramref name="from"/>
    /// (inclusive) up to <paramref name="to"/> (exclusive), in ascending order.
    /// </summary>
    public static void For(int from, int to, Action<int> action)
    {
        int index = from;
        while (index < to)
        {
            action(index);
            ++index;
        }
    }
}
PS. When going for concurrent performance its important to consider how the different cores access memory as its very easy to get disappointing performance otherwise.
Dot Net 3.5 and onward introduced many shortcut keywords that abstract away the complexity of things like Parallel for multi threading or Async for Async IO. Unfortunately this also provides no opportunity for understanding whats involved in these tasks. For example a colleague of mine was recently trying to use Async for a login method which returned an authentication token.
Here is the full blown multi threaded sample code for your scenario. to make it more real the sample code pretends that:
X is Longitude
Y is Lattitude
and Z is Rainfall Samples at the coordinates
The sample code also follows the Unit of Work design pattern where Rainfall Samples at each coordinate becomes a work item. It also creates discrete foreground threads instead of using a background threadpool.
Due to the simplicity of the work item and short compute time involved I've split the thread synchronization locks into two locks. one for the work queue and one for the output data.
Note: I have not used any .NET shortcuts such as LINQ, so this code should run on .NET 2.0 as well.
In real world app development something like whats below would only be needed in complex scenarios such as stream processing of a continuous stream of work items in which case you would also need to implement output data buffers cleared regularly as the threads would effectively run forever.
/// <summary>
/// Sums the rainfall samples of every (longitude, latitude) cell of a randomly generated
/// grid using a fixed set of worker threads that pull work items from a shared queue,
/// then prints the resulting grid of sums.
/// </summary>
/// <remarks>
/// Two separate locks are used: one guards the work queue, the other guards the output grid.
/// </remarks>
public static class MultiThreadSumRainFall
{
    const int LongitudeSize = 64;
    const int LattitudeSize = 64;
    const int RainFallSamplesSize = 64;
    const int SampleMinValue = 0;
    const int SampleMaxValue = 1000;
    const int ThreadCount = 4;

    /// <summary>
    /// Generates sample data, sums each cell's samples on worker threads and writes one
    /// line per longitude to the console, with the sums separated by spaces.
    /// </summary>
    public static void SumRainfallAndOutputValues()
    {
        int[][][] SampleData = GenerateSampleRainfallData();

        // Every work item is queued before any worker starts, so a worker that finds the
        // queue empty can safely treat that as "all work done" and exit.
        for (int Longitude = 0; Longitude < LongitudeSize; Longitude++)
        {
            for (int Lattitude = 0; Lattitude < LattitudeSize; Lattitude++)
            {
                QueueWork(new WorkItem(Longitude, Lattitude, SampleData[Longitude][Lattitude]));
            }
        }

        // At least one thread, and no more than one per processor plus one.
        int NumThreads = Math.Min(Math.Max(ThreadCount, 1), Environment.ProcessorCount + 1);

        OutputData = new int[LongitudeSize, LattitudeSize];

        List<System.Threading.Thread> RunningThreads = new List<System.Threading.Thread>();
        for (int I = 0; I < NumThreads; I++)
        {
            System.Threading.Thread WorkThread = new System.Threading.Thread(ParallelSum);
            WorkThread.Start();
            RunningThreads.Add(WorkThread);
        }

        // Join returns as soon as each worker ends; polling IsAlive with Sleep(100) would
        // add up to 100 ms of idle waiting after the work is finished.
        foreach (System.Threading.Thread WorkerThread in RunningThreads)
        {
            WorkerThread.Join();
        }

        for (int Longitude = 0; Longitude < LongitudeSize; Longitude++)
        {
            for (int Lattitude = 0; Lattitude < LattitudeSize; Lattitude++)
            {
                Console.Write(string.Concat(OutputData[Longitude, Lattitude], " "));
            }
            Console.WriteLine();
        }
    }

    /// <summary>The samples of one grid cell together with the cell's coordinates.</summary>
    private class WorkItem
    {
        public WorkItem(int _Longitude, int _Lattitude, int[] _RainFallSamples)
        {
            Longitude = _Longitude;
            Lattitude = _Lattitude;
            RainFallSamples = _RainFallSamples;
        }

        public int Longitude { get; set; }
        public int Lattitude { get; set; }
        public int[] RainFallSamples { get; set; }
    }

    /// <summary>
    /// Builds a LongitudeSize x LattitudeSize x RainFallSamplesSize jagged array filled with
    /// random values from SampleMinValue (inclusive) to SampleMaxValue (exclusive).
    /// </summary>
    public static int[][][] GenerateSampleRainfallData()
    {
        Random Rnd = new Random();
        int[][][] Result = new int[LongitudeSize][][];

        for (int Longitude = 0; Longitude < LongitudeSize; Longitude++)
        {
            Result[Longitude] = new int[LattitudeSize][];
            for (int Lattidude = 0; Lattidude < LattitudeSize; Lattidude++)
            {
                Result[Longitude][Lattidude] = new int[RainFallSamplesSize];
                for (int Sample = 0; Sample < RainFallSamplesSize; Sample++)
                {
                    Result[Longitude][Lattidude][Sample] = Rnd.Next(SampleMinValue, SampleMaxValue);
                }
            }
        }

        return Result;
    }

    // Lock and storage for the pending work items.
    private static object SyncRootWorkQueue = new object();
    private static Queue<WorkItem> WorkQueue = new Queue<WorkItem>();

    /// <summary>Adds a work item to the shared queue.</summary>
    private static void QueueWork(WorkItem SamplesWorkItem)
    {
        lock (SyncRootWorkQueue)
        {
            WorkQueue.Enqueue(SamplesWorkItem);
        }
    }

    /// <summary>Removes and returns the next work item, or null when the queue is empty.</summary>
    private static WorkItem DeQueueWork()
    {
        lock (SyncRootWorkQueue)
        {
            return WorkQueue.Count > 0 ? WorkQueue.Dequeue() : null;
        }
    }

    /// <summary>Returns the number of work items still queued.</summary>
    private static int QueueSize()
    {
        lock (SyncRootWorkQueue)
        {
            return WorkQueue.Count;
        }
    }

    // Lock and storage for the per-cell results.
    private static object SyncRootOutputData = new object();
    private static int[,] OutputData;

    /// <summary>Stores the sum computed for one cell.</summary>
    private static void SetOutputData(int Longitude, int Lattitude, int SumSamples)
    {
        lock (SyncRootOutputData)
        {
            OutputData[Longitude, Lattitude] = SumSamples;
        }
    }

    /// <summary>
    /// Worker thread body: takes work items until the queue is empty, summing each item's
    /// samples and storing the total for its cell.
    /// </summary>
    private static void ParallelSum()
    {
        WorkItem SamplesWorkItem = DeQueueWork();
        while (SamplesWorkItem != null)
        {
            int SummedResult = 0;
            foreach (int SampleValue in SamplesWorkItem.RainFallSamples)
            {
                SummedResult += SampleValue;
            }

            SetOutputData(SamplesWorkItem.Longitude, SamplesWorkItem.Lattitude, SummedResult);
            SamplesWorkItem = DeQueueWork();
        }
    }
}