FFT algorithm getting wrong sound frequency value - c#

I have run this FFT algorithm on a 440Hz audio file. But I get an unexpected sound frequency: 510Hz.
Is the byteArray containing .wav correctly converted into 2 double arrays (Re & Im parts)? The imaginary array contains only 0.
I assume that the highest sound frequency is the maximum of xRe array: please look at the very end of the run() function? Maybe that is my mistake: is it average or something like that?
What is the problem then?
UPDATED: The biggest sum Re+Im is at index = 0 so I get frequency = 0;
Whole project: contains .wav -> just open and run.
using System;
using System.Net;
using System.IO;
namespace FFT {
/**
* Performs an in-place complex FFT.
*
* Released under the MIT License
*
* Copyright (c) 2010 Gerald T. Beauregard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
public class FFT2 {
// Element for linked list in which we store the
// input/output data. We use a linked list because
// for sequential access it's faster than array index.
class FFTElement {
public double re = 0.0; // Real component
public double im = 0.0; // Imaginary component
public FFTElement next; // Next element in linked list
public uint revTgt; // Target position post bit-reversal
}
private static int sampleRate;
private uint m_logN = 0; // log2 of FFT size
private uint m_N = 0; // FFT size
private FFTElement[] m_X; // Vector of linked list elements
/**
*
*/
public FFT2() {
}
/**
* Initialize class to perform FFT of specified size.
*
* #param logN Log2 of FFT length. e.g. for 512 pt FFT, logN = 9.
*/
public void init(uint logN) {
m_logN = logN;
m_N = (uint)(1 << (int)m_logN);
// Allocate elements for linked list of complex numbers.
m_X = new FFTElement[m_N];
for (uint k = 0; k < m_N; k++)
m_X[k] = new FFTElement();
// Set up "next" pointers.
for (uint k = 0; k < m_N - 1; k++)
m_X[k].next = m_X[k + 1];
// Specify target for bit reversal re-ordering.
for (uint k = 0; k < m_N; k++)
m_X[k].revTgt = BitReverse(k, logN);
}
/**
* Performs in-place complex FFT.
*
* #param xRe Real part of input/output
* #param xIm Imaginary part of input/output
* #param inverse If true, do an inverse FFT
*/
public void run(double[] xRe, double[] xIm, bool inverse = false) {
uint numFlies = m_N >> 1; // Number of butterflies per sub-FFT
uint span = m_N >> 1; // Width of the butterfly
uint spacing = m_N; // Distance between start of sub-FFTs
uint wIndexStep = 1; // Increment for twiddle table index
// Copy data into linked complex number objects
// If it's an IFFT, we divide by N while we're at it
FFTElement x = m_X[0];
uint k = 0;
double scale = inverse ? 1.0 / m_N : 1.0;
while (x != null) {
x.re = scale * xRe[k];
x.im = scale * xIm[k];
x = x.next;
k++;
}
// For each stage of the FFT
for (uint stage = 0; stage < m_logN; stage++) {
// Compute a multiplier factor for the "twiddle factors".
// The twiddle factors are complex unit vectors spaced at
// regular angular intervals. The angle by which the twiddle
// factor advances depends on the FFT stage. In many FFT
// implementations the twiddle factors are cached, but because
// array lookup is relatively slow in C#, it's just
// as fast to compute them on the fly.
double wAngleInc = wIndexStep * 2.0 * Math.PI / m_N;
if (inverse == false)
wAngleInc *= -1;
double wMulRe = Math.Cos(wAngleInc);
double wMulIm = Math.Sin(wAngleInc);
for (uint start = 0; start < m_N; start += spacing) {
FFTElement xTop = m_X[start];
FFTElement xBot = m_X[start + span];
double wRe = 1.0;
double wIm = 0.0;
// For each butterfly in this stage
for (uint flyCount = 0; flyCount < numFlies; ++flyCount) {
// Get the top & bottom values
double xTopRe = xTop.re;
double xTopIm = xTop.im;
double xBotRe = xBot.re;
double xBotIm = xBot.im;
// Top branch of butterfly has addition
xTop.re = xTopRe + xBotRe;
xTop.im = xTopIm + xBotIm;
// Bottom branch of butterly has subtraction,
// followed by multiplication by twiddle factor
xBotRe = xTopRe - xBotRe;
xBotIm = xTopIm - xBotIm;
xBot.re = xBotRe * wRe - xBotIm * wIm;
xBot.im = xBotRe * wIm + xBotIm * wRe;
// Advance butterfly to next top & bottom positions
xTop = xTop.next;
xBot = xBot.next;
// Update the twiddle factor, via complex multiply
// by unit vector with the appropriate angle
// (wRe + j wIm) = (wRe + j wIm) x (wMulRe + j wMulIm)
double tRe = wRe;
wRe = wRe * wMulRe - wIm * wMulIm;
wIm = tRe * wMulIm + wIm * wMulRe;
}
}
numFlies >>= 1; // Divide by 2 by right shift
span >>= 1;
spacing >>= 1;
wIndexStep <<= 1; // Multiply by 2 by left shift
}
// The algorithm leaves the result in a scrambled order.
// Unscramble while copying values from the complex
// linked list elements back to the input/output vectors.
x = m_X[0];
while (x != null) {
uint target = x.revTgt;
xRe[target] = x.re;
xIm[target] = x.im;
x = x.next;
}
//looking for max IS THIS IS FREQUENCY
double max = 0, index = 0;
for (int i = 0; i < xRe.Length; i++) {
if (xRe[i] + xIm[i] > max) {
max = xRe[i]*xRe[i] + xIm[i]*xIm[i];
index = i;
}
}
max = Math.Sqrt(max);
/* if the peak is at bin index i then the corresponding
frequency will be i * Fs / N whe Fs is the sample rate in Hz and N is the FFT size.*/
//DONT KNOW WHY THE BIGGEST VALUE IS IN THE BEGINNING
Console.WriteLine("max "+ max+" index " + index + " m_logN" + m_logN + " " + xRe[0]);
max = index * sampleRate / m_logN;
Console.WriteLine("max " + max);
}
/**
* Do bit reversal of specified number of places of an int
* For example, 1101 bit-reversed is 1011
*
* #param x Number to be bit-reverse.
* #param numBits Number of bits in the number.
*/
private uint BitReverse(
uint x,
uint numBits) {
uint y = 0;
for (uint i = 0; i < numBits; i++) {
y <<= 1;
y |= x & 0x0001;
x >>= 1;
}
return y;
}
public static void Main(String[] args) {
// BinaryReader reader = new BinaryReader(System.IO.File.OpenRead(#"C:\Users\Duke\Desktop\e.wav"));
BinaryReader reader = new BinaryReader(File.Open(#"440.wav", FileMode.Open));
//Read the wave file header from the buffer.
int chunkID = reader.ReadInt32();
int fileSize = reader.ReadInt32();
int riffType = reader.ReadInt32();
int fmtID = reader.ReadInt32();
int fmtSize = reader.ReadInt32();
int fmtCode = reader.ReadInt16();
int channels = reader.ReadInt16();
sampleRate = reader.ReadInt32();
int fmtAvgBPS = reader.ReadInt32();
int fmtBlockAlign = reader.ReadInt16();
int bitDepth = reader.ReadInt16();
if (fmtSize == 18) {
// Read any extra values
int fmtExtraSize = reader.ReadInt16();
reader.ReadBytes(fmtExtraSize);
}
int dataID = reader.ReadInt32();
int dataSize = reader.ReadInt32();
// Store the audio data of the wave file to a byte array.
byte[] byteArray = reader.ReadBytes(dataSize);
/* for (int i = 0; i < byteArray.Length; i++) {
Console.Write(byteArray[i] + " ");
}*/
byte[] data = byteArray;
double[] arrRe = new double[data.Length];
for (int i = 0; i < arrRe.Length; i++) {
arrRe[i] = data[i] / 32768.0;
}
double[] arrI = new double[data.Length];
for (int i = 0; i < arrRe.Length; i++) {
arrI[i] = 0;
}
/**
* Initialize class to perform FFT of specified size.
*
* #param logN Log2 of FFT length. e.g. for 512 pt FFT, logN = 9.
*/
Console.WriteLine();
FFT2 fft2 = new FFT2();
uint logN = (uint)Math.Log(data.Length, 2);
fft2.init(logN);
fft2.run(arrRe, arrI);
// After this you have to split that byte array for each channel (Left,Right)
// Wav supports many channels, so you have to read channel from header
Console.ReadLine();
}
}
}

There are a few things that you need to address:
you're not applying a window function prior to the FFT - this will result in spectral leakage in the general case and you may get misleading results, particularly when looking for peaks, as there will be "smearing" of the spectrum.
when looking for peaks you should be looking at the magnitude of FFT output bins, not the individual real and imaginary parts - magnitude = sqrt(re^2 +im^2) (although you don't need to worry about the sqrt if you're just looking for peaks).
having identified a peak you need to convert the bin index into a frequency - if the peak is at bin index i then the corresponding frequency will be i * Fs / N where Fs is the sample rate in Hz and N is the FFT size.
for a real-to-complex FFT you can ignore the second N / 2 output bins as they are just the complex conjugate mirror image of the first N / 2 bins
(See also this answer for fuller explanations of the above.)

Related

What is the Output of a fftLeft array after applying FFTDb function to a waveLeft array C# .Frequencies, or something else?

I am a newcomer to the sound programming. I have a real-time sound visualizer(http://www.codeproject.com/Articles/20025/Sound-visualizer-in-C). I downloaded it from codeproject.com.
In AudioFrame.cs class there is an array as below:
_fftLeft = FourierTransform.FFTDb(ref _waveLeft);
_fftLeft is a double array. _waveLeft is also a double array. As above they applied
FouriorTransform.cs class's FFTDb function to a _waveLeft array.
Here is FFTDb function:
static public double[] FFTDb(ref double[] x)
{
n = x.Length;
nu = (int)(Math.Log(n) / Math.Log(2));
int n2 = n / 2;
int nu1 = nu - 1;
double[] xre = new double[n];
double[] xim = new double[n];
double[] decibel = new double[n2];
double tr, ti, p, arg, c, s;
for (int i = 0; i < n; i++)
{
xre[i] = x[i];
xim[i] = 0.0f;
}
int k = 0;
for (int l = 1; l <= nu; l++)
{
while (k < n)
{
for (int i = 1; i <= n2; i++)
{
p = BitReverse(k >> nu1);
arg = 2 * (double)Math.PI * p / n;
c = (double)Math.Cos(arg);
s = (double)Math.Sin(arg);
tr = xre[k + n2] * c + xim[k + n2] * s;
ti = xim[k + n2] * c - xre[k + n2] * s;
xre[k + n2] = xre[k] - tr;
xim[k + n2] = xim[k] - ti;
xre[k] += tr;
xim[k] += ti;
k++;
}
k += n2;
}
k = 0;
nu1--;
n2 = n2 / 2;
}
k = 0;
int r;
while (k < n)
{
r = BitReverse(k);
if (r > k)
{
tr = xre[k];
ti = xim[k];
xre[k] = xre[r];
xim[k] = xim[r];
xre[r] = tr;
xim[r] = ti;
}
k++;
}
for (int i = 0; i < n / 2; i++)
decibel[i] = 10.0 * Math.Log10((float)(Math.Sqrt((xre[i] * xre[i]) + (xim[i] * xim[i]))));
return decibel;
}
When I play a music note in a guitar i wanted to know it's frequency in a numerical format. I wrote a foreach loop to know what is the output of a _fftLeft array as below,
foreach (double myarray in _fftLeft)
{
Console.WriteLine(myarray );
}
This output's contain lots of real-time values as below .
41.3672743963389
,43.0176034462662,
35.3677383746087,
42.5968946936404,
42.0600935794783,
36.7521669642071,
41.6356709559342,
41.7189032845742,
41.1002451261724,
40.8035583510188,
45.604366914128,
39.645552593115
I want to know what are those values (frequencies or not)? if the answer is frequencies then why it contains low frequency values? And when I play a guitar note I want to detect a frequency of that particular guitar note.
Based on the posted code, FFTDb first computes the FFT then computes and returns the magnitudes of the frequency spectrum in the logarithmic decibels scale. In other words, the _fftLeft then contains magnitudes for a discreet set of frequencies. The actual values of those frequencies can be computed using the array index and sampling frequency according to this answer.
As an example, if you were plotting the _fftLeft output for a pure sinusoidal tone input you should be able to see a clear spike in the index corresponding to the sinusoidal frequency. For a guitar note however you are likely going to see multiple spikes in magnitude corresponding to the harmonics. To detect the note's frequency aka pitch is a more complicated topic and typically requires the use of one of several pitch detection algorithms.

C# Exp cannot get result

When I using Math.Exp() in C# I have some questions?This code is about Kernel density estimation, and I don't have any knowledge about kernel density estimation. So I look up some wiki and some paper.
I try to write it by C#. The problem is when "distance" is getting higher the result is become 0. It's confuse me and I cannot find any other way to get the right result.
disExp = Math.Pow(Math.E, -(distance / 2 * Math.Pow(h, 2)));
So, can any one help me to get the solution? Or give me some idea about Kernel density estimation on C#. Sorry for poor English.
Try this
public static double[,] KernelDensityEstimation(double[] data, double sigma, int nsteps)
{
// probability density function (PDF) signal analysis
// Works like ksdensity in mathlab.
// KDE performs kernel density estimation (KDE)on one - dimensional data
// http://en.wikipedia.org/wiki/Kernel_density_estimation
// Input: -data: input data, one-dimensional
// -sigma: bandwidth(sometimes called "h")
// -nsteps: optional number of abscis points.If nsteps is an
// array, the abscis points will be taken directly from it. (default 100)
// Output: -x: equispaced abscis points
// -y: estimates of p(x)
// This function is part of the Kernel Methods Toolbox(KMBOX) for MATLAB.
// http://sourceforge.net/p/kmbox
// Converted to C# code by ksandric
double[,] result = new double[nsteps, 2];
double[] x = new double[nsteps], y = new double[nsteps];
double MAX = Double.MinValue, MIN = Double.MaxValue;
int N = data.Length; // number of data points
// Find MIN MAX values in data
for (int i = 0; i < N; i++)
{
if (MAX < data[i])
{
MAX = data[i];
}
if (MIN > data[i])
{
MIN = data[i];
}
}
// Like MATLAB linspace(MIN, MAX, nsteps);
x[0] = MIN;
for (int i = 1; i < nsteps; i++)
{
x[i] = x[i - 1] + ((MAX - MIN) / nsteps);
}
// kernel density estimation
double c = 1.0 / (Math.Sqrt(2 * Math.PI * sigma * sigma));
for (int i = 0; i < N; i++)
{
for (int j = 0; j < nsteps; j++)
{
y[j] = y[j] + 1.0 / N * c * Math.Exp(-(data[i] - x[j]) * (data[i] - x[j]) / (2 * sigma * sigma));
}
}
// compilation of the X,Y to result. Good for creating plot(x, y)
for (int i = 0; i < nsteps; i++)
{
result[i, 0] = x[i];
result[i, 1] = y[i];
}
return result;
}
kernel density estimation C#
plot

Weird glitch when creating sine audio signal

I had a look at the wave format today and created a little wave generator. I create a sine sound like this:
public static Wave GetSine(double length, double hz)
{
int bitRate = 44100;
int l = (int)(bitRate * length);
double f = 1.0 / bitRate;
Int16[] data = new Int16[l];
for (int i = 0; i < l; i++)
{
data[i] = (Int16)(Math.Sin(hz * i * f * Math.PI * 2) * Int16.MaxValue);
}
return new Wave(false, Wave.MakeInt16WaveData(data));
}
MakeInt16WaveData looks like this:
public static byte[] MakeInt16WaveData(Int16[] ints)
{
int s = sizeof(Int16);
byte[] buf = new byte[s * ints.Length];
for(int i = 0; i < ints.Length; i++)
{
Buffer.BlockCopy(BitConverter.GetBytes(ints[i]), 0, buf, i * s, s);
}
return buf;
}
This works as expected! Now I wanted to swoop from one frequency to another like this:
public static Wave GetSineSwoop(double length, double hzStart, double hzEnd)
{
int bitRate = 44100;
int l = (int)(bitRate * length);
double f = 1.0 / bitRate;
Int16[] data = new Int16[l];
double hz;
double hzDelta = hzEnd - hzStart;
for (int i = 0; i < l; i++)
{
hz = hzStart + ((double)i / l) * hzDelta * 0.5; // why *0.5 ?
data[i] = (Int16)(Math.Sin(hz * i * f * Math.PI * 2) * Int16.MaxValue);
}
return new Wave(false, Wave.MakeInt16WaveData(data));
}
Now, when I swooped from 200 to 100 Hz, the sound played from 200 to 0 hertz. For some reason I had to multiply the delta by 0.5 to get the correct output. What might be the issue here ? Is this an audio thing or is there a bug in my code ?
Thanks
Edit by TaW: I take the liberty to add screenshots of the data in a chart which illustrate the problem, the first is with the 0.5 factor, the 2nd with 1.0 and the 3rd & 4th with 2.0 and 5.0:
Edit: here is an example, a swoop from 200 to 100 hz:
Debug values:
Wave clearly does not end at 100 hz
Digging out my rusty math I think it may be because:
Going in L steps from frequency F1 to F2 you have a frequency of
Fi = F1 + i * ( F2 - F1 ) / L
or with
( F2 - F1 ) / L = S
Fi = F1 + i * S
Now to find out how far we have progressed we need the integral over i, which would be
I(Fi) = i * F1 + 1/2 * i ^ 2 * S
Which give or take resembles the term inside your formula for the sine.
Note that you can gain efficiency by moving the constant part (S/2) out of the loop..

How to find audio power (db) in pcm stream (c#)

I have a pcm 16 bit stream and I need to know when the audio pass a specific power.
Do I need the fft for this or I can know it in a simpler way?
Thanks
I think you need FFT.
The following methods are available in a FFT. (Usually)
/* Calculate normal power
NumSamples : Number of sample
pReal : Real coefficient buffer
pImag : Imaginary coefficient buffer
pAmpl : Working buffer to hold amplitude Xps(m) = | X(m)^2 | = Xreal(m)^2 + Ximag(m)^2</param>
*/
public static void NormalPower(UInt32 NumSamples, Double[] pReal, Double[] pImag, Double[] pAmpl)
{
// Calculate amplitude values in the buffer provided
for (UInt32 i = 0; i < NumSamples; i++)
{
pAmpl[i] = pReal[i]*pReal[i] + pImag[i]*pImag[i];
}
}
/* Find Peak frequency in Hz
NumSamples : Number of samples
pAmpl : Current amplitude
samplingRate : Sampling rate in samples/second (Hz)
index : Frequency index
<returns>Peak frequency in Hz</returns>
* */
public static Double PeakFrequency(UInt32 NumSamples, Double[] pAmpl, Double samplingRate, ref UInt32 index)
{
UInt32 N = NumSamples >> 1; // number of positive frequencies. (numSamples/2)
double maxAmpl = -1.0;
double peakFreq = -1.0;
index = 0;
for (UInt32 i = 0; i < N; i++)
{
if ( pAmpl[i] > maxAmpl )
{
maxAmpl = (double)pAmpl[i];
index = i;
peakFreq = (double)(i);
}
}
return samplingRate * peakFreq / (double)(NumSamples);
}
And you can use them like this:
FFT.Compute(_numSamples, RealIn, null, RealOut, ImagOut, false);
FFT.NormalPower(_numSamples / 2, RealOut, ImagOut, AmplOut);
double maxAmpl = (32767.0 * 32767.0); //Max power used for 16 bit audio
int centerFreq = (Rate / 2);
for (int i = 0; i < NUM_FREQUENCY; ++i)
{
if (METER_FREQUENCY[i] > centerFreq)
_meterData[i] = 0;
else
{
var indice = (int)(METER_FREQUENCY[i] * _numSamples / Rate);
var metervalue = (int)(20.0 * Math.Log10(AmplOut[indice] / maxAmpl));
_meterData[i] = metervalue;
}
}
Note: To get the sound intensity in dB the following formula is used:
level = 20 Log (P2/P1)
P1=Max Power
P2=Current Power

c# LOESS/LOWESS regression [closed]

Closed. This question does not meet Stack Overflow guidelines. It is not currently accepting answers.
We don’t allow questions seeking recommendations for books, tools, software libraries, and more. You can edit the question so it can be answered with facts and citations.
Closed last year.
Improve this question
Do you know of a .net library to perform a LOESS/LOWESS regression? (preferably free/open source)
Port from java to c#
public class LoessInterpolator
{
public static double DEFAULT_BANDWIDTH = 0.3;
public static int DEFAULT_ROBUSTNESS_ITERS = 2;
/**
* The bandwidth parameter: when computing the loess fit at
* a particular point, this fraction of source points closest
* to the current point is taken into account for computing
* a least-squares regression.
*
* A sensible value is usually 0.25 to 0.5.
*/
private double bandwidth;
/**
* The number of robustness iterations parameter: this many
* robustness iterations are done.
*
* A sensible value is usually 0 (just the initial fit without any
* robustness iterations) to 4.
*/
private int robustnessIters;
public LoessInterpolator()
{
this.bandwidth = DEFAULT_BANDWIDTH;
this.robustnessIters = DEFAULT_ROBUSTNESS_ITERS;
}
public LoessInterpolator(double bandwidth, int robustnessIters)
{
if (bandwidth < 0 || bandwidth > 1)
{
throw new ApplicationException(string.Format("bandwidth must be in the interval [0,1], but got {0}", bandwidth));
}
this.bandwidth = bandwidth;
if (robustnessIters < 0)
{
throw new ApplicationException(string.Format("the number of robustness iterations must be non-negative, but got {0}", robustnessIters));
}
this.robustnessIters = robustnessIters;
}
/**
* Compute a loess fit on the data at the original abscissae.
*
* #param xval the arguments for the interpolation points
* #param yval the values for the interpolation points
* #return values of the loess fit at corresponding original abscissae
* #throws MathException if some of the following conditions are false:
* <ul>
* <li> Arguments and values are of the same size that is greater than zero</li>
* <li> The arguments are in a strictly increasing order</li>
* <li> All arguments and values are finite real numbers</li>
* </ul>
*/
public double[] smooth(double[] xval, double[] yval)
{
if (xval.Length != yval.Length)
{
throw new ApplicationException(string.Format("Loess expects the abscissa and ordinate arrays to be of the same size, but got {0} abscisssae and {1} ordinatae", xval.Length, yval.Length));
}
int n = xval.Length;
if (n == 0)
{
throw new ApplicationException("Loess expects at least 1 point");
}
checkAllFiniteReal(xval, true);
checkAllFiniteReal(yval, false);
checkStrictlyIncreasing(xval);
if (n == 1)
{
return new double[] { yval[0] };
}
if (n == 2)
{
return new double[] { yval[0], yval[1] };
}
int bandwidthInPoints = (int)(bandwidth * n);
if (bandwidthInPoints < 2)
{
throw new ApplicationException(string.Format("the bandwidth must be large enough to accomodate at least 2 points. There are {0} " +
" data points, and bandwidth must be at least {1} but it is only {2}",
n, 2.0 / n, bandwidth
));
}
double[] res = new double[n];
double[] residuals = new double[n];
double[] sortedResiduals = new double[n];
double[] robustnessWeights = new double[n];
// Do an initial fit and 'robustnessIters' robustness iterations.
// This is equivalent to doing 'robustnessIters+1' robustness iterations
// starting with all robustness weights set to 1.
for (int i = 0; i < robustnessWeights.Length; i++) robustnessWeights[i] = 1;
for (int iter = 0; iter <= robustnessIters; ++iter)
{
int[] bandwidthInterval = { 0, bandwidthInPoints - 1 };
// At each x, compute a local weighted linear regression
for (int i = 0; i < n; ++i)
{
double x = xval[i];
// Find out the interval of source points on which
// a regression is to be made.
if (i > 0)
{
updateBandwidthInterval(xval, i, bandwidthInterval);
}
int ileft = bandwidthInterval[0];
int iright = bandwidthInterval[1];
// Compute the point of the bandwidth interval that is
// farthest from x
int edge;
if (xval[i] - xval[ileft] > xval[iright] - xval[i])
{
edge = ileft;
}
else
{
edge = iright;
}
// Compute a least-squares linear fit weighted by
// the product of robustness weights and the tricube
// weight function.
// See http://en.wikipedia.org/wiki/Linear_regression
// (section "Univariate linear case")
// and http://en.wikipedia.org/wiki/Weighted_least_squares
// (section "Weighted least squares")
double sumWeights = 0;
double sumX = 0, sumXSquared = 0, sumY = 0, sumXY = 0;
double denom = Math.Abs(1.0 / (xval[edge] - x));
for (int k = ileft; k <= iright; ++k)
{
double xk = xval[k];
double yk = yval[k];
double dist;
if (k < i)
{
dist = (x - xk);
}
else
{
dist = (xk - x);
}
double w = tricube(dist * denom) * robustnessWeights[k];
double xkw = xk * w;
sumWeights += w;
sumX += xkw;
sumXSquared += xk * xkw;
sumY += yk * w;
sumXY += yk * xkw;
}
double meanX = sumX / sumWeights;
double meanY = sumY / sumWeights;
double meanXY = sumXY / sumWeights;
double meanXSquared = sumXSquared / sumWeights;
double beta;
if (meanXSquared == meanX * meanX)
{
beta = 0;
}
else
{
beta = (meanXY - meanX * meanY) / (meanXSquared - meanX * meanX);
}
double alpha = meanY - beta * meanX;
res[i] = beta * x + alpha;
residuals[i] = Math.Abs(yval[i] - res[i]);
}
// No need to recompute the robustness weights at the last
// iteration, they won't be needed anymore
if (iter == robustnessIters)
{
break;
}
// Recompute the robustness weights.
// Find the median residual.
// An arraycopy and a sort are completely tractable here,
// because the preceding loop is a lot more expensive
System.Array.Copy(residuals, sortedResiduals, n);
//System.arraycopy(residuals, 0, sortedResiduals, 0, n);
Array.Sort<double>(sortedResiduals);
double medianResidual = sortedResiduals[n / 2];
if (medianResidual == 0)
{
break;
}
for (int i = 0; i < n; ++i)
{
double arg = residuals[i] / (6 * medianResidual);
robustnessWeights[i] = (arg >= 1) ? 0 : Math.Pow(1 - arg * arg, 2);
}
}
return res;
}
/**
* Given an index interval into xval that embraces a certain number of
* points closest to xval[i-1], update the interval so that it embraces
* the same number of points closest to xval[i]
*
* #param xval arguments array
* #param i the index around which the new interval should be computed
* #param bandwidthInterval a two-element array {left, right} such that: <p/>
* <tt>(left==0 or xval[i] - xval[left-1] > xval[right] - xval[i])</tt>
* <p/> and also <p/>
* <tt>(right==xval.length-1 or xval[right+1] - xval[i] > xval[i] - xval[left])</tt>.
* The array will be updated.
*/
private static void updateBandwidthInterval(double[] xval, int i, int[] bandwidthInterval)
{
int left = bandwidthInterval[0];
int right = bandwidthInterval[1];
// The right edge should be adjusted if the next point to the right
// is closer to xval[i] than the leftmost point of the current interval
int nextRight = nextNonzero(weights, right);
if (nextRight < xval.Length && xval[nextRight] - xval[i] < xval[i] - xval[left])
{
int nextLeft = nextNonzero(weights, bandwidthInterval[0]);
bandwidthInterval[0] = nextLeft;
bandwidthInterval[1] = nextRight;
}
}
/**
* Compute the
* tricube
* weight function
*
* #param x the argument
* #return (1-|x|^3)^3
*/
private static double tricube(double x)
{
double tmp = Math.abs(x);
tmp = 1 - tmp * tmp * tmp;
return tmp * tmp * tmp;
}
/**
* Check that all elements of an array are finite real numbers.
*
* #param values the values array
* #param isAbscissae if true, elements are abscissae otherwise they are ordinatae
* #throws MathException if one of the values is not
* a finite real number
*/
private static void checkAllFiniteReal(double[] values, bool isAbscissae)
{
for (int i = 0; i < values.Length; i++)
{
double x = values[i];
if (Double.IsInfinity(x) || Double.IsNaN(x))
{
string pattern = isAbscissae ?
"all abscissae must be finite real numbers, but {0}-th is {1}" :
"all ordinatae must be finite real numbers, but {0}-th is {1}";
throw new ApplicationException(string.Format(pattern, i, x));
}
}
}
/**
* Check that elements of the abscissae array are in a strictly
* increasing order.
*
* #param xval the abscissae array
* #throws MathException if the abscissae array
* is not in a strictly increasing order
*/
private static void checkStrictlyIncreasing(double[] xval)
{
for (int i = 0; i < xval.Length; ++i)
{
if (i >= 1 && xval[i - 1] >= xval[i])
{
throw new ApplicationException(string.Format(
"the abscissae array must be sorted in a strictly " +
"increasing order, but the {0}-th element is {1} " +
"whereas {2}-th is {3}",
i - 1, xval[i - 1], i, xval[i]));
}
}
}
}
Since I'm unable to comment on other people's posts (new user), and people seem to think I should do that with this instead of editing the above answer, I'm simply going to write it as an answer even though I know this is better as a comment.
The updateBandwidthInterval method in the above answer forgets to check the left side as written in the method comment. This can give NaN issues for sumWeights. The below should fix that. I encountered this when doing a c++ implementation based on the above.
/**
* Given an index interval into xval that embraces a certain number of
* points closest to xval[i-1], update the interval so that it embraces
* the same number of points closest to xval[i]
*
* #param xval arguments array
* #param i the index around which the new interval should be computed
* #param bandwidthInterval a two-element array {left, right} such that: <p/>
* <tt>(left==0 or xval[i] - xval[left-1] > xval[right] - xval[i])</tt>
* <p/> and also <p/>
* <tt>(right==xval.length-1 or xval[right+1] - xval[i] > xval[i] - xval[left])</tt>.
* The array will be updated.
*/
private static void updateBandwidthInterval(double[] xval, int i, int[] bandwidthInterval)
{
int left = bandwidthInterval[0];
int right = bandwidthInterval[1];
// The edges should be adjusted if the previous point to the
// left is closer to x than the current point to the right or
// if the next point to the right is closer
// to x than the leftmost point of the current interval
if (left != 0 &&
xval[i] - xval[left - 1] < xval[right] - xval[i])
{
bandwidthInterval[0]++;
bandwidthInterval[1]++;
}
else if (right < xval.Length - 1 &&
xval[right + 1] - xval[i] < xval[i] - xval[left])
{
bandwidthInterval[0]++;
bandwidthInterval[1]++;
}
}
Hope someone after 5 years find this useful. This is the original code posted by Tutcugil but with the missing methods and updated.
using System;
using System.Linq;
namespace StockCorrelation
{
public class LoessInterpolator
{
public static double DEFAULT_BANDWIDTH = 0.3;
public static int DEFAULT_ROBUSTNESS_ITERS = 2;
/**
* The bandwidth parameter: when computing the loess fit at
* a particular point, this fraction of source points closest
* to the current point is taken into account for computing
* a least-squares regression.
*
* A sensible value is usually 0.25 to 0.5.
*/
private double bandwidth;
/**
* The number of robustness iterations parameter: this many
* robustness iterations are done.
*
* A sensible value is usually 0 (just the initial fit without any
* robustness iterations) to 4.
*/
private int robustnessIters;
public LoessInterpolator()
{
this.bandwidth = DEFAULT_BANDWIDTH;
this.robustnessIters = DEFAULT_ROBUSTNESS_ITERS;
}
public LoessInterpolator(double bandwidth, int robustnessIters)
{
if (bandwidth < 0 || bandwidth > 1)
{
throw new ApplicationException(string.Format("bandwidth must be in the interval [0,1], but got {0}", bandwidth));
}
this.bandwidth = bandwidth;
if (robustnessIters < 0)
{
throw new ApplicationException(string.Format("the number of robustness iterations must be non-negative, but got {0}", robustnessIters));
}
this.robustnessIters = robustnessIters;
}
/**
* Compute a loess fit on the data at the original abscissae.
*
* #param xval the arguments for the interpolation points
* #param yval the values for the interpolation points
* #return values of the loess fit at corresponding original abscissae
* #throws MathException if some of the following conditions are false:
* <ul>
* <li> Arguments and values are of the same size that is greater than zero</li>
* <li> The arguments are in a strictly increasing order</li>
* <li> All arguments and values are finite real numbers</li>
* </ul>
*/
public double[] smooth(double[] xval, double[] yval, double[] weights)
{
if (xval.Length != yval.Length)
{
throw new ApplicationException(string.Format("Loess expects the abscissa and ordinate arrays to be of the same size, but got {0} abscisssae and {1} ordinatae", xval.Length, yval.Length));
}
int n = xval.Length;
if (n == 0)
{
throw new ApplicationException("Loess expects at least 1 point");
}
checkAllFiniteReal(xval, true);
checkAllFiniteReal(yval, false);
checkStrictlyIncreasing(xval);
if (n == 1)
{
return new double[] { yval[0] };
}
if (n == 2)
{
return new double[] { yval[0], yval[1] };
}
int bandwidthInPoints = (int)(bandwidth * n);
if (bandwidthInPoints < 2)
{
throw new ApplicationException(string.Format("the bandwidth must be large enough to accomodate at least 2 points. There are {0} " +
" data points, and bandwidth must be at least {1} but it is only {2}",
n, 2.0 / n, bandwidth
));
}
double[] res = new double[n];
double[] residuals = new double[n];
double[] sortedResiduals = new double[n];
double[] robustnessWeights = new double[n];
// Do an initial fit and 'robustnessIters' robustness iterations.
// This is equivalent to doing 'robustnessIters+1' robustness iterations
// starting with all robustness weights set to 1.
for (int i = 0; i < robustnessWeights.Length; i++) robustnessWeights[i] = 1;
for (int iter = 0; iter <= robustnessIters; ++iter)
{
int[] bandwidthInterval = { 0, bandwidthInPoints - 1 };
// At each x, compute a local weighted linear regression
for (int i = 0; i < n; ++i)
{
double x = xval[i];
// Find out the interval of source points on which
// a regression is to be made.
if (i > 0)
{
updateBandwidthInterval(xval, weights, i, bandwidthInterval);
}
int ileft = bandwidthInterval[0];
int iright = bandwidthInterval[1];
// Compute the point of the bandwidth interval that is
// farthest from x
int edge;
if (xval[i] - xval[ileft] > xval[iright] - xval[i])
{
edge = ileft;
}
else
{
edge = iright;
}
// Compute a least-squares linear fit weighted by
// the product of robustness weights and the tricube
// weight function.
// See http://en.wikipedia.org/wiki/Linear_regression
// (section "Univariate linear case")
// and http://en.wikipedia.org/wiki/Weighted_least_squares
// (section "Weighted least squares")
double sumWeights = 0;
double sumX = 0, sumXSquared = 0, sumY = 0, sumXY = 0;
double denom = Math.Abs(1.0 / (xval[edge] - x));
for (int k = ileft; k <= iright; ++k)
{
double xk = xval[k];
double yk = yval[k];
double dist;
if (k < i)
{
dist = (x - xk);
}
else
{
dist = (xk - x);
}
double w = tricube(dist * denom) * robustnessWeights[k];
double xkw = xk * w;
sumWeights += w;
sumX += xkw;
sumXSquared += xk * xkw;
sumY += yk * w;
sumXY += yk * xkw;
}
double meanX = sumX / sumWeights;
double meanY = sumY / sumWeights;
double meanXY = sumXY / sumWeights;
double meanXSquared = sumXSquared / sumWeights;
double beta;
if (meanXSquared == meanX * meanX)
{
beta = 0;
}
else
{
beta = (meanXY - meanX * meanY) / (meanXSquared - meanX * meanX);
}
double alpha = meanY - beta * meanX;
res[i] = beta * x + alpha;
residuals[i] = Math.Abs(yval[i] - res[i]);
}
// No need to recompute the robustness weights at the last
// iteration, they won't be needed anymore
if (iter == robustnessIters)
{
break;
}
// Recompute the robustness weights.
// Find the median residual.
// An arraycopy and a sort are completely tractable here,
// because the preceding loop is a lot more expensive
System.Array.Copy(residuals, sortedResiduals, n);
//System.arraycopy(residuals, 0, sortedResiduals, 0, n);
Array.Sort<double>(sortedResiduals);
double medianResidual = sortedResiduals[n / 2];
if (medianResidual == 0)
{
break;
}
for (int i = 0; i < n; ++i)
{
double arg = residuals[i] / (6 * medianResidual);
robustnessWeights[i] = (arg >= 1) ? 0 : Math.Pow(1 - arg * arg, 2);
}
}
return res;
}
public double[] smooth(double[] xval, double[] yval)
{
if (xval.Length != yval.Length)
{
throw new Exception($"xval and yval len are different");
}
double[] unitWeights = Enumerable.Repeat(1.0, xval.Length).ToArray();
return smooth(xval, yval, unitWeights);
}
/**
* Given an index interval into xval that embraces a certain number of
* points closest to xval[i-1], update the interval so that it embraces
* the same number of points closest to xval[i]
*
* #param xval arguments array
* #param i the index around which the new interval should be computed
* #param bandwidthInterval a two-element array {left, right} such that: <p/>
* <tt>(left==0 or xval[i] - xval[left-1] > xval[right] - xval[i])</tt>
* <p/> and also <p/>
* <tt>(right==xval.length-1 or xval[right+1] - xval[i] > xval[i] - xval[left])</tt>.
* The array will be updated.
*/
private static void updateBandwidthInterval(double[] xval, double[] weights,
int i,
int[] bandwidthInterval)
{
int left = bandwidthInterval[0];
int right = bandwidthInterval[1];
// The right edge should be adjusted if the next point to the right
// is closer to xval[i] than the leftmost point of the current interval
int nextRight = nextNonzero(weights, right);
if (nextRight < xval.Length && xval[nextRight] - xval[i] < xval[i] - xval[left])
{
int nextLeft = nextNonzero(weights, bandwidthInterval[0]);
bandwidthInterval[0] = nextLeft;
bandwidthInterval[1] = nextRight;
}
}
private static int nextNonzero(double[] weights, int i)
{
int j = i + 1;
while (j < weights.Length && weights[j] == 0)
{
++j;
}
return j;
}
/**
* Compute the
* tricube
* weight function
*
* #param x the argument
* #return (1-|x|^3)^3
*/
private static double tricube(double x)
{
double tmp = Math.Abs(x);
tmp = 1 - tmp * tmp * tmp;
return tmp * tmp * tmp;
}
/**
* Check that all elements of an array are finite real numbers.
*
* #param values the values array
* #param isAbscissae if true, elements are abscissae otherwise they are ordinatae
* #throws MathException if one of the values is not
* a finite real number
*/
private static void checkAllFiniteReal(double[] values, bool isAbscissae)
{
for (int i = 0; i < values.Length; i++)
{
double x = values[i];
if (Double.IsInfinity(x) || Double.IsNaN(x))
{
string pattern = isAbscissae ?
"all abscissae must be finite real numbers, but {0}-th is {1}" :
"all ordinatae must be finite real numbers, but {0}-th is {1}";
throw new ApplicationException(string.Format(pattern, i, x));
}
}
}
/**
* Check that elements of the abscissae array are in a strictly
* increasing order.
*
* #param xval the abscissae array
* #throws MathException if the abscissae array
* is not in a strictly increasing order
*/
private static void checkStrictlyIncreasing(double[] xval)
{
for (int i = 0; i < xval.Length; ++i)
{
if (i >= 1 && xval[i - 1] >= xval[i])
{
throw new ApplicationException(string.Format(
"the abscissae array must be sorted in a strictly " +
"increasing order, but the {0}-th element is {1} " +
"whereas {2}-th is {3}",
i - 1, xval[i - 1], i, xval[i]));
}
}
}
}
}

Categories

Resources