I want to use Excel's buil-in function called LINEST() to do regression analysis in .net.
I am able to use the function with squred matrix array, but when it is not square matrix say of order[12,3] then it gives error as:
LinEst method of WorksheetFunction class failed
Please help me out with this as it is very important for me to complete this code.
This is my complete code:
System.Data.DataTable dt = new System.Data.DataTable();
SqlCommand cmd =new SqlCommand("Select QtytoTransfer from DEmo ",con);
SqlDataAdapter adp = new SqlDataAdapter(cmd);
adp.Fill(dt);
List<double> yDatapoints =new List<double>();
foreach (DataRow dr in dt.Rows)
{
yDatapoints.Add(Convert.ToDouble( dr["QtytoTransfer"].ToString()));
}
System.Data.DataTable dt1 = new System.Data.DataTable();
SqlCommand sqlcmd = new SqlCommand("Select CurrentQoh,QtySold,GameTime from DEmo ", con);
SqlDataAdapter adp1 = new SqlDataAdapter(sqlcmd);
adp1.Fill(dt1);
double[,] xAll = new double[dt1.Rows.Count, dt1.Columns.Count];
for (int i = 0; i < dt1.Rows.Count; ++i)
{
for (int j = 0; j < dt1.Columns.Count; ++j)
{
xAll[i, j] = Convert.ToDouble(dt1.Rows[i][j].ToString());
}
}
Microsoft.Office.Interop.Excel.Application xl = new Microsoft.Office.Interop.Excel.Application();
Microsoft.Office.Interop.Excel.WorksheetFunction wsf = xl.WorksheetFunction;
object[,] reslut = (object[,])wsf.LinEst(yDatapoints.ToArray(), xAll, missing, true);
if your xAll has a dimension of [12,3] your yDataPoints length should be 3 for proper functioning of LinEst().
using System;
namespace InteropExcel {
class Program {
static void Main(string[] args) {
Random rand = new Random();
double[] yDatapoints = new double[3];
for (int i = 0; i < 3; i++) {
yDatapoints[i]=rand.Next(20, 60);
}
double[,] xAll = new double[12, 3];
for (int i = 0; i < 12; i++) {
for (int j = 0; j < 3; j++) {
xAll[i, j] = rand.Next(2, 100);
}
}
Microsoft.Office.Interop.Excel.Application xl = new Microsoft.Office.Interop.Excel.Application();
Microsoft.Office.Interop.Excel.WorksheetFunction wsf = xl.WorksheetFunction;
object[,] result = (object[,])wsf.LinEst(yDatapoints, xAll, Type.Missing, true);
}
}
}
The column size of xAll should be equal to the length of yDataPoints array. Please try and let me know.
Here's an implementation of Excel's LINEST() function in C#. It might be easier than creating a dependency on the Microsoft.Office.Interop.Excel DLL file.
This returns the slope for a given set of data, normalized using the same "least squares" method that LINEST() uses:
public static double CalculateLinest(double[] y, double[] x)
{
double linest = 0;
if (y.Length == x.Length)
{
double avgY = y.Average();
double avgX = x.Average();
double[] dividend = new double[y.Length];
double[] divisor = new double[y.Length];
for (int i = 0; i < y.Length; i++)
{
dividend[i] = (x[i] - avgX) * (y[i] - avgY);
divisor[i] = Math.Pow((x[i] - avgX), 2);
}
linest = dividend.Sum() / divisor.Sum();
}
return linest;
}
Also, here's a method I wrote to get the "b" (y-intercept) value that Excel's LINEST function generates.
private double CalculateYIntercept(double[] x, double[] y, double linest)
{
return (y.Average() - linest * x.Average());
}
Since these methods only work for one set of data, I would recommend calling them inside of a loop if you wish to produce multiple sets of linear regression data.
This link helped me find my answer: https://agrawalreetesh.blogspot.com/2011/11/how-to-calculate-linest-of-given.html
Related
I want to read a selected cell range of Excel sheet into a DataTable without using OLEDB. And I don't want to use 3rd party tools either.
Here is what I have done so far:
public void converter()
{
xlApp = new Microsoft.Office.Interop.Excel.Application();
xlWorkbook = xlApp.Workbooks.Open(path);
Microsoft.Office.Interop.Excel.Sheets sheets = xlWorkbook.Worksheets;
xlWorksheet = xlWorkbook.Sheets[3];
for (int i = 1; i <= 10; i++)
{
range = xlWorksheet.get_Range("A2", "I8");
System.Array myvalues = (System.Array)range.Cells.Value;
//string[] strArray = ConvertToStringArray(myvalues);
//List<string> lst = myvalues.OfType<string>().ToList();
}
}
How can I achieve this?
After Googling, I ended up with following but I do not know how to make it work with cell range
public System.Data.DataTable READExcel(string path)
{
Microsoft.Office.Interop.Excel.Application objXL = null;
Microsoft.Office.Interop.Excel.Workbook objWB = null;
objXL = new Microsoft.Office.Interop.Excel.Application();
objWB = objXL.Workbooks.Open(path);
Microsoft.Office.Interop.Excel.Worksheet objSHT = objWB.Worksheets[5];
int rows = objSHT.UsedRange.Rows.Count;
int cols = objSHT.UsedRange.Columns.Count;
System.Data.DataTable dt = new System.Data.DataTable();
int noofrow = 1;
for (int c = 1; c <= cols; c++)
{
string colname = objSHT.Cells[1, c].Text;
dt.Columns.Add(colname);
noofrow = 2;
}
for (int r = noofrow; r <= rows; r++)
{
DataRow dr = dt.NewRow();
for (int c = 1; c <= cols; c++)
{
dr[c - 1] = objSHT.Cells[r, c].Text;
}
dt.Rows.Add(dr);
}
objWB.Close();
objXL.Quit();
return dt;
}
Kindly help me achieve this.
It's not perfect and you might need to tweek some parameters, I didn't check if it gets the correct area, thats your work.
But it gets a DataTable out of an Excel Worksheet, as you asked.
I'd prefer creating an custom object list with the columns as properties, rather than a DataTable, but thats only me.
public System.Data.DataTable READExcel(string path,int row,int col, int rowamount, int colamount, int worksheetindex)
{
Microsoft.Office.Interop.Excel.Application objXL = null;
Microsoft.Office.Interop.Excel.Workbook objWB = null;
objXL = new Microsoft.Office.Interop.Excel.Application();
objWB = objXL.Workbooks.Open(path);
Microsoft.Office.Interop.Excel.Worksheet objSHT = objWB.Worksheets[worksheetindex];
int rows = objSHT.UsedRange.Rows.Count;
int cols = objSHT.UsedRange.Columns.Count;
System.Data.DataTable dt = new System.Data.DataTable();
// Change that for other column names
for (int c = col; c <= col + colamount; c++)
{
dt.Columns.Add(objSHT.Cells[row, c].Text);
}
// read the content
for (int r = row; r <= row+rowamount; r++)
{
DataRow dr = dt.NewRow();
for (int c = col; c <= col+colamount; c++)
{
dr[c - 1] = objSHT.Cells[r, c].Text;
}
dt.Rows.Add(dr);
}
objWB.Close();
objXL.Quit();
return dt;
}
Call it like that:
// A2 -> row:2,col:1 till F6 row:2+4, col1+5
READExcel(#"path", 2, 1, 4, 5, 3);
The faster way would be using Value2, example: var range = xlWorksheet.Range["A3", "B4"].Value2; (get_Range() doesn't work with my excel)
and iterate over it, filling the DataTable. range[1,1] would be the content of A3 in this example.
I am trying to convert this code:
function [C] = cumulativeMaxV2(A)
cols = size(A,2);
bscans = size(A,3);
C = zeros(size(A));
for col = 1:cols
for bscan = 1:bscans
aline = A(:,col,bscan);
for i = 1:length(aline)
if i == 1
C(i,col,bscan)=0;
else
C(i,col,bscan) = max(A(1:i-1, col,bscan));
end
end
end
end
My C# code is below:
static double[,,] CumulativeMax(double[,,] A)
{
int cols = 304; //A.GetLength(1);
int bscans = 304; //A.GetLength(2);
double[,,] C = new double[160, 304, 304];
Console.Write("Processing... ");
using (var progress = new ProgressBar())
{
for (int col = 0; col < cols; col++)
{
for (int bscan = 0; bscan < bscans; bscan++)
{
double[] aline = new double[160];
for (int i = 0; i < 160; i++)
aline[i] = A[i,col,bscan];
for (int i = 0; i < aline.GetLength(0); i++)
{
if (i == 0)
C[i,col,bscan] = 0d;
else if (i == 1)
{
double[] temp = new double[i];
for (int x = 0; x < i; x++)
temp[x] = A[x,col,bscan];
C[i,col,bscan] = temp.Max();
}
else
{
double[] temp = new double[i - 1];
for (int x = 0; x < i - 1; x++)
temp[x] = A[x,col,bscan];
C[i,col,bscan] = temp.Max();
}
}
}
progress.Report((double)col/cols);
}
}
Console.WriteLine("Done.");
return C;
}
Outputs from MatLab do not match those from the C# code.
Any pointers to where the bugs are in my C# code would be great. I'm not very good with MatLab.
I think this may be due to how MatLab's max function deals with infinity and NaNs.
How to create a new page on each new table?
It means that the table should be placed on one page.
The following code is a sample code that outputs login, but does not create a new page for each table.
List<System.Data.DataTable> bbb = new List<System.Data.DataTable>();
System.Data.DataTable dt = new System.Data.DataTable();
dt.Columns.Add("Columns 1");
dt.Columns.Add("Columns 2");
dt.Columns.Add("Columns 3");
dt.Rows.Add("aaa", "aaa", "aaa");
dt.Rows.Add("bbb", "bbb", "bbb");
dt.Rows.Add("ccc", "ccc", "ccc");
System.Data.DataTable dt2 = new System.Data.DataTable();
dt2.Columns.Add("Columns 5");
dt2.Rows.Add("aaa5");
bbb.Add(dt);
bbb.Add(dt2);
List<System.Data.DataTable> ListDataTable = new List<System.Data.DataTable>();
ListDataTable = bbb;
object objEndOfDoc = "\\endofdoc";
Microsoft.Office.Interop.Word.Document Wdc = new Microsoft.Office.Interop.Word.Document();
Microsoft.Office.Interop.Word.Range WordRange = Wdc.Bookmarks.get_Item(ref objEndOfDoc).Range;
Microsoft.Office.Interop.Word.Table wordTable;
for (int ListDataT = 0; ListDataT < ListDataTable.Count; ListDataT++)
{
int iRowCount = ListDataTable[ListDataT].Rows.Count;
int iColCount = ListDataTable[ListDataT].Columns.Count;
object objMissing = System.Reflection.Missing.Value;
wordTable = Wdc.Tables.Add(WordRange, iRowCount, iColCount, ref objMissing, ref objMissing);
int iTableRow = 1;
int iTableCol = 1;
for (int i = 0; i < ListDataTable[ListDataT].Columns.Count; i++)
{
wordTable.Cell(iTableRow, iTableCol).Range.Text = ListDataTable[ListDataT].Columns[i].ColumnName;
iTableCol++;
}
iTableRow++;
for (int i = 0; i < ListDataTable[ListDataT].Rows.Count; i++)
{
iTableCol = 1;
for (int j = 0; j < ListDataTable[ListDataT].Columns.Count; j++)
{
wordTable.Cell(iTableRow, iTableCol).Range.Text = ListDataTable[ListDataT].Rows[i][j].ToString();
// Console.Write(dt.Rows[i][j].ToString());
iTableCol++;
}
iTableRow++;
}
wordTable.Borders.Enable = 1;
wordTable.set_Style("Light Grid - Accent 3");
}
Wdc.SaveAs("c://test.docx");
Wdc.Close();
There are two possibilities:
1) Format the first row of the table with the paragraph formatting "Page Break Before".
2) Insert a manual Page Break between each table (keyboard equivalent: Shift+Enter).
Being a Word professional, my inclination is to use (1) unless something speaks against it. Most people, however, tend to use (2) because it's more obvious/discoverable.
wordTable.Rows[1].Range.ParagraphFormat.PageBreakBefore = true;
OR
Word.Range rng = wordTable.Range;
rng.Collapse(Word.WdCollapseDirection.wdCollapseEnd);
rng.InsertBreak(Word.WdBreakType.wdPageBreak);
I want to use for loop to get average of datatable columns and rows. What I want to do is that what if there are 100 ~ 1000 columns and rows, I can't keep on adding them in the code. is there one simple code that can get average of automatically as I add columns and rows?
here is my code, I am stuck I don't know what to write in ?? area below and this code gets me error please help...
private void button1_Click(object sender, EventArgs e)
{
DataTable dtGrid = gridData.DataSource as DataTable;
DataTable dtResult = new DataTable();
Math columnIndex = new Math();
List<double> avgList = new List<double>();
for (int i = 0; i < dtGrid.Columns.Count; i++)
{
for (int k = 1; k < dtGrid.Rows.Count; k++)
{
// ??
avgList.Add(Convert.ToDouble(dtGrid.Rows[i].ToString()));
}
}
//this is from other class name Math
/* public double getAverageValue(List<double> avgList)
{
double averageList = 0;
averageList = MathNet.Numerics.Statistics.Statistics.Mean(avgList.ToList());
return averageList;
}*/
double averageX1 = columnIndex.getAverageValue(avgList);
List<Math> list = new List<Math>();
//using get; set from other class
list.Add(new Math { Result = "Average", X1 = averageX1.ToString() });
gridData2.DataSource = list;
}
}
}
It looks like your loop is inside out. Try this:
DataTable dtGrid = gridData.DataSource as DataTable;
DataTable dtResult = new DataTable();
Math columnIndex = new Math();
List<double> avgList = new List<double>();
for (int k = 1; k < dtGrid.Rows.Count; k++)
{
for (int i = 0; i < dtGrid.Columns.Count; i++)
{
// ??
avgList.Add(Convert.ToDouble(dtGrid.Rows[k].Columns[i].ToString()));
}
}
This logic averages all columns in a row together. If you need,, you can create a Dictionary and average each column separately. Something like thisL
Dictionary<int, List<double>> AvgColumnList = new Dictionary<int, System.Collections.Generic.List<double>>();
This uses a dictionary that contains a list for each column in the row. If there are 100 columns, then there will be 100 entries in the dictionary with index 0 - 99. Each dictionary item will contain a list of doubles.
for (int k = 1; k < dtGrid.Rows.Count; k++)
{
for (int i = 0; i < dtGrid.Columns.Count; i++)
{
if (!AvgColumnList.Keys.Contains(i))
AvgColumnList.Add(i, new List<double>());
AvgColumnList[i].Add(Convert.ToDouble(dtGrid.Rows[k].Columns[i].ToString()));
}
}
DataTable is zero index based, in your code row count started from 1 it should be 0, also dtGrid.Rows[i] is a row not the cell value. Use below code to loop through each cell of a DataTable
Update : Code updated as OP want to save each column data separately and irrespective of column numbers.
List<List<double>> perColumnAvg = new List<List<double>>();
for (int i = 0; i < dtGrid.Columns.Count; i++)
{
avgList = new List<double>();
for (int k = 0; k < dtGrid.Rows.Count; k++)
{
// ??
avgList.Add(Convert.ToDouble(dtGrid.Rows[k][i].ToString()));
}
perColumnAvg.Add(avgList);
}
Now you can compute individual column average as
foreach (var columnList in perColumnAvg)
{
// place your logic here.
columnIndex.getAverageValue(columnList);
}
And can compute avg across table using.
double tableAvg = columnIndex.getAverageValue(perColumnAvg.SelectMany(s=>s));
I have been trying to create couple of 2-D arrays via multi-threading. Each threading will generate a small 2-D array. All of the 2-D will be consolidated and that is where I am having issue. I commented "//!this is causing error" towards the bottom of SimulatingMethod method. Please share your insight. Thank you.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Threading;
namespace ThreadExample
{
class Program
{
static void Main(string[] args)
{
double[,] randSims;
randSims = SimulatingClass.SimulatingMethod();
}
}
class SimulatingClass
{
public static double[,] SimulatingMethod()
{
int rowCount = 9;
int columnCount = 1;
int NumberOfCores = System.Environment.ProcessorCount;
int RowsForEachThread = rowCount / NumberOfCores;
Thread[] arrayOfThread = new Thread[NumberOfCores];
DataStuff[] dataStuff= new DataStuff[NumberOfCores];
for (int i = 0; i < NumberOfCores; i++)
{
dataStuff[i] = new DataStuff(RowsForEachThread, columnCount);
arrayOfThread[i] = new Thread(new ThreadStart(dataStuff[i].UpdateMatrixData));
arrayOfThread[i].Name = "Thread" + i;
arrayOfThread[i].Start();
}
for (int i = 0; i < NumberOfCores; i++)
{
arrayOfThread[i].Join();
}
//start combining arrays from different threads
var list = new List<double[,]>();
for (int m = 0; m < NumberOfCores; m++)
{
list.AddRange(dataStuff[m]); //!this is causing error
}
//trying to convert list back to array
double[,] array3 = list.ToArray(); //!this is causing error
return array3;
}
}
class DataStuff
{
public double G;
public double[,] M;
public long steps, trials;
public DataStuff(long _steps, long _trials)
{
M = new Double[_steps, _trials]; // <- M is created in the constructor
G = 60;
steps = _steps;
trials = _trials;
}
public void UpdateMatrixData()
{
for (int i = 0; i < steps; i++)
{
for (int j = 0; j < trials; j++)
{
M[i, j] = i + j;
}
}
}
}
}
You should specify the property as follows:
list.Add(dataStuff[m].M);
It's because the dataStuff[m] is of type DataStuff, but the type double[,] expected as the list item.
If I understood you correctly, you need a consolidated 2D array. Try to declare it initially with desired dimensions:
double[,] array3 = new double[rowCount, columnCount];
And copy data from dataStuff array to it after processing:
for (int m = 0; m < NumberOfCores; m++)
{
Array.Copy(dataStuff[m].M, 0, array3, m * columnCount * RowsForEachThread, dataStuff[m].M.Length);
}
return array3;
And you don't need list at all.
Please note, that you have possible problems related to the rounding:
int RowsForEachThread = rowCount / NumberOfCores;
You should handle the situation when the rowCount is not divisible by the NumberOfCores.