I have 500 Columns in my DataTable and I want to remove all of them except for 25 columns.
Is there any way to do this faster to save time and lines of code?
This is what I already tried:
private static void DeleteUselessColumns()
{
//This is example data!
List<DataColumn> dataColumnsToDelete = new List<DataColumn>();
DataTable bigData = new DataTable();
bigData.Columns.Add("Harry");
bigData.Columns.Add("Konstantin");
bigData.Columns.Add("George");
bigData.Columns.Add("Gabriel");
bigData.Columns.Add("Oscar");
bigData.Columns.Add("Muhammad");
bigData.Columns.Add("Emily");
bigData.Columns.Add("Olivia");
bigData.Columns.Add("Isla");
List<string> columnsToKeep = new List<string>();
columnsToKeep.Add("Isla");
columnsToKeep.Add("Oscar");
columnsToKeep.Add("Konstantin");
columnsToKeep.Add("Gabriel");
//This is the code i want to optimize------
foreach (DataColumn column in bigData.Columns)
{
bool keepColumn = false;
foreach (string s in columnsToKeep)
{
if (column.ColumnName.Equals(s))
{
keepColumn = true;
}
}
if (!keepColumn)
{
dataColumnsToDelete.Add(column);
}
}
foreach(DataColumn dataColumn in dataColumnsToDelete)
{
bigData.Columns.Remove(dataColumn);
}
//------------------------
}
var columnsToKeep = new List<string>() { "Isla", "Oscar", "Konstantin", "Gabriel"};
var toRemove = new List<DataColumn>();
foreach(DataColumn column in bigData.Columns)
{
if (!columnsToKeep.Any(name => column.ColumnName == name ))
{
toRemove.Add(column);
}
}
toRemove.ForEach(col => bigData.Columns.Remove(col));
Test1...test9 same code could be made a loop. No need to add the columns to delete in a list, just delete them in the first while loop. As for performance, not sure how to improve it.
You could try to use a DataView that selects the desired columns then copy to table. You need to experiment.
if they have different names create an array of string
var columns = new string[] { "Harry", "Konstantin","John"};
var columnsToKeep = new string[] { "John", "Konstantin"};
var columnsToDelete = from item in columns
where !columnsToKeep.Contains(item)
select item;
or using lambda
var columnsToDelete = columns
.Where (i=> !columnsToKeep.Contains(i))
.ToList();
toDelete
Harry
Related
How to melt a DataTable in C# (wide to long format) as Python Pandas.melt does? https://pandas.pydata.org/docs/reference/api/pandas.melt.html
Is there any method already implemented? If not, how the code for melting a DataTable would
For example:
I have one DataTable which is in wide format, that is that has one row per id and has as many columns as variables. I would like to transform this DataTable to long format that has as many rows as combinations of id with each variable column. You can see this example in top image.
Please, if there is not clear enough visit Pandas documentation, there is more clear. (https://pandas.pydata.org/docs/reference/api/pandas.melt.html)
Note: I would like a solution that is DataTable independent, that is, that the solution is able to take parameters as id_vars, value_vars, etc... like Pandas.melt does
Any help is appreciated.
I don't know that Melt method but according to docs it seems to be an unpivot method:
public static DataTable MeltTable(DataTable inputTable, string outputColumn, params string[] unpivotColumns)
{
DataTable resultTable = new DataTable();
DataColumn col = new DataColumn(outputColumn, inputTable.Columns[outputColumn].DataType);
resultTable.Columns.Add(col);
resultTable.Columns.Add("Variable");
resultTable.Columns.Add("Value");
foreach(string unpivotColumn in unpivotColumns)
{
foreach (DataRow row in inputTable.Rows)
{
resultTable.Rows.Add(row[outputColumn], unpivotColumn, row[unpivotColumn]);
}
}
return resultTable;
}
You use it in this way:
DataTable table = new DataTable();
table.Columns.Add("Name");
table.Columns.Add("Course");
table.Columns.Add("Age", typeof(int));
table.Rows.Add("Tim", "Masters", 47);
table.Rows.Add("Bob", "Graduate", 19);
table.Rows.Add("Sheila", "Graduate", 20);
DataTable resultTable = MeltTable(table, "Name", "Course", "Age");
Result:
Name Variable Value
Tim Course Masters
Bob Course Graduate
Sheila Course Graduate
Tim Age 47
Bob Age 19
Sheila Age 20
#TimSchmelter gave me the answer but I modified a little bit to be a more general solution. Here's the code:
public static List<string> GetDifferenceColumns(DataTable dt, List<string> diffCols)
{
string[] columns = GetColumnsList(dt).ToArray();
IEnumerable<string> differenceColumns = from column in columns.Except(diffCols.ToArray()) select column;
return differenceColumns.ToList();
}
public static DataTable Melt(DataTable dt, List<string> idCols = null, List<string> varCols = null)
{
string errorPrefixString = "Error in DataProcessing Melt Method:\n";
bool varsColsIsNull = (varCols == null || varCols.Count == 0);
bool idColsIsNull = (idCols == null || idCols.Count == 0);
string varsName = "Variable";
string valueName = "Value";
if (dt.Rows.Count == 0)
{
throw new Exception(errorPrefixString + "DataTable is empty");
}
if (varsColsIsNull && varsColsIsNull)
{
throw new Exception(errorPrefixString+"You should past at least varCols or idCols");
}
if (varsColsIsNull)
{
varCols = GetDifferenceColumns(dt, idCols);
}
if (idColsIsNull)
{
idCols = GetDifferenceColumns(dt, varCols);
}
DataTable resultTable = new DataTable();
// Creating final columns of resultTable
foreach (string id in idCols)
{
resultTable.Columns.Add(id);
}
resultTable.Columns.Add(varsName);
resultTable.Columns.Add(valueName);
// Populating resultTable with the new rows
// generated by unpivoting varCols
foreach (string varCol in varCols)
{
foreach (DataRow row in dt.Rows)
{
DataRow resultRow = resultTable.NewRow();
foreach(string id in idCols)
{
resultRow[id] = row[id]; // create id cols
}
resultRow[varsName] = varCol;
resultRow[valueName] = row[varCol];
resultTable.Rows.Add(resultRow);
}
}
return resultTable;
}
How to use it:
DataTable dt = new DataTable();
dt.Columns.Add("Name");
dt.Columns.Add("Course");
dt.Columns.Add("Age");
dt.Rows.Add("Tim", "Masters", 47);
dt.Rows.Add("Bob", "Graduate", 19);
dt.Rows.Add("Sheila", "Graduate", 20);
List<string> varCols = new List<string> { "Course", "Age" };
DataTable finalDataTable = Melt(dt, varCols: varCols);
This code is working but taking too much time. Every data table contains 1000nds of rows and each time I need to filter data from another data tables with respect to a column.
for (int i = 0; i < dsResult.Tables[0].Rows.Count; i++)
{
DataTable dtFiltered = dtWorkExp.Clone();
foreach (DataRow drr in dtWorkExp.Rows)
{
if (drr["UserId"].ToString() == dsResult.Tables[0].Rows[i]["Registration NO."].ToString())
{
dtFiltered.ImportRow(drr);
}
}
DataTable dtFilteredAward= dtAwards.Clone();
foreach (DataRow drr in dtAwards.Rows)
{
if (drr["UserId"].ToString() == dsResult.Tables[0].Rows[i]["Registration NO."].ToString())
{
dtFilteredAward.ImportRow(drr);
}
}
DataTable dtFilteredOtherQual = dtOtherQual.Clone();
foreach (DataRow drr in dtOtherQual.Rows)
{
if (drr["UserId"].ToString() == dsResult.Tables[0].Rows[i]["Registration NO."].ToString())
{
dtFilteredOtherQual.ImportRow(drr);
}
}
//Do some operation with filtered Data Tables
}
You can declare these lines outside the for loop.
DataTable dtFiltered = dtWorkExp.Clone();
And instead of doing accessing dsResult.Table[0] each time, you can assign this to one variable and use it.
You can also replace the foreach loop with LINQ.
What I would do:
All rows of the main datatable as enumerable:
var rows = dsResult.Tables[0].AsEnumerable();
Get the column you're going to filter with:
var filter = rows.Select(r => r.Field<string>("Registration NO."));
Create a method that accepts that filter, a table to filter and a field to compare.
public static DataTable Filter<T>(EnumerableRowCollection<T> filter, DataTable table, string fieldName)
{
return table.AsEnumerable().Where(r => filter.Contains(r.Field<T>(fieldName))).CopyToDataTable();
}
Finally use the method to filter all tables:
var dtFiltered = Filter<string>(filter, dtWorkExp, "UserId");
var dtFilteredAward = Filter<string>(filter, dtAwards, "UserId");
var dtFilteredOtherQual = Filter<string>(filter, dtOtherQual, "UserId");
All together woul be something like this
public void YourMethod()
{
var rows = dsResult.Tables[0].AsEnumerable();
var filter = rows.Select(r => r.Field<string>("Registration NO."));
var dtFiltered = Filter<string>(filter, dtWorkExp, "UserId");
var dtFilteredAward = Filter<string>(filter, dtAwards, "UserId");
var dtFilteredOtherQual = Filter<string>(filter, dtOtherQual, "UserId");
}
public static DataTable Filter<T>(EnumerableRowCollection<T> filter, DataTable table, string fieldName)
{
return table.AsEnumerable().Where(r => filter.Contains(r.Field<T>(fieldName))).CopyToDataTable();
}
Put the value of the expression in a variable.
var regNo = dsResult.Tables[0].Rows[i]["Registration NO."].ToString();
Put the index of column to the variable. Access by index more faster then by column name.
int index = dtWorkExp.Columns["UserId"].Ordinal;
Result code:
int dtWorkIndex = dtWorkExp.Columns["UserId"].Ordinal;
int dtAwardsIndex = dtAwards.Columns["UserId"].Ordinal;
int dtOtherQualIdex = dtOtherQual.Columns["UserId"].Ordinal;
for (int i = 0; i < dsResult.Tables[0].Rows.Count; i++)
{
var regNo = dsResult.Tables[0].Rows[i]["Registration NO."].ToString();
DataTable dtFiltered = dtWorkExp.Clone();
foreach (DataRow drr in dtWorkExp.Rows)
{
if (drr[dtWorkIndex].ToString() == regNo)
{
dtFiltered.ImportRow(drr);
}
}
...
Of course, the column index can be set as a constant if you know it exactly in advance. Also, if the UserId indexes match in all tables, a single variable is sufficient.
You can also try using the BeginLoadData and EndLoadData methods.
DataTable dtFiltered = dtWorkExp.Clone();
dtFiltered.BeginLoadData();
foreach (DataRow drr in dtWorkExp.Rows)
{
if (drr[dtWorkIndex].ToString() == regNo)
{
dtFiltered.ImportRow(drr);
}
}
dtFiltered.EndLoadData();
But I'm not sure if they make sense together with ImportRow.
Finally, parallelization comes to help.
for (int i = 0; i < dsResult.Tables[0].Rows.Count; i++)
{
var regNo = ...;
var workTask = Task.Run(() =>
{
DataTable dtFiltered = dtWorkExp.Clone();
foreach (DataRow drr in dtWorkExp.Rows)
{
if (drr[dtWorkIndex].ToString() == regNo)
{
dtFiltered.ImportRow(drr);
}
}
return dtFiltered;
});
var awardTask = Task.Run(() =>
...
var otherQualTask = Task.Run(() =>
...
//Task.WaitAll(workTask, awardTask, otherQualTask);
await Task.WhenAll(workTask, awardTask, otherQualTask);
//Do some operation with filtered Data Tables
}
I need to create a DataTable from another DataTable that I have but with only some columns (exactly same rows).
Is there an easy way to do this using LINQ?
I tried:
DataTable dataCopy = dt.AsEnumerable()
.Select(r => new { A = r.Field<string>("Column A"),
B = r.Field<string>("Column B") });
But it doesn't have a "CopyToDataTable" method.
I'm looking for the best performance way to do this because my "source" DataTable is huge!
You can simply copy the dataTable using dataTable.Copy() and remove the unwanted columns from the copied object.
var dt1 = dt.Copy();
dt.Columns.Remove("<columnsToRemove>")
Stopwatch time = new Stopwatch();
time.Start();
//COMPARE YOUR CODE (.Copy, Clone or Develop yourself)
DataTable dtTarget = dtSource.CopyDataTable(new List<string>() { "A", "B" });
foreach (DataColumn column in dtTarget.Columns)
{
Console.WriteLine("ColumnName : {0}", column.ColumnName);
foreach (DataRow row in dtTarget.Rows)
{
Console.WriteLine("Rows : {0}", row[column.ColumnName]);
}
}
time.Stop();
Console.WriteLine("{0}", time.Elapsed);
public static class DataTableHelper
{
public static DataTable CopyDataTable(
this DataTable dtSource,
List<string> columnsName)
{
DataTable dtTarget = new DataTable();
if (dtSource.Columns.Count > 0)
{
foreach (DataColumn columnSource in dtSource.Columns)
{
var columnTargetMapped = columnsName.FirstOrDefault(c => c == columnSource.ColumnName);
if(columnTargetMapped == null)
{
continue;
}
dtTarget.Columns.Add(columnTargetMapped);
foreach (DataRow drSource in dtSource.Rows)
{
var valueColumn = drSource[columnSource];
DataRow drTarget = dtTarget.NewRow();
drTarget[columnTargetMapped] = valueColumn;
dtTarget.Rows.Add(drTarget);
}
}
}
return dtTarget;
}
public static DataTable GetDataTablePart(this DataTable dt, params string[] ColumnNames)
{
var dtPart = new DataTable("TablePart");
var Names = new List<DataColumn>();
foreach (DataColumn Column in dt.Columns)
{
if(ColumnNames.Contains(Column.ColumnName))
{
Names.Add(Column);
}
}
dtPart.Columns.AddRange(Names.ToArray());
foreach(DataRow row in dt.Rows)
{
var NewRow = new object[Names.Count()];
var i = 0;
foreach (var Name in Names)
{
NewRow[i] = row[Name];
i = i + 1;
}
dtPart.LoadDataRow(NewRow, false);
}
return dtPart;
}
linq version....
public static DataTable GetDataTablePart(this DataTable dt, params string[] ColumnNames)
{
var RowCount = 0;
var dtPart = new DataTable("TablePart");
dtPart.Columns.AddRange((from column in dt.Columns.Cast<DataColumn>()
where ColumnNames.Contains(column.ColumnName)
select column).ToArray());
return (from row in dt.AsEnumerable()
let rowCount = RowCount = dt.Rows.Count
let RowValues = (from column in dtPart.Columns.Cast<DataColumn>()
select row[column]).ToArray()
let decCount = RowCount = RowCount - 1
where dtPart.LoadDataRow(RowValues,LoadOption.OverwriteChanges) != default && decCount > 0
select dtPart).FirstOrDefault();
}
I have data like
ConcurrentDictionary<string, ConcurrentDictionary<string,string> OneTwoThree =
new ConcurrentDictionary<string, ConcurrentDictionary<string, string>();
I want a result like this Final Image
What I tried:
DataTable dt = new DataTable();
dt.Columns.Add("TwoDetails");
ISet<string> two = new HashSet<string>();
Parallel.ForEach(OneTwoThree , One=>
{
dt.Columns.Add(One.Key);
foreach(var Two in One.Value)
{
two.Add(Two.Key); // To get Distinct Values
}
});
foreach(var item in two)
{
var row = dt.NewRow();
row["TwoDetails"] = row;
}
Now I don't have the idea to append "Three-Values" to a particular cell, as shown in the image.
Any Suggestions.
Another pivot table question. Done a 1000. See code below :
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
namespace ConsoleApplication31
{
class Program
{
static void Main(string[] args)
{
Dictionary<string, Dictionary<string, string>> OneTwoThree = new Dictionary<string, Dictionary<string, string>>() {
{"A", new Dictionary<string,string>(){{"U","s"}, {"Z","a"}}},
{"B", new Dictionary<string,string>(){{"W","e"},{"X","d"},{"Y","d"}}},
{"C", new Dictionary<string,string>(){{"V","f"}, {"W","a"},{"Z","w"}}},
};
string[] columns = OneTwoThree.Select(x => x.Key).OrderBy(x => x).ToArray();
DataTable dt = new DataTable();
dt.Columns.Add("TwoDetails", typeof(string));
foreach(string column in columns)
{
dt.Columns.Add(column, typeof(string));
}
string[] rows = OneTwoThree.Select(x => x.Value.Select(y => y.Key)).SelectMany(x => x).Distinct().OrderBy(x => x).ToArray();
var flip = rows.Select(x => new { row = x, columns = OneTwoThree.Where(y => y.Value.ContainsKey(x)).Select(y => new { col = y.Key, value = y.Value[x] }).ToList() }).ToList();
//create pivot table
foreach (var row in flip)
{
DataRow newRow = dt.Rows.Add();
newRow["TwoDetails"] = row.row;
foreach (var column in row.columns)
{
newRow[column.col] = column.value;
}
}
}
}
}
You can not access the data table rows with their names but using row numbers you can find a particular row in the data table.So for that, You need to create one class like
public class DataTableDetails
{
public string RowName { get; set; }
public int RowNumber { get; set; }
}
And this is not effecient solution, but can help you
ISet<string> two = new HashSet<string>();
List<DataTableDetails> dtdetaills=new List<DataTableDetails>();
Parallel.ForEach(OneTwoThree , One=>
{
dt.Columns.Add(One.Key);
foreach(var Two in One.Value)
{
two.Add(Two.Key); // To get Distinct Values
}
});
int count=0;
foreach(var item in two)
{
var row = dt.NewRow();
row["TwoDetails"] = row;
DataTableDetails details = new DataTableDetails();
details.RowName = item;
details.RowNumber = count++; // we can easily get row number
dtdetails.Add(details);
}
And Finally
foreach(var One in OnrTwoThree)
{
foreach(var Two in One.Value)
{
foreach(var rowdetails in dtdetails)
{
if(Two.Key==rowdetails.RowName)
{
dt.Rows[rowdetails.RowNumber][One.Key] = Two.Value;
}
}
}
}
I need to loop through a List of Dictionaries
List<Dictionary<string,string>>
to populate a DataTable. Each Dictionary in the list has a Key, which needs to be the column name, and a Value which is what's in that column. The list contains 225 dictionaries (225 rows to the table).
List<Dictionary<string, string>> myList =
JsonConvert.DeserializeObject<List<Dictionary<string, string>>>(jsonRep);
DataTable dt = new DataTable();
//loop through list, loop through dictionaries, add keys as columns,
//values as rows.
so far, I have been trying..
//get max columns
int columns = myList[0].Count; <--gives me 13
//add columns
for (int i = 0; i < columns; i++)
dt.Columns.Add(string myList[i].Keys); <--somehow get to the key in dict to add as column names
//add rows
foreach (var x in myList)
{
dt.Rows.Add(x); <--not working
}
jsonReprValue = dt; <--save new DataTable to var jsonReprValue
How do I do this correctly?
Thanks!
You have two problems. One is adding the columns, and the other is adding the rows.
Adding the columns
Assuming your list has items in it, and that all the dictionaries in the list have the same keys, then you only need to add columns from one of the dictionaries:
foreach(string column in myList[0].Keys)
{
dt.Columns.Add(column);
}
Adding the Rows
Change this:
foreach (var x in myList)
{
dt.Rows.Add(x); <--not working
}
To this:
foreach(Dictionary<string, string> dictionary in myList)
{
DataRow dataRow = dt.NewRow();
foreach(string column in dictionary.Keys)
{
dataRow[column] = dictionary[column];
}
dt.Rows.Add(dataRow);
}
See DataTable.NewRow.
Reiview DataTable Class's example on how to work with a data table. But here is an example using Linq
List<Dictionary<string, string>> myList = new List<Dictionary<string, string>>()
{ new Dictionary<string,string>() { { "ABC", "This" },
{ "DEF", "is" },
{ "GHI", "radio" },
{ "JKL", "clash" } } };
DataTable dt = new DataTable();
// Add columns first
dt.Columns.AddRange( myList.First ()
.Select (kvp => new DataColumn() { ColumnName = kvp.Key, DataType = System.Type.GetType("System.String")} )
.AsEnumerable()
.ToArray()
);
// Now add the rows
myList.SelectMany (Dict => Dict.Select (kvp => new {
Row = dt.NewRow(),
Kvp = kvp
}))
.ToList()
.ForEach( rowItem => {
rowItem.Row[rowItem.Kvp.Key] = rowItem.Kvp.Value;
dt.Rows.Add( rowItem.Row );
}
);
dt.Dump();
The result (Dump is LinqPad specific not .Net):