Linq performance and delayed execution - c#

I have run some tests on the .NET CF (Compact Framework). Basically, I wanted to compare for, foreach, the extension method ForEach, and a LINQ query. Here is the whole code (you can skip it to get to the point that bothers me):
namespace ForEachForLINQPerTest
{
/// <summary>
/// Small reference type used by the benchmarks. It stores one int in a public
/// field and another in an auto-implemented property so the two access paths
/// can be timed separately.
/// </summary>
class IntBox
{
/// <summary>Public field read by the "field" benchmark cases.</summary>
public int fieldX;
/// <summary>Auto-implemented property read by the "property" benchmark cases.</summary>
public int PropertyX { get; set; }
}
public partial class MainPage : PhoneApplicationPage
{
/// <summary>
/// size of tested List
/// </summary>
public const int TEST_SIZE = 1000000;
//
private List<int> m_intList = new List<int>(TEST_SIZE);
//
private List<IntBox> m_intBoxList = new List<IntBox>(TEST_SIZE);
//
private Stopwatch m_stopwatch = null;
// Constructor
/// <summary>
/// Initializes the page and fills both benchmark lists with TEST_SIZE entries:
/// a fresh <see cref="IntBox"/> in m_intBoxList and a zero in m_intList.
/// </summary>
public MainPage()
{
    InitializeComponent();

    int remaining = TEST_SIZE;
    while (remaining-- > 0)
    {
        m_intBoxList.Add(new IntBox());
        m_intList.Add(0);
    }
}
/// <summary>
/// Runs every benchmark twice and displays the result of the second run;
/// the first run of each pair only serves to warm up the JIT.
/// </summary>
private void startButton_Click(object sender, RoutedEventArgs e)
{
    // Warm-up call first (result discarded), then the measured call.
    ForTest();
    forResultTextBlock.Text = ForTest();

    ForEachTest();
    foreachResultTextBlock.Text = ForEachTest();

    Extenstion();
    ExtensionResultTextBlock.Text = Extenstion();

    LINQTest();
    LINQResultTextBlock.Text = LINQTest();
}
/// <summary>
/// Times three LINQ query expressions (over List&lt;int&gt;, IntBox.fieldX and
/// IntBox.PropertyX) and returns the elapsed milliseconds as a formatted string.
/// </summary>
/// <remarks>
/// As written, each Start/Stop pair encloses only the query *definition*.
/// LINQ queries are executed lazily (deferred execution), so the actual
/// iteration happens in the ToList() calls, which are made after the elapsed
/// time has already been read. To time the enumeration, ToList() would have
/// to be called before Stop().
/// </remarks>
/// <returns>A human-readable summary of the three measured times.</returns>
private string LINQTest()
{
m_stopwatch = new Stopwatch();
m_stopwatch.Start();
// Accumulator captured by all three query lambdas.
long temp = 0;
// Only builds the query; nothing is enumerated here.
var result = from x in m_intList
select temp += x;
m_stopwatch.Stop();
var intListTime = m_stopwatch.ElapsedMilliseconds;
m_stopwatch.Reset();
// Enumeration happens here, outside the timed region; the list is discarded.
result.ToList();
m_stopwatch.Start();
// Only builds the query; nothing is enumerated here.
var result2 = from x in m_intBoxList
select temp += x.fieldX;
m_stopwatch.Stop();
var intBoxListFieldTime = m_stopwatch.ElapsedMilliseconds;
m_stopwatch.Reset();
// Enumeration happens here, outside the timed region; the list is discarded.
result2.ToList();
m_stopwatch.Start();
// Only builds the query; nothing is enumerated here.
var result3 = from x in m_intBoxList
select temp += x.PropertyX;
m_stopwatch.Stop();
var intBoxListPropertyTime = m_stopwatch.ElapsedMilliseconds;
m_stopwatch.Reset();
// Enumeration happens here, outside the timed region; the list is discarded.
result3.ToList();
return String.Format("LINQ test List<int> = {0} \n List<IntBox> field = {1} \n List<IntBos> property = {2}", intListTime, intBoxListFieldTime, intBoxListPropertyTime);
}
/// <summary>
/// Times List&lt;T&gt;.ForEach with a summing lambda over m_intList, over
/// IntBox.fieldX and over IntBox.PropertyX, and returns the elapsed
/// milliseconds of each pass as a formatted string.
/// </summary>
/// <returns>A human-readable summary of the three measured times.</returns>
private string Extenstion()
{
m_stopwatch = new Stopwatch();
m_stopwatch.Start();
// Accumulator captured by the lambdas below.
long temp = 0;
// Pass 1: sum the plain ints.
m_intList.ForEach(i => temp += i);
m_stopwatch.Stop();
var intListTime = m_stopwatch.ElapsedMilliseconds;
m_stopwatch.Reset();
m_stopwatch.Start();
// Pass 2: sum via the public field.
m_intBoxList.ForEach(i => temp += i.fieldX);
m_stopwatch.Stop();
var intBoxListFieldTime = m_stopwatch.ElapsedMilliseconds;
m_stopwatch.Reset();
m_stopwatch.Start();
// Pass 3: sum via the property getter.
m_intBoxList.ForEach(i => temp += i.PropertyX);
m_stopwatch.Stop();
var intBoxListPropertyTime = m_stopwatch.ElapsedMilliseconds;
m_stopwatch.Reset();
return String.Format("Extenstion test List<int> = {0} \n List<IntBox> field = {1} \n List<IntBos> property = {2}", intListTime, intBoxListFieldTime, intBoxListPropertyTime);
}
/// <summary>
/// Times a foreach loop summing m_intList, IntBox.fieldX and IntBox.PropertyX,
/// and returns the elapsed milliseconds of each pass as a formatted string.
/// </summary>
/// <returns>A human-readable summary of the three measured times.</returns>
private string ForEachTest()
{
m_stopwatch = new Stopwatch();
// Accumulator shared by all three passes.
long temp = 0;
m_stopwatch.Start();
// Pass 1: sum the plain ints.
foreach(int item in m_intList)
{
temp += item;
}
m_stopwatch.Stop();
var intListTime = m_stopwatch.ElapsedMilliseconds;
m_stopwatch.Reset();
m_stopwatch.Start();
// Pass 2: sum via the public field.
foreach (IntBox item in m_intBoxList)
{
temp += item.fieldX;
}
m_stopwatch.Stop();
var intBoxListFieldTime = m_stopwatch.ElapsedMilliseconds;
m_stopwatch.Reset();
m_stopwatch.Start();
// Pass 3: sum via the property getter.
foreach (IntBox item in m_intBoxList)
{
temp += item.PropertyX;
}
m_stopwatch.Stop();
var intBoxListPropertyTime = m_stopwatch.ElapsedMilliseconds;
m_stopwatch.Reset();
return String.Format("ForEach test List<int> = {0} \n List<IntBox> field = {1} \n List<IntBos> property = {2}", intListTime, intBoxListFieldTime, intBoxListPropertyTime);
}
/// <summary>
/// Times an indexed for loop summing m_intList, IntBox.fieldX and
/// IntBox.PropertyX, and returns the elapsed milliseconds of each pass as a
/// formatted string.
/// </summary>
/// <returns>A human-readable summary of the three measured times.</returns>
private string ForTest()
{
m_stopwatch = new Stopwatch();
m_stopwatch.Start();
// Accumulator shared by all three passes.
long temp = 0;
// Pass 1: bounded by the TEST_SIZE constant.
for (int i = 0; i < TEST_SIZE; ++i)
{
temp += m_intList[i];
}
m_stopwatch.Stop();
var intListTime = m_stopwatch.ElapsedMilliseconds;
m_stopwatch.Reset();
m_stopwatch.Start();
// NOTE(review): the bound is m_intList.Count although m_intBoxList is indexed.
// Both lists receive TEST_SIZE entries in the constructor, so the counts match,
// but unlike pass 1 this loop evaluates the Count property on every iteration.
for (int i = 0; i < m_intList.Count; ++i)
{
temp += m_intBoxList[i].fieldX;
}
m_stopwatch.Stop();
var intBoxListFieldTime = m_stopwatch.ElapsedMilliseconds;
m_stopwatch.Reset();
m_stopwatch.Start();
// Same bound as pass 2 (m_intList.Count), indexing m_intBoxList.
for (int i = 0; i < m_intList.Count; ++i)
{
temp += m_intBoxList[i].PropertyX;
}
m_stopwatch.Stop();
var intBoxListPropertyTime = m_stopwatch.ElapsedMilliseconds;
m_stopwatch.Reset();
return String.Format("For loop test List<int> = {0} \n List<IntBox> field = {1} \n List<IntBos> property = {2}", intListTime, intBoxListFieldTime, intBoxListPropertyTime);
}
}
}
And here I am confused
m_stopwatch = new Stopwatch();
m_stopwatch.Start();
long temp = 0;
var result = from x in m_intList
select temp += x;
m_stopwatch.Stop();
var intListTime = m_stopwatch.ElapsedMilliseconds;
m_stopwatch.Reset();
result.ToList();
The output is:
For loop test List = 93
List field = 119 // ref -> field
List property = 136 // ref -> property -> field properties are just functions for CF
ForEach test List = 88
List field = 140
List property = 152
Extenstions test List = 176 // another function is called.
List field = 220
List property = 239
LINQ test List = 0 Why?
List field = 163
List property = 165
Why intListTime == 0? What am I doing wrong? Also the last two values for field and property are almost the same (run it a few times). Does it mean that PropertyX in LINQ query is evaluated in-line?

The first time is zero because at that point the query has only been defined (the compiler turns the query expression into a Select call with a lambda); it is not evaluated until the ToList call, which you have not included in the timing.
For the field and property access timings, I wouldn't worry too much - in reality, in a release build, a simple property accessor will get inlined, giving the same performance as field access. For the LINQ case, you might be seeing the same performance because LINQ internally might be converting the property/field access into a method call, which would result in the same timings (as I believe the method call overhead will probably be large compared to the field/property access).

This is called "deferred execution". The LINQ statement isn't evaluated until it needs to be. Move the ToList call to before you stop the clock and the time will go up.

Related

Captured variable-like error in Parallel.For loop

The following code section worked appropriately:
Parallel.For(
0, numberOfRunsNeeded, j =>
{
var copyOfj = j;
var researchItems = viewModel.ResearchItems[queryNumber].GetRange((int)(copyOfj * itemsAtOnce), Math.Min(itemsAtOnce, viewModel.ResearchItems[queryNumber].Count - (copyOfj * itemsAtOnce)));
var finalQuery = GetCorrectedQuery(query.BaseQuery, query.SQLVariants[copyOfi]);
if (researchItems.Count > 0)
{
finalQuery = GetCorrectedQueryWithResearchItems(finalQuery, researchItems, query.SQLVariants[copyOfi]);
}
PerformSingleRun(query, copyOfi, dataSource, finalQuery, copyOfj, viewModel);
}
);
This updated code shows captured variable-like errors - finalQuery comes back like the For loop is repeating the same value many times:
Parallel.For(
0, numberOfRunsNeeded, parallelOptionsWithMaxDegreeOfParallelism, j =>
{
var copyOfj = j;
if (researchItemsPresent)
{
var researchItems = ViewModel.ResearchItems[queryNumber].GetRange(copyOfj * itemsAtOnce, Math.Min(itemsAtOnce, ViewModel.ResearchItems[queryNumber].Count - (copyOfj * itemsAtOnce)));
finalQuery = GetAdaptedBaseQueryWithResearchItemsInserted(finalQuery, researchItems, query.SQLVariants[copyOfi]);
}
PerformSingleRun(query, copyOfi, dataSource, finalQuery, copyOfj, viewModel);
}
);
As stated above, I already have working code - I'm just trying to understand what I did wrong in my revision. Full methods listed below:
Previous, working:
public static void ProcessSingleQuery(int queryNumber, ViewModel viewModel)
{
var query = new Query
{
Name = viewModel.QueryNames[queryNumber],
BaseQuery = viewModel.BaseQueries[queryNumber],
SelectedDatabases = viewModel.SelectedDatabases[queryNumber],
SQLVariants = viewModel.SQLVariants[queryNumber],
Usernames = viewModel.Usernames[queryNumber],
Passwords = viewModel.Passwords[queryNumber],
CSVFiles = viewModel.CSVFiles[queryNumber],
CSVFileAliases = viewModel.CSVFileAliases[queryNumber],
ColumnDelimiters = viewModel.ColumnDelimiters[queryNumber],
HeaderRowsPresent = viewModel.HeaderRowsPresent[queryNumber],
TextDelimiters = viewModel.TextDelimiters[queryNumber],
ResearchItemColumnNumber = viewModel.ResearchItemColumnNumber[queryNumber]
};
for (var i = 0; i < query.SelectedDatabases.Count; i++)
{
var dataSource = GetDataSource(query.SelectedDatabases[i]);
var itemsAtOnce = ViewModel.ItemsAtOnceBySQLVariant[query.SQLVariants[i]];
if (query.SelectedDatabases[i].Equals("CSV"))
{
RefreshOrCreateSchemaIniFile(query);
dataSource = query.CSVFiles[0].DirectoryName;
}
var numberOfRunsNeeded = Math.Max(
(int)Math.Ceiling((double)viewModel.ResearchItems[queryNumber].Count / itemsAtOnce), 1
);
viewModel.QueryRunsCompletedMaximum += numberOfRunsNeeded;
var copyOfi = i;
Parallel.For(
0, numberOfRunsNeeded, j =>
{
var copyOfj = j;
var researchItems = viewModel.ResearchItems[queryNumber].GetRange((int)(copyOfj * itemsAtOnce), Math.Min(itemsAtOnce, viewModel.ResearchItems[queryNumber].Count - (copyOfj * itemsAtOnce)));
var finalQuery = GetCorrectedQuery(query.BaseQuery, query.SQLVariants[copyOfi]);
if (researchItems.Count > 0)
{
finalQuery = GetCorrectedQueryWithResearchItems(finalQuery, researchItems, query.SQLVariants[copyOfi]);
}
PerformSingleRun(query, copyOfi, dataSource, finalQuery, copyOfj, viewModel);
}
);
}
GeneralTools.CombineAndDeleteQueryResults(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), query.Name);
if (query.ResearchItemColumnNumber != 0)
{
CompileMissingItemsReport(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), query.Name, viewModel, queryNumber);
}
}
Revised, broken:
/// <summary>
/// Revised (broken) version: builds a Query from the view-model data for
/// <paramref name="queryNumber"/>, then, for each selected database, runs the
/// query in parallel batches and finally combines the results.
/// </summary>
/// <remarks>
/// The defect discussed in this thread: finalQuery is declared once per
/// database, outside the Parallel.For body, and is both read and reassigned
/// inside that body. All parallel iterations therefore share one variable
/// and race on it.
/// </remarks>
/// <param name="queryNumber">Index of the query inside the view-model collections.</param>
/// <param name="viewModel">View-model instance whose QueryRunsCompletedMaximum is updated and which is passed on to PerformSingleRun.</param>
public static void ProcessSingleQuery(int queryNumber, ViewModel viewModel)
{
var query = new Query
{
Name = ViewModel.QueryNames[queryNumber],
BaseQuery = ViewModel.BaseQueries[queryNumber],
SelectedDatabases = ViewModel.SelectedDatabases[queryNumber],
SQLVariants = ViewModel.SQLVariants[queryNumber],
Usernames = ViewModel.Usernames[queryNumber],
Passwords = ViewModel.Passwords[queryNumber],
CSVFiles = ViewModel.CSVFiles[queryNumber],
CSVFileAliases = ViewModel.CSVFileAliases[queryNumber],
ColumnDelimiters = ViewModel.ColumnDelimiters[queryNumber],
HeaderRowsPresent = ViewModel.HeaderRowsPresent[queryNumber],
TextDelimiters = ViewModel.TextDelimiters[queryNumber],
ResearchItemColumnNumber = ViewModel.ResearchItemColumnNumber[queryNumber]
};
for (var i = 0; i < query.SelectedDatabases.Count; i++)
{
// Declared OUTSIDE the Parallel.For lambda: every parallel iteration below
// captures and shares this single variable.
var finalQuery = GetAdaptedBaseQuery(query, query.SQLVariants[i]);
var dataSource = GetDataSource(query.SelectedDatabases[i]);
var itemsAtOnce = ViewModel.ItemsAtOnceBySQLVariant[query.SQLVariants[i]];
if (query.SelectedDatabases[i].Contains("CSV"))
{
CreateSchemaIniFile(query);
dataSource = query.CSVFiles[0].DirectoryName;
}
var researchItemsPresent = ViewModel.ResearchItems[queryNumber].Count > 0;
// Number of batches: ceil(item count / itemsAtOnce), but at least one run.
var numberOfRunsNeeded = Math.Max(
(int)Math.Ceiling((double)ViewModel.ResearchItems[queryNumber].Count / itemsAtOnce), 1
);
// NOTE(review): most data above is read through `ViewModel.`, while this line
// updates the `viewModel` parameter - confirm the mix is intended.
viewModel.QueryRunsCompletedMaximum += numberOfRunsNeeded;
// Copy of the loop variable for use inside the lambda.
var copyOfi = i;
// Teradata runs are capped at 6 parallel workers; -1 means no explicit limit.
var parallelOptionsWithMaxDegreeOfParallelism = new ParallelOptions
{
MaxDegreeOfParallelism =
query.SQLVariants[i] == SQLVariant.Teradata ? 6 : -1
};
Parallel.For(
0, numberOfRunsNeeded, parallelOptionsWithMaxDegreeOfParallelism, j =>
{
var copyOfj = j;
if (researchItemsPresent)
{
// Batch j: up to itemsAtOnce items starting at j * itemsAtOnce.
var researchItems = ViewModel.ResearchItems[queryNumber].GetRange(copyOfj * itemsAtOnce, Math.Min(itemsAtOnce, ViewModel.ResearchItems[queryNumber].Count - (copyOfj * itemsAtOnce)));
// RACE: reads the shared finalQuery (possibly already rewritten by another
// iteration) and writes the result back to it, without any lock.
finalQuery = GetAdaptedBaseQueryWithResearchItemsInserted(finalQuery, researchItems, query.SQLVariants[copyOfi]);
}
// May observe a finalQuery value written by a different iteration.
PerformSingleRun(query, copyOfi, dataSource, finalQuery, copyOfj, viewModel);
}
);
}
GeneralTools.CombineAndDeleteQueryResults(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), query.Name);
if (query.ResearchItemColumnNumber != 0)
{
CompileMissingItemsReport(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), query.Name, queryNumber);
}
}
Why your broken version is broken
The problem appears to be two-fold:
First, you have a variable called finalQuery in an outer scope which you also use in a closure, specifically the one passed in as the body delegate of your Parallel.For, and is therefore the same variable in all iterations of your Parallel.For.
Second, you both read and write this finalQuery variable in that same Parallel.For body, notably with the code:
finalQuery = GetAdaptedBaseQueryWithResearchItemsInserted(finalQuery, ...)
...where you'll see you pass the current value of finalQuery as your base query.
The order in which the various iterations of that loop reach that line of code can change and depends on system architecture and processor load, causing a race condition. Access to your variable is also not governed by a lock.
Why the other version worked
In your working version, finalQuery is a variable that is declared within and therefore entirely local to the Parallel.For body function. This prevents any iterations from seeing values of finalQuery from other iterations. And more importantly, each finalQuery is constructed from a common, invariant base query (query.baseQuery) with this code:
var finalQuery = GetCorrectedQuery(query.BaseQuery, ...)
And although you further adjust the value of finalQuery in the line below:
finalQuery = GetCorrectedQueryWithResearchItems(finalQuery, ...)
...this is fine because this finalQuery variable is local to your lambda function and its value is based solely on the previous line, and fortunately, not from varying values being written by other iterations of the Parallel.For, as was the case in your race condition.

List from fields of elements

I can turn my dictionary Dictionary<long,MyObj> to a list with .Values, where MyObj has the Vector3 field named position.
I would like to have a list of the positions (the value type): MyObj.position, like so: List<Vector3> positions.
Performance is critical.
Is there a way to achieve this in C#, with LINQ or otherwise, other than the following?
foreach e in myDict
mylist.Add(e.Value.position)
So, using the code below:
/// <summary>
/// Times three ways of collecting the Position of every dictionary value into
/// a list (Values.Select, Select over the pairs, and a plain foreach) and
/// prints the elapsed time of each.
/// </summary>
public static void Main(string[] args)
{
    var users = GetDictionary(1000);

    // 1) Project over the Values collection.
    var valuesTimer = Stopwatch.StartNew();
    var viaValues = users.Values.Select(user => user.Position).ToList();
    valuesTimer.Stop();

    // 2) Project over the key/value pairs.
    var pairsTimer = Stopwatch.StartNew();
    var viaPairs = users.Select(entry => entry.Value.Position).ToList();
    pairsTimer.Stop();

    // 3) Plain foreach; the list creation is inside the timed region.
    var loopTimer = Stopwatch.StartNew();
    var viaLoop = new List<double>();
    foreach (var entry in users)
    {
        viaLoop.Add(entry.Value.Position);
    }
    loopTimer.Stop();

    Console.WriteLine("results1: " + valuesTimer.Elapsed);
    Console.WriteLine("results2: " + pairsTimer.Elapsed);
    Console.WriteLine("results3: " + loopTimer.Elapsed);
    Console.Read();
}
/// <summary>
/// Builds a test dictionary with <paramref name="numberOfRows"/> entries.
/// Entry n (zero-based) has key 1000 + n, Age 10 + n and Position 100.01 + n.
/// </summary>
/// <param name="numberOfRows">Number of entries to create; zero or less yields an empty dictionary.</param>
/// <returns>The populated dictionary.</returns>
public static Dictionary<long, MyUser> GetDictionary(int numberOfRows)
{
    var users = new Dictionary<long, MyUser>();
    int row = 0;
    while (row < numberOfRows)
    {
        var user = new MyUser { Age = 10 + row, Position = 100.01 + row };
        users.Add(1000 + row, user);
        row++;
    }
    return users;
}
This brought back:
So foreach looks like the quickest of the 3 proposed on my machine. Worth testing yourself though.
With 'optimize code' selected:

How to Merge items within a List<> collection C#

I have an implementation where I need to loop through a collection of documents and, based on a certain condition, merge the documents.
The merge condition is very simple, if present document's doctype is same as later document's doctype, then copy all the pages from the later doctype and append it to the pages of present document's and remove the later document from the collection.
Note : Both response.documents and response.documents[].pages are List<> collections.
I was trying this, but I get the following exception once I remove a document:
collection was modified enumeration may not execute
Here is the code:
// Tries to merge each document with its immediate successor when both have
// the same doctype: the successor's pages are appended and the successor removed.
// NOTE: response.documents is modified (RemoveAt) while the foreach below is
// still enumerating it, which raises the "collection was modified" exception.
int docindex = 0;
foreach( var document in response.documents)
{
string presentDoctype = string.Empty;
string laterDoctype = string.Empty;
presentDoctype = response.documents[docindex].doctype;
// NOTE: on the last element, docindex + 1 is past the end of the list, so
// this indexer access is out of range.
laterDoctype = response.documents[docindex + 1].doctype;
if (laterDoctype == presentDoctype)
{
response.documents[docindex].pages.AddRange(response.documents[docindex + 1].pages);
response.documents.RemoveAt(docindex + 1);
}
// Advances even after a merge, so the element that moved into docindex + 1
// is compared with the next document rather than with the merged one.
docindex = docindex + 1;
}
Ex:
reponse.documents[0].doctype = "BankStatement" //page count = 1
reponse.documents[1].doctype = "BankStatement" //page count = 2
reponse.documents[2].doctype = "BankStatement" //page count = 2
reponse.documents[3].doctype = "BankStatement" //page count = 1
reponse.documents[4].doctype = "BankStatement" //page count = 4
Expected result:
response.documents[0].doctype = "BankStatement" //page count = 10
Please suggest.Appreciate your help.
I would recommend you to look at LINQ GroupBy and Distinct to process your response.documents
Example (as I cannot use your class, I give example using my own defined class):
Suppose you have DummyClass
/// <summary>
/// Minimal data holder with three public fields, used to demonstrate GroupBy.
/// </summary>
public class DummyClass
{
    public int DummyInt;
    public string DummyString;
    public double DummyDouble;

    /// <summary>Creates an instance with every field at its default value.</summary>
    public DummyClass()
        : this(default(int), default(string), default(double))
    {
    }

    /// <summary>Creates an instance with the given field values.</summary>
    public DummyClass(int dummyInt, string dummyString, double dummyDouble)
    {
        this.DummyInt = dummyInt;
        this.DummyString = dummyString;
        this.DummyDouble = dummyDouble;
    }
}
Then doing GroupBy as shown,
DummyClass dc1 = new DummyClass(1, "This dummy", 2.0);
DummyClass dc2 = new DummyClass(2, "That dummy", 2.0);
DummyClass dc3 = new DummyClass(1, "These dummies", 2.0);
DummyClass dc4 = new DummyClass(2, "Those dummies", 2.0);
DummyClass dc5 = new DummyClass(3, "The dummies", 2.0);
List<DummyClass> dummyList = new List<DummyClass>() { dc1, dc2, dc3, dc4, dc5 };
var groupedDummy = dummyList.GroupBy(x => x.DummyInt).ToList();
Will create three groups, marked by DummyInt
Then to process the group you could do
// Outer loop walks the groups by index; inner loop walks the members of one group.
for (int i = 0; i < groupedDummy.Count; ++i){
foreach (DummyClass dummy in groupedDummy[i]) { //this processes the group at index i (zero-based)
//do something on this group
//groupedDummy[0] will consist of "this" and "these", [1] of "that" and "those", while [2] holds "the"
//Try it out!
}
}
In your case, you should create group based on doctype.
Once you create groups based on your doctype, everything else would be pretty "natural" for you to continue.
Another LINQ method which you might be interested in would be Distinct. But I think for this case, GroupBy would be the primary method you would like to use.
Use only "for loop" instead of "foreach".
foreach enumerates the collection through an enumerator, so the collection cannot be modified while looping through it.
Here is an example using groupBy, hope this help.
//mock a collection
ICollection<string> collection1 = new List<string>();
for (int i = 0; i < 10; i++)
{
collection1.Add("BankStatement");
}
for (int i = 0; i < 5; i++)
{
collection1.Add("BankStatement2");
}
for (int i = 0; i < 4; i++)
{
collection1.Add("BankStatement3");
}
//merge and get count
var result = collection1.GroupBy(c => c).Select(c => new { name = c.First(), count = c.Count().ToString() }).ToList();
foreach (var item in result)
{
Console.WriteLine(item.name + ": " + item.count);
}
Just use AddRange()
response.documents[0].pages.AddRange(response.documents[1].pages);
it will merge all pages of document[1] with the document[0] into document[0]

Combining var and Dynamic statements

Heres a fun problem I have.
I have a function that returns a var of items;
var Items = new { sumList = SumList, ratesList = List, sum = List.Sum() };
return Items;
From a function that is dynamic:
public override dynamic GetRates()
and I return it to a function I else where and try to apply it to my code:
dynamic res = cl.mainC.GetRates();
List<double> MashkantaSumList = res.sumList;
Now when I try to apply it, it says the object doesnt exist. But if I look in the debugger the items are happily there as a generic list or what not.
How do I resolve this?
EDIT:
as per request I'll post the full code:
//virtual
public virtual dynamic TotalMashkanta(int i, double sum, double ribit, string[] discount)
{
return 0;
}
//override
/// <summary>
/// Computes, for each of the <paramref name="i"/> periods, the discounted rate
/// and the running sum, and returns them together with the total of the rates.
/// </summary>
/// <remarks>
/// The result is an <see cref="System.Dynamic.ExpandoObject"/> rather than an
/// anonymous type. Anonymous types are compiled as <c>internal</c>, so a
/// <c>dynamic</c> caller located in a different assembly cannot bind to their
/// members at run time (the binder reports that the member does not exist even
/// though the debugger shows it). ExpandoObject is public and binds from any
/// assembly, while exposing the same member names: sumList, ratesList, sum.
/// </remarks>
/// <param name="i">Number of periods to calculate.</param>
/// <param name="sum">Loan amount.</param>
/// <param name="ribit">Yearly interest; divided by 12 to obtain the per-period rate passed to Pmt.</param>
/// <param name="discount">Discount definitions forwarded to CalcDiscount.</param>
/// <returns>
/// A dynamic object with <c>sumList</c> (List&lt;double&gt;), <c>ratesList</c>
/// (List&lt;double&gt;) and <c>sum</c> (double, total of ratesList).
/// </returns>
public override dynamic TotalMashkanta(int i, double sum, double ribit, string[] discount)
{
    double monthlyRibit = ribit / 12;
    Double permPayPerMont = Financial.Pmt(monthlyRibit, i, sum, 0, DueDate.EndOfPeriod);
    List<double> MashkantaList = new List<double>();
    List<double> MashkantaSumList = new List<double>();

    for (int j = 1; j <= i; j++)
    {
        // (j / 12) is integer division; presumably the 1-based year of period j - TODO confirm.
        MashkantaList.Add(Mashkanta(j, sum, ribit, permPayPerMont) * (1 - CalcDiscount((j / 12) + 1, discount)));

        // The payment is passed with inverted sign, as in the original implementation.
        double sumAfterPeriod = getSum(j, sum, ribit, permPayPerMont * -1);
        MashkantaSumList.Add(sumAfterPeriod);
    }

    // Public dynamic container so callers in other assemblies can read the members.
    dynamic K_Mashkanta = new System.Dynamic.ExpandoObject();
    K_Mashkanta.sumList = MashkantaSumList;
    K_Mashkanta.ratesList = MashkantaList;
    K_Mashkanta.sum = MashkantaList.Sum();
    return K_Mashkanta;
}
//Function that calls the results
/// <summary>
/// Selects the company from <paramref name="ManID"/>, derives the loan length
/// in months, calls TotalMashkanta and reads the returned lists from the
/// dynamic result.
/// </summary>
public void GetSilukinTable(string Path, string ClientID, DAL.Client client, string partner_checked, string insurance_Amount, string Premiya_Structure_Mashkanta, string Premiya_Life_Mashkanta, string Discount_Life_Mashkanta, string Loan_Period,string Loan_EndDate, string Bank, string Loan_Interest, string Loan_Amount, string Discount_Loan, string AgentNotes, string ManID)
{
// Default company; overridden below for the known manufacturer ids.
BL.CalculateLogic.Companies t = BL.CalculateLogic.Companies.כלל;
if(ManID == "211") t = BL.CalculateLogic.Companies.הפניקס;
if(ManID == "207") t = BL.CalculateLogic.Companies.הראל;
if(ManID == "206") t = BL.CalculateLogic.Companies.מנורה;
if(ManID == "208") t = BL.CalculateLogic.Companies.הכשרה;
BL.CalculateLogic cl = new BL.CalculateLogic(client, t);
DateTime LoanEnd = DateTime.Now;
int months = 0;
// Prefer an explicit end date; otherwise fall back to the loan period,
// which is multiplied by 12 (i.e. treated as a number of years).
// NOTE(review): DateTime.Now is read again here, separately from the line above.
if (DateTime.TryParse(Loan_EndDate, out LoanEnd))
months = BL.Calculating_Companies.Company.GetMonthsBetween(DateTime.Now, LoanEnd);
else
months = Int32.Parse(Loan_Period) * 12;
string[] Discount = Discount_Loan.Split('-');
// The interest string has its '%' characters trimmed before parsing.
dynamic res = cl.mainC.TotalMashkanta(months, Double.Parse(Loan_Amount), Double.Parse(Loan_Interest.Trim('%')), Discount);
// Member access on `res` is bound at run time, not checked by the compiler.
var MashkantaSumList = res.sumList;
List<double> MashkantaList = res.ratesList;
List<double> MashkantaSumListPartner = new List<double>();
List<double> MashkantaListPartner = new List<double>();
// NOTE(review): named "...ListSum" but assigned res.ratesList, the same list
// as MashkantaList above - confirm whether res.sumList or res.sum was intended.
List<double> MashkantaListSum = res.ratesList;
}
The compiler is happy about it because member access on a dynamic value is resolved and checked at run time, not at compile time. Whatever the problem is, the types don't match, and since this is evaluated at run time you won't see such issues at compile time. (Advice: use dynamic only when you really must! Otherwise you will have this kind of problem all the time!)
I tried your code using this and it works fine:
/// <summary>
/// Returns an anonymous object carrying two empty List&lt;double&gt; instances
/// (sumList, ratesList) and the sum of ratesList (sum).
/// </summary>
static dynamic GetRates()
{
    var sums = new List<double>();
    var rates = new List<double>();
    double total = rates.Sum();

    return new { sumList = sums, ratesList = rates, sum = total };
}
static void Main(string[] args)
{
dynamic res = GetRates();
List<double> MashkantaSumList = res.sumList;
}

Searching for a string in a string

Dear stackoverflow members,
I have this string:
string data = "1Position1234Article4321Quantity2Position4323Article3323Quantity";
I want to search for the values where the "keyword" is Position. In this case I want to get back 1 and 2. Each value is "indexed" with its own "keyword". So the value 1 in this string has the Position seperator. The value 1234 has the Article seperator and the value 4321 has the Quantity seperator.
I need a way to search through the string and want to get all positions, articles and quantitys back. Without the keywords.
Output should be:
string[] position = {"1", "2"};
string[] article = {"1234", "4323"};
string[] quantity = {"4321", "3323"};
Hopefully some can help me here.
Thanks!
This is a quick solution I've come up with in LinqPad:
void Main()
{
string data = "1Position1234Article4321Quantity2Position4323Article3323Quantity";
var Articles = Indices(data, "Article").Dump("Articles: ");
var Posistions = Indices(data, "Position").Dump("Positions :");
var Quantities = Indices(data, "Quantity").Dump("Quantities :");
}
// Define other methods and classes here
/// <summary>
/// Returns the numeric values that directly precede each occurrence of
/// <paramref name="keyword"/> in <paramref name="source"/>, where the source
/// is a run of alternating digit and letter sequences
/// (e.g. "1Position1234Article").
/// </summary>
/// <param name="source">The text to scan.</param>
/// <param name="keyword">The letter sequence whose preceding numbers are wanted.</param>
/// <returns>
/// The parsed numbers in order of appearance. An occurrence of the keyword is
/// skipped when nothing precedes it or when the preceding token is not a valid Int32.
/// </returns>
public List<int> Indices(string source, string keyword)
{
    var results = new List<int>();
    //source: http://stackoverflow.com/questions/3720012/regular-expression-to-split-string-and-number
    // Regex.Split also returns the text captured by the groups, so after dropping
    // the empty entries we are left with the alternating letter/digit tokens.
    var tokens = Regex.Split(source, "(?<Alpha>[a-zA-Z]*)(?<Numeric>[0-9]*)")
                      .Where(token => !String.IsNullOrEmpty(token))
                      .ToList();

    // Start at 1: a keyword in the very first position has no preceding token,
    // and indexing tokens[-1] would throw ArgumentOutOfRangeException.
    for (int i = 1; i < tokens.Count; i++)
    {
        if (tokens[i] != keyword)
        {
            continue;
        }

        int val;
        if (Int32.TryParse(tokens[i - 1], out val))
        {
            results.Add(val);
        }
    }
    return results;
}
Output:
Here's a possible algorithm:
Run through the list and take each number / keyword.
Put them in a dictionary with key "keyword", value a list with all "numbers".
Iterate the dictionary and print the key + its values.
Below snippet can use to get the output like what you expected.
string data = "1Position1234Article4321Quantity2Position4323Article3323Quantity";
StringBuilder sb = new StringBuilder();
StringBuilder sbWord = new StringBuilder();
bool isDigit = false;
bool isChar = false;
Dictionary<int, string> dic = new Dictionary<int, string>();
int index = 0;
for (int i = 0; i < data.Length; i++)
{
if (char.IsNumber(data[i]))
{
isDigit = true;
if (isChar)
{
dic.Add(index, sb.ToString() + "|" + sbWord.ToString());
index++;
isChar = false;
sb.Remove(0, sb.Length);
sbWord.Remove(0, sbWord.Length);
}
}
else
{
isDigit = false;
isChar = true;
sbWord.Append(data[i]);
}
if (isDigit)
sb.Append(data[i]);
if (i == data.Length - 1)
{
dic.Add(index, sb.ToString() + "|" + sbWord.ToString());
}
}
List<string> Position = new List<string>();
List<string> Article = new List<string>();
List<string> Quantity = new List<string>();
if (dic.Count > 0)
{
for (int i = 0; i < dic.Count; i++)
{
if (dic[i].Split('|')[1] == "Position")
Position.Add(dic[i].Split('|')[0]);
else if (dic[i].Split('|')[1] == "Article")
Article.Add(dic[i].Split('|')[0]);
else
Quantity.Add(dic[i].Split('|')[0]);
}
}
string[] Position_array = Position.ToArray();
string[] Article_array = Article.ToArray();
string[] Quantity_array = Quantity.ToArray();
Try this simple solution.
/**
 * Splits a string of the form "&lt;number&gt;&lt;keyword&gt;..." after every
 * keyword and prints, per keyword, the numbers that precede it.
 */
class StrSplit {

    /** Collects, in order, the text before {@code keyword} from every part that contains it. */
    private static List<String> valuesBefore(String[] parts, String keyword) {
        List<String> values = new ArrayList<String>();
        for (String part : parts) {
            if (part.contains(keyword)) {
                values.add(part.split(keyword)[0]);
            }
        }
        return values;
    }

    /** Prints the heading followed by one value per line. */
    private static void printSection(String heading, List<String> values) {
        System.out.println(heading);
        for (String value : values) {
            System.out.println(value);
        }
    }

    public static void main(String args[]) {
        String str = "1Position1234Article4321Quantity2Position4323Article3323Quantity";

        // Zero-width split points directly after each keyword, so every part is "<number><keyword>".
        String[] parts = str.split("(?<=Position)|(?<=Article)|(?<=Quantity)");

        List<String> Position = valuesBefore(parts, "Position");
        List<String> Article = valuesBefore(parts, "Article");
        List<String> Quantity = valuesBefore(parts, "Quantity");

        printSection("Position:", Position);
        printSection("Article:", Article);
        printSection("Quantity:", Quantity);
    }
}
Output:
Position:
1
2
Article:
1234
4323
Quantity:
4321
3323

Categories

Resources