DataTable.Rows.Add() adds a row to the data table. However, how does it handle the underlying array?
When adding a single row at a time, does it rebuild the entire array with each row added?
Or is it able to simply modify the existing array without any hit on performance?
I am wondering if it’s better to determine the size of the array before filling it with data, or if somehow the data table is able to modify the collection without (behind the scenes) copying and moving things.
It’s my understanding that to adjust an array you have to redefine it and move previously existing data into the new structure.
My question is what is the work flow for the Collection.Add() method?
Take a look using software like DotPeek:
// Entry point for appending a row: forwards the row to the owning table's AddRow,
// passing -1 as the proposed ID.
DataTable.Rows.Add(DataRow row)
{
this.table.AddRow(row, -1);
}
which calls:
// Forwards to the three-argument InsertRow, passing -1 as the position.
DataTable.AddRow(DataRow row, int proposedID)
{
this.InsertRow(row, proposedID, -1);
}
which calls:
// Widens proposedID to long and forwards to the four-argument InsertRow with fireEvent = true.
DataTable.InsertRow(DataRow row, int proposedID, int pos)
{
this.InsertRow(row, (long) proposedID, pos, true);
}
which calls:
// Validates the row, assigns it a row ID, hands it to SetNewRecordWorker with
// DataRowAction.Add, and then checks the constraints of computed columns.
// Throws if row is null, belongs to another table, or already has a row ID.
DataTable.InsertRow(DataRow row, long proposedID, int pos, bool fireEvent)
{
Exception deferredException = (Exception) null;
// Argument validation: the row must exist, belong to this table, and not yet have an ID.
if (row == null)
throw ExceptionBuilder.ArgumentNull("row");
if (row.Table != this)
throw ExceptionBuilder.RowAlreadyInOtherCollection();
if (row.rowID != -1L)
throw ExceptionBuilder.RowAlreadyInTheCollection();
row.BeginEdit();
// Take the row's temp record as the record to commit, and clear it on the row.
int proposedRecord = row.tempRecord;
row.tempRecord = -1;
// -1 means "no ID requested": use the table's next row ID.
if (proposedID == -1L)
proposedID = this.nextRowID;
// flag remembers whether nextRowID is advanced here, so the catch block can undo it.
bool flag;
if (flag = this.nextRowID <= proposedID)
this.nextRowID = checked (proposedID + 1L);
try
{
try
{
row.rowID = proposedID;
this.SetNewRecordWorker(row, proposedRecord, DataRowAction.Add, false, false, pos, fireEvent, out deferredException);
}
catch
{
// Roll back: restore nextRowID (only if it still holds the value set above),
// clear the row ID, and give the row its temp record back before rethrowing.
if (flag && this.nextRowID == proposedID + 1L)
this.nextRowID = proposedID;
row.rowID = -1L;
row.tempRecord = proposedRecord;
throw;
}
// An exception the worker handed back via the out parameter is thrown here,
// after the worker has finished.
if (deferredException != null)
throw deferredException;
// Skip the computed-column checks when constraints are off or the table is loading.
if (!this.EnforceConstraints || this.inLoad)
return;
int count = this.columnCollection.Count;
for (int index = 0; index < count; ++index)
{
DataColumn dataColumn = this.columnCollection[index];
if (dataColumn.Computed)
dataColumn.CheckColumnConstraint(row, DataRowAction.Add);
}
}
finally
{
// Runs on every exit path, including the early return and the throws above.
row.ResetLastChangedColumn();
}
}
which calls:
// Commits proposedRecord as the row's new record for the given action. In order: raises
// RowChanging, puts the row into the row collection when action is Add, swaps the record
// (reporting record-state changes when live indexes exist), evaluates dependent
// expressions, and finally raises RowChanged when fireEvent is set.
// deferredException receives an expression-evaluation failure that occurred during an Add;
// it is null otherwise.
DataTable.SetNewRecordWorker(DataRow row, int proposedRecord, DataRowAction action, bool isInMerge, bool suppressEnsurePropertyChanged, int position, bool fireEvent, out Exception deferredException)
{
deferredException = (Exception) null;
if (row.tempRecord != proposedRecord)
{
if (!this.inDataLoad)
{
row.CheckInTable();
this.CheckNotModifying(row);
}
// The proposed record is already the row's new record: nothing to commit.
// Only a merge still raises RowChanged (with null event args).
if (proposedRecord == row.newRecord)
{
if (!isInMerge)
return;
this.RaiseRowChanged((DataRowChangeEventArgs) null, row, action);
return;
}
else
row.tempRecord = proposedRecord;
}
DataRowChangeEventArgs args = (DataRowChangeEventArgs) null;
try
{
// row._action is set only for the duration of the RowChanging notification.
row._action = action;
args = this.RaiseRowChanging((DataRowChangeEventArgs) null, row, action, fireEvent);
}
catch
{
row.tempRecord = -1;
throw;
}
finally
{
row._action = DataRowAction.Nothing;
}
row.tempRecord = -1;
// record: the row's new record before the swap. num: the proposed record, or, when
// there is none, the old record unless the row is Unchanged (then -1).
int record = row.newRecord;
int num = proposedRecord != -1 ? proposedRecord : (row.RowState != DataRowState.Unchanged ? row.oldRecord : -1);
// This is where an added row enters the row collection: appended when position is -1,
// otherwise inserted at the requested position.
if (action == DataRowAction.Add)
{
if (position == -1)
this.Rows.ArrayAdd(row);
else
this.Rows.ArrayInsert(row, position);
}
// For Delete/Change with dependent columns present, collect the row's parent and child
// rows now; the list is passed to EvaluateExpressions further down.
List<DataRow> cachedRows = (List<DataRow>) null;
if ((action == DataRowAction.Delete || action == DataRowAction.Change) && (this.dependentColumns != null && this.dependentColumns.Count > 0))
{
cachedRows = new List<DataRow>();
for (int index = 0; index < this.ParentRelations.Count; ++index)
{
DataRelation relation = this.ParentRelations[index];
if (relation.ChildTable == row.Table)
cachedRows.InsertRange(cachedRows.Count, (IEnumerable<DataRow>) row.GetParentRows(relation));
}
for (int index = 0; index < this.ChildRelations.Count; ++index)
{
DataRelation relation = this.ChildRelations[index];
if (relation.ParentTable == row.Table)
cachedRows.InsertRange(cachedRows.Count, (IEnumerable<DataRow>) row.GetChildRows(relation));
}
}
if (!suppressEnsurePropertyChanged && !row.HasPropertyChanged && (row.newRecord != proposedRecord && -1 != proposedRecord) && -1 != row.newRecord)
{
// NOTE(review): LastChangedColumn is assigned null twice in a row. Presumably the
// setter is stateful and the repetition is deliberate; confirm before deduplicating.
row.LastChangedColumn = (DataColumn) null;
row.LastChangedColumn = (DataColumn) null;
}
if (this.LiveIndexes.Count != 0)
{
// With live indexes, capture the record states before and after the swap and
// report both pairs through RecordStateChanged.
if (-1 == record && -1 != proposedRecord && (-1 != row.oldRecord && proposedRecord != row.oldRecord))
record = row.oldRecord;
DataViewRowState recordState1 = row.GetRecordState(record);
DataViewRowState recordState2 = row.GetRecordState(num);
row.newRecord = proposedRecord;
if (proposedRecord != -1)
this.recordManager[proposedRecord] = row;
DataViewRowState recordState3 = row.GetRecordState(record);
DataViewRowState recordState4 = row.GetRecordState(num);
this.RecordStateChanged(record, recordState1, recordState3, num, recordState2, recordState4);
}
else
{
// No live indexes: just swap the record and register the row with the record manager.
row.newRecord = proposedRecord;
if (proposedRecord != -1)
this.recordManager[proposedRecord] = row;
}
row.ResetLastChangedColumn();
// Free the previous record once the row no longer refers to it as its old, temp or
// new record, provided the record manager still maps it to this row.
if (-1 != record && record != row.oldRecord && (record != row.tempRecord && record != row.newRecord) && row == this.recordManager[record])
this.FreeRecord(ref record);
// A row that ends up Detached but still has a row ID is removed from the table.
if (row.RowState == DataRowState.Detached && row.rowID != -1L)
this.RemoveRow(row, false);
if (this.dependentColumns != null)
{
if (this.dependentColumns.Count > 0)
{
try
{
this.EvaluateExpressions(row, action, cachedRows);
}
catch (Exception ex)
{
// For an Add the failure is handed back through deferredException instead of
// being thrown here; for any other action it is rethrown.
// NOTE: `throw ex` (as opposed to `throw`) resets the exception's stack trace.
if (action != DataRowAction.Add)
throw ex;
deferredException = ex;
}
}
}
try
{
if (!fireEvent)
return;
this.RaiseRowChanged(args, row, action);
}
catch (Exception ex)
{
// Exceptions that ADP.IsCatchableExceptionType accepts are traced and not rethrown;
// all others propagate.
if (!ADP.IsCatchableExceptionType(ex))
throw;
else
ExceptionBuilder.TraceExceptionWithoutRethrow(ex);
}
}
which calls one of those:
// Adds the row to the backing list and stores the value returned by Add on the row
// as its RBTreeNodeId.
DataRowCollection.ArrayAdd(DataRow row)
{
row.RBTreeNodeId = this.list.Add(row);
}
// Inserts the row into the backing list at pos and stores the value returned by Insert
// on the row as its RBTreeNodeId.
DataRowCollection.ArrayInsert(DataRow row, int pos)
{
row.RBTreeNodeId = this.list.Insert(pos, row);
}
this.list is of type DataRowCollection.DataRowTree, derived from RBTree<DataRow>.
private sealed class DataRowTree : RBTree<DataRow>
The names RBTree<DataRow> and RBTreeNodeId allow us to conclude that a red-black tree is being used, so rows are not kept in a plain array that has to be copied each time one is added.
Google Guava provides nice helpers to implement equals and hashCode like the following example demonstrates:
// Builds the hash code from the three fields by passing them to the Objects.hashCode helper.
public int hashCode() {
return Objects.hashCode(lastName, firstName, gender);
}
Is there a similar library for Microsoft .NET?
I don't see why you'd need one. If you want to create a hash-code based on the default GetHashCode for 3 different items, then just use:
Tuple.Create(lastName, firstName, gender).GetHashCode()
That'll boil down to the equivalent of:
// Hash each field, then fold the three hashes together with shift-add and XOR.
// NOTE: as written, each GetHashCode() call dereferences its field, so a null field
// would throw here.
int h1 = lastName.GetHashCode();
int h2 = firstName.GetHashCode();
int h3 = gender.GetHashCode();
return (((h1 << 5) + h1) ^ (((h2 << 5) + h2) ^ h3));
Which is pretty reasonable for such a general-purpose combination.
Likewise:
Tuple.Create(lastName, firstName, gender).Equals(Tuple.Create(lastName2, firstName2, gender2))
Would boil down to the equivalent of calling:
// Null-safe, field-by-field comparison: a pair of fields matches when both are null, or
// when the left one is non-null and Equals() its own counterpart on the other object.
return ((lastName == null && lastName2 == null) || (lastName != null && lastName.Equals(lastName2)))
    && ((firstName == null && firstName2 == null) || (firstName != null && firstName.Equals(firstName2)))
    && ((gender == null && gender2 == null) || (gender != null && gender.Equals(gender2)));
Again, about as good as you could expect.
AFAIK none. However, writing your own shouldn't be too complex (nb using a variation of the Bernstein hash):
/// <summary>
/// Small helpers for implementing Equals and GetHashCode from a list of member values.
/// </summary>
public static class Objects
{
    /// <summary>
    /// Compares two items by the sequences of values that <paramref name="selector"/>
    /// projects from each of them.
    /// </summary>
    /// <param name="item1">First item; may be null.</param>
    /// <param name="item2">Second item; may be null.</param>
    /// <param name="selector">Projects an item onto the values that take part in equality.</param>
    /// <returns>
    /// True when both items are the same reference (or both null), or when both projected
    /// sequences have the same length and are pairwise equal; false otherwise.
    /// </returns>
    public static bool Equals<T>(T item1, T item2, Func<T, IEnumerable<object>> selector)
    {
        if (object.ReferenceEquals(item1, item2)) return true;
        if (item1 == null || item2 == null) return false;

        using (var iterator1 = selector(item1).GetEnumerator())
        using (var iterator2 = selector(item2).GetEnumerator())
        {
            // Walk both sequences in lock-step so that every projected value is compared,
            // not just the first one.
            while (true)
            {
                var moved1 = iterator1.MoveNext();
                var moved2 = iterator2.MoveNext();
                if (moved1 != moved2) return false;   // one sequence is longer than the other
                if (!moved1) return true;             // both ended without finding a mismatch
                if (!object.Equals(iterator1.Current, iterator2.Current)) return false;
            }
        }
    }

    /// <summary>Null-safe equality of two values; delegates to <c>object.Equals</c>.</summary>
    public static bool Equals(object item1, object item2)
    {
        return object.Equals(item1, item2);
    }

    /// <summary>
    /// Combines the hash codes of the given values (multiply-by-31 accumulation from 17).
    /// </summary>
    /// <param name="objects">
    /// Values to combine. Null elements contribute 0; a null array (which is what a single
    /// literal <c>null</c> argument binds to) is treated as one null value.
    /// </param>
    /// <returns>The combined hash code.</returns>
    public static int GetHashCode(params object[] objects)
    {
        if (objects == null)
        {
            objects = new object[] { null };
        }

        // Overflow is expected and harmless for hash mixing.
        unchecked
        {
            int hash = 17;
            foreach (var item in objects)
            {
                hash = hash * 31 + (item == null ? 0 : item.GetHashCode());
            }
            return hash;
        }
    }
}
I'm pretty happy with the following method. It takes an enumerable and a list of sorted, disjoint ranges and skips items not in the ranges. If the ranges are null, we just walk every item. The enumerable and the list of ranges are both possibly large. We want this method to be as high performance as possible.
Can someone think of a more elegant piece of code? I'm primarily interested in C# implementations, but if someone has a three-character APL implementation, that's cool too.
/// <summary>
/// Streams the items of <paramref name="source"/> whose zero-based position falls inside
/// one of the given sorted, disjoint, inclusive ranges. A null <paramref name="ranges"/>
/// yields every item.
/// </summary>
/// <param name="source">Sequence to walk.</param>
/// <param name="ranges">Null, or a non-empty list of (First, Second) index pairs.</param>
public static IEnumerable<T> WalkRanges<T>(IEnumerable<T> source, List<Pair<int, int>> ranges)
{
    Debug.Assert(ranges == null || ranges.Count > 0);

    bool filtering = ranges != null;
    int position = 0;
    int rangeIndex = filtering ? 0 : -1;
    Pair<int, int> activeRange = filtering ? ranges[0] : new Pair<int, int>();
    // We start out in a gap unless the first range begins at position 0.
    bool skipping = filtering && activeRange.First > 0;

    foreach (T element in source)
    {
        if (filtering && skipping)
        {
            if (position != activeRange.First)
            {
                // Still in the gap before the active range.
                position++;
                continue;
            }
            skipping = false;
        }

        yield return element;

        if (filtering && position == activeRange.Second)
        {
            // That was the last item of the active range.
            if (rangeIndex == ranges.Count - 1)
            {
                yield break; // no ranges left, stop reading the source
            }
            rangeIndex++;
            activeRange = ranges[rangeIndex];
            skipping = true;
        }

        position++;
    }
}
Maybe use linq on your source something like:
/// <summary>
/// LINQ variant: keeps the items of <paramref name="source"/> whose zero-based index lies
/// inside any of the given ranges. Range bounds are inclusive on both ends.
/// </summary>
/// <param name="source">Sequence to filter.</param>
/// <param name="ranges">Index ranges to keep, or null to keep every item.</param>
/// <returns>The filtered sequence; <paramref name="source"/> itself when ranges is null.</returns>
/// <remarks>
/// Every item is tested against every range, so the cost is proportional to
/// (items x ranges); this version favours brevity over speed.
/// </remarks>
public static IEnumerable<T> WalkRanges<T>(IEnumerable<T> source, List<Pair<int, int>> ranges)
{
    // Null ranges means "walk every item", so hand back the source rather than null.
    if (ranges == null)
        return source;

    // <= on both sides: the first and last index of a range belong to it.
    return source.Where((item, index) => ranges.Any(range => range.First <= index && index <= range.Second));
}
I don't have my Windows PC in front of me and I'm not sure I understood your code correctly, but I tried to understand your text instead and the code above could work.... or something like it.
UPDATED: Regarding the performance issue I would recommend you to test the performance with some simple test and time both of the functions.
You could copy the source list to an array and then for each range, you can block copy from your new source array to a target array in the proper location. If you can get your source collection passed in as an array, that would make this an even better approach. If you do have to do the initial copy, it is O(N) for that operation plus O(M) where M is the total number of items in the final array. So it ends up coming out to O(N) in either case.
Here's my take. I find it easier to understand, if not more elegant.
/// <summary>
/// Returns the items of <paramref name="source"/> whose zero-based position falls inside
/// one of the sorted, disjoint, inclusive <paramref name="ranges"/>.
/// </summary>
/// <param name="source">Sequence to walk.</param>
/// <param name="ranges">Ranges as (first, last) index pairs, or null to return every item.</param>
/// <returns>A lazily evaluated sequence; <paramref name="source"/> itself when ranges is null.</returns>
public static IEnumerable<T> WalkRanges<T>(IEnumerable<T> source, List<Tuple<int, int>> ranges)
{
    if (ranges == null)
        return source;
    Debug.Assert(ranges.Count > 0);
    // The iterator lives in its own method so the checks above run at call time
    // instead of being deferred to the first MoveNext().
    return WalkRangesInternal(source, ranges);
}

/// <summary>Iterator behind <see cref="WalkRanges{T}"/>; expects non-null arguments.</summary>
static IEnumerable<T> WalkRangesInternal<T>(IEnumerable<T> source, List<Tuple<int, int>> ranges)
{
    // Index of the item the next successful sourceEnum.MoveNext() will produce.
    int nextIndex = 0;
    using (var sourceEnum = source.GetEnumerator())
    {
        foreach (var range in ranges)
        {
            // Skip over every item in the gap in front of this range.
            while (nextIndex < range.Item1)
            {
                // Source exhausted: stop at once instead of visiting the remaining ranges.
                if (!sourceEnum.MoveNext())
                    yield break;
                nextIndex++;
            }

            // Yield every item inside the range (both ends inclusive).
            while (nextIndex <= range.Item2)
            {
                if (!sourceEnum.MoveNext())
                    yield break;
                yield return sourceEnum.Current;
                nextIndex++;
            }
        }
    }
}
How about this (untested)? Should have pretty similar performance characteristics (pure streaming, no unnecessary buffering, quick exit), but is easier to follow, IMO:
/// <summary>
/// Validates the arguments and then either returns the source unchanged (null ranges)
/// or hands over to the streaming <c>RangeIterate</c> iterator.
/// </summary>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
public static IEnumerable<T> WalkRanges<T>(IEnumerable<T> source,
    List<Pair<int, int>> ranges)
{
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }

    // Per the spec, null ranges means "walk every item".
    if (ranges == null)
    {
        return source;
    }

    return RangeIterate(source, ranges);
}
/// <summary>
/// Streams the items of <paramref name="source"/> whose position matches the next index
/// produced by flattening <paramref name="ranges"/> into a lazy sequence of indices.
/// </summary>
private static IEnumerable<T> RangeIterate<T>(IEnumerable<T> source,
    List<Pair<int, int>> ranges)
{
    // Lazily expand each range into its indices (start clamped at 0); nothing is buffered.
    IEnumerable<int> wantedIndices = ranges.SelectMany(range =>
    {
        int first = Math.Max(0, range.First);
        return Enumerable.Range(first, range.Second - first + 1);
    });

    using (IEnumerator<int> wanted = wantedIndices.GetEnumerator())
    {
        // Nothing requested at all: finish without touching the source.
        if (!wanted.MoveNext())
        {
            yield break;
        }

        int position = 0;
        foreach (T element in source)
        {
            if (position++ != wanted.Current)
            {
                continue;
            }

            yield return element;

            // Advance to the next wanted index; stop reading once there is none left.
            if (!wanted.MoveNext())
            {
                yield break;
            }
        }
    }
}
If you don't mind buffering indices, you can do something like this, which is even clearer, IMO:
/// <summary>
/// Buffered variant: expands all ranges into a set of wanted indices up front, then
/// filters the source by index, never reading past the end of the last range.
/// </summary>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
public static IEnumerable<T> WalkRanges<T>(IEnumerable<T> source,
    List<Pair<int, int>> ranges)
{
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }
    if (ranges == null)
    {
        return source;
    }
    // An empty range list selects nothing.
    if (ranges.Count == 0)
    {
        return Enumerable.Empty<T>();
    }

    // Collect every index covered by a range (start clamped at 0) into a set.
    var wanted = new HashSet<int>();
    foreach (var range in ranges)
    {
        int first = Math.Max(0, range.First);
        foreach (int index in Enumerable.Range(first, range.Second - first + 1))
        {
            wanted.Add(index);
        }
    }

    // Items beyond the last index of the last range can never match, so cut the source there.
    int cutoff = ranges[ranges.Count - 1].Second + 1;
    return source.Take(cutoff)
                 .Where((item, index) => wanted.Contains(index));
}
You could iterate over the collection manually to prevent the enumerator from getting the current item when it will be skipped:
/// <summary>
/// Same range walk as a plain foreach version, but driving the enumerator by hand so that
/// <c>Current</c> is only read for items that are actually yielded.
/// </summary>
/// <param name="source">Sequence to walk.</param>
/// <param name="ranges">Null (yield everything), or a non-empty list of sorted, disjoint, inclusive ranges.</param>
public static IEnumerable<T> WalkRanges<T>(IEnumerable<T> source, List<Pair<int, int>> ranges)
{
    Debug.Assert(ranges == null || ranges.Count > 0);

    bool hasRanges = ranges != null;
    int rangeCursor = -1;
    var window = new Pair<int, int>();
    bool inGap = false;
    if (hasRanges)
    {
        rangeCursor = 0;
        window = ranges[rangeCursor];
        inGap = window.First > 0;
    }

    using (IEnumerator<T> cursor = source.GetEnumerator())
    {
        // index counts the position of the item the enumerator currently sits on.
        for (int index = 0; cursor.MoveNext(); index++)
        {
            if (hasRanges && inGap)
            {
                if (index != window.First)
                {
                    continue; // still between ranges; Current is never touched
                }
                inGap = false;
            }

            yield return cursor.Current;

            if (!hasRanges || index != window.Second)
            {
                continue;
            }

            // The current window is finished.
            if (rangeCursor == ranges.Count - 1)
            {
                yield break; // that was the last item of the last range
            }
            window = ranges[++rangeCursor];
            inGap = true;
        }
    }
}
My second try; this one takes the ordering of the ranges into account. I haven't tried it yet, but I think it works :). You could probably extract some of the code into smaller functions to make it more readable.
/// <summary>
/// Yields the items of <paramref name="source"/> whose zero-based index lies inside one of
/// the sorted, disjoint ranges. Range bounds are inclusive on both ends.
/// </summary>
/// <param name="source">Sequence to walk.</param>
/// <param name="ranges">Sorted, disjoint (First, Second) index pairs, or null to yield every item.</param>
public static IEnumerable<T> WalkRanges<T>(IEnumerable<T> source, List<Pair<int, int>> ranges)
{
    // Null ranges means "no filtering".
    if (ranges == null)
    {
        foreach (var item in source)
            yield return item;
        yield break;
    }

    int currentIndex = 0;
    int currentRangeIndex = 0;
    int maxRangeIndex = ranges.Count;

    foreach (var item in source)
    {
        // Move past every range that ends before the current index. Checking the bound
        // first keeps the indexer from running off the end of the list.
        while (currentRangeIndex < maxRangeIndex
               && currentIndex > ranges[currentRangeIndex].Second)
        {
            currentRangeIndex++;
        }

        // We've passed the last range, so nothing further can match.
        if (currentRangeIndex >= maxRangeIndex)
            yield break;

        // Not past the range's end (ensured above); inside it once we reach First.
        if (currentIndex >= ranges[currentRangeIndex].First)
            yield return item;

        currentIndex++; // always advance the index along with the source
    }
}
A project using the .NET Compact Framework cannot use the System.Web library.
How can I HTML-decode a string using the .NET Compact Framework?
I couldn't find any class that does that.
If you decompile the .NET Framework source you can get the methods you need. I did it for you, so you can copy and paste the two classes below:
/// <summary>
/// Lookup table from HTML named character entities (without the leading '&amp;' and the
/// trailing ';') to the character they stand for.
/// </summary>
internal class HtmlEntities
{
    // Each entry is "<character>-<entity name>": index 0 holds the character, index 1 is a
    // '-' separator, and the name starts at index 2.
    // Whitespace and zero-width characters are written as \u escapes so they cannot be
    // lost or altered when the source file is copied or re-encoded.
    private static readonly string[] _entitiesList = new string[] {
        "\"-quot", "&-amp", "<-lt", ">-gt", "\x00a0-nbsp", "\x00a1-iexcl", "\x00a2-cent", "\x00a3-pound", "\x00a4-curren", "\x00a5-yen", "\x00a6-brvbar", "\x00a7-sect", "\x00a8-uml", "\x00a9-copy", "\x00aa-ordf", "\x00ab-laquo",
        "\x00ac-not", "\x00ad-shy", "\x00ae-reg", "\x00af-macr", "\x00b0-deg", "\x00b1-plusmn", "\x00b2-sup2", "\x00b3-sup3", "\x00b4-acute", "\x00b5-micro", "\x00b6-para", "\x00b7-middot", "\x00b8-cedil", "\x00b9-sup1", "\x00ba-ordm", "\x00bb-raquo",
        "\x00bc-frac14", "\x00bd-frac12", "\x00be-frac34", "\x00bf-iquest", "\x00c0-Agrave", "\x00c1-Aacute", "\x00c2-Acirc", "\x00c3-Atilde", "\x00c4-Auml", "\x00c5-Aring", "\x00c6-AElig", "\x00c7-Ccedil", "\x00c8-Egrave", "\x00c9-Eacute", "\x00ca-Ecirc", "\x00cb-Euml",
        "\x00cc-Igrave", "\x00cd-Iacute", "\x00ce-Icirc", "\x00cf-Iuml", "\x00d0-ETH", "\x00d1-Ntilde", "\x00d2-Ograve", "\x00d3-Oacute", "\x00d4-Ocirc", "\x00d5-Otilde", "\x00d6-Ouml", "\x00d7-times", "\x00d8-Oslash", "\x00d9-Ugrave", "\x00da-Uacute", "\x00db-Ucirc",
        "\x00dc-Uuml", "\x00dd-Yacute", "\x00de-THORN", "\x00df-szlig", "\x00e0-agrave", "\x00e1-aacute", "\x00e2-acirc", "\x00e3-atilde", "\x00e4-auml", "\x00e5-aring", "\x00e6-aelig", "\x00e7-ccedil", "\x00e8-egrave", "\x00e9-eacute", "\x00ea-ecirc", "\x00eb-euml",
        "\x00ec-igrave", "\x00ed-iacute", "\x00ee-icirc", "\x00ef-iuml", "\x00f0-eth", "\x00f1-ntilde", "\x00f2-ograve", "\x00f3-oacute", "\x00f4-ocirc", "\x00f5-otilde", "\x00f6-ouml", "\x00f7-divide", "\x00f8-oslash", "\x00f9-ugrave", "\x00fa-uacute", "\x00fb-ucirc",
        "\x00fc-uuml", "\x00fd-yacute", "\x00fe-thorn", "\x00ff-yuml", "Œ-OElig", "œ-oelig", "Š-Scaron", "š-scaron", "Ÿ-Yuml", "ƒ-fnof", "ˆ-circ", "˜-tilde", "Α-Alpha", "Β-Beta", "Γ-Gamma", "Δ-Delta",
        "Ε-Epsilon", "Ζ-Zeta", "Η-Eta", "Θ-Theta", "Ι-Iota", "Κ-Kappa", "Λ-Lambda", "Μ-Mu", "Ν-Nu", "Ξ-Xi", "Ο-Omicron", "Π-Pi", "Ρ-Rho", "Σ-Sigma", "Τ-Tau", "Υ-Upsilon",
        "Φ-Phi", "Χ-Chi", "Ψ-Psi", "Ω-Omega", "α-alpha", "β-beta", "γ-gamma", "δ-delta", "ε-epsilon", "ζ-zeta", "η-eta", "θ-theta", "ι-iota", "κ-kappa", "λ-lambda", "μ-mu",
        "ν-nu", "ξ-xi", "ο-omicron", "π-pi", "ρ-rho", "ς-sigmaf", "σ-sigma", "τ-tau", "υ-upsilon", "φ-phi", "χ-chi", "ψ-psi", "ω-omega", "ϑ-thetasym", "ϒ-upsih", "ϖ-piv",
        "\u2002-ensp", "\u2003-emsp", "\u2009-thinsp", "\u200c-zwnj", "\u200d-zwj", "\u200e-lrm", "\u200f-rlm", "–-ndash", "—-mdash", "‘-lsquo", "’-rsquo", "‚-sbquo", "“-ldquo", "”-rdquo", "„-bdquo", "†-dagger",
        "‡-Dagger", "•-bull", "…-hellip", "‰-permil", "′-prime", "″-Prime", "‹-lsaquo", "›-rsaquo", "‾-oline", "⁄-frasl", "€-euro", "ℑ-image", "℘-weierp", "ℜ-real", "™-trade", "ℵ-alefsym",
        "←-larr", "↑-uarr", "→-rarr", "↓-darr", "↔-harr", "↵-crarr", "⇐-lArr", "⇑-uArr", "⇒-rArr", "⇓-dArr", "⇔-hArr", "∀-forall", "∂-part", "∃-exist", "∅-empty", "∇-nabla",
        "∈-isin", "∉-notin", "∋-ni", "∏-prod", "∑-sum", "−-minus", "∗-lowast", "√-radic", "∝-prop", "∞-infin", "∠-ang", "∧-and", "∨-or", "∩-cap", "∪-cup", "∫-int",
        "∴-there4", "∼-sim", "≅-cong", "≈-asymp", "≠-ne", "≡-equiv", "≤-le", "≥-ge", "⊂-sub", "⊃-sup", "⊄-nsub", "⊆-sube", "⊇-supe", "⊕-oplus", "⊗-otimes", "⊥-perp",
        // Remaining HTML 4 entities that were missing from the table.
        "\u22c5-sdot", "\u2308-lceil", "\u2309-rceil", "\u230a-lfloor", "\u230b-rfloor", "\u2329-lang", "\u232a-rang", "\u25ca-loz", "\u2660-spades", "\u2663-clubs", "\u2665-hearts", "\u2666-diams",
    };

    // Built on first use by Lookup; maps entity name (string) -> boxed char.
    private static Hashtable _entitiesLookupTable;
    private static readonly object _lookupLockObject = new object();

    /// <summary>Resolves a named entity to its character.</summary>
    /// <param name="entity">Entity name without '&amp;' and ';' (for example "amp"). Names are case-sensitive.</param>
    /// <returns>The character for the entity, or '\0' when the name is unknown.</returns>
    internal static char Lookup(string entity)
    {
        // Build the table once: check, take the lock, check again. The table is filled
        // completely before it is published through the static field.
        if (_entitiesLookupTable == null)
        {
            lock (_lookupLockObject)
            {
                if (_entitiesLookupTable == null)
                {
                    Hashtable hashtable = new Hashtable();
                    foreach (string str in _entitiesList)
                    {
                        // "<char>-<name>": key is the name (from index 2), value the char at index 0.
                        hashtable[str.Substring(2)] = str[0];
                    }
                    _entitiesLookupTable = hashtable;
                }
            }
        }

        object obj2 = _entitiesLookupTable[entity];
        if (obj2 != null)
        {
            return (char)obj2;
        }
        return '\0';
    }
}
/// <summary>
/// Minimal HTML decoder for platforms without System.Web: turns named
/// (<c>&amp;amp;</c>) and numeric (<c>&amp;#65;</c>, <c>&amp;#x41;</c>) character references
/// back into characters.
/// </summary>
public sealed class HttpUtility
{
    // A reference ends at ';'. Hitting another '&' first means the current '&' did not
    // start a reference.
    private static readonly char[] s_entityEndingChars = new char[] { ';', '&' };

    /// <summary>Decodes the character references in <paramref name="s"/>.</summary>
    /// <param name="s">Text to decode; may be null.</param>
    /// <returns>The decoded text; null when <paramref name="s"/> is null.</returns>
    public static string HtmlDecode(string s)
    {
        if (s == null)
        {
            return null;
        }
        // Without '&' there is nothing to decode; return the same instance.
        if (s.IndexOf('&') < 0)
        {
            return s;
        }
        StringBuilder sb = new StringBuilder();
        StringWriter output = new StringWriter(sb);
        HtmlDecode(s, output);
        return sb.ToString();
    }

    /// <summary>
    /// Decodes the character references in <paramref name="s"/> and writes the result to
    /// <paramref name="output"/>. Nothing is written when <paramref name="s"/> is null.
    /// Unknown named references and malformed or out-of-range numeric references are
    /// written through unchanged.
    /// </summary>
    /// <param name="s">Text to decode; may be null.</param>
    /// <param name="output">Writer that receives the decoded text.</param>
    public static void HtmlDecode(string s, TextWriter output)
    {
        if (s == null)
        {
            return;
        }
        if (s.IndexOf('&') < 0)
        {
            output.Write(s);
            return;
        }

        int length = s.Length;
        for (int i = 0; i < length; i++)
        {
            char ch = s[i];
            if (ch == '&')
            {
                int end = s.IndexOfAny(s_entityEndingChars, i + 1);
                if (end > 0 && s[end] == ';')
                {
                    string entity = s.Substring(i + 1, end - i - 1);
                    if (entity.Length > 1 && entity[0] == '#')
                    {
                        int codePoint = ParseNumericEntity(entity);
                        if (codePoint >= 0 && codePoint <= 0xFFFF)
                        {
                            ch = (char)codePoint;
                            i = end;
                        }
                        else if (codePoint > 0xFFFF && codePoint <= 0x10FFFF)
                        {
                            // Beyond the BMP: a single char cannot hold it, so emit the
                            // UTF-16 surrogate pair instead of truncating the value.
                            int offset = codePoint - 0x10000;
                            output.Write((char)(0xD800 + (offset >> 10)));
                            output.Write((char)(0xDC00 + (offset & 0x3FF)));
                            i = end;
                            continue;
                        }
                        // Otherwise the reference is malformed or out of range: ch is still
                        // '&' and i is untouched, so the text is copied through verbatim.
                    }
                    else
                    {
                        i = end;
                        char decoded = HtmlEntities.Lookup(entity);
                        if (decoded == '\0')
                        {
                            // Unknown name: reproduce the reference exactly as it appeared.
                            output.Write('&');
                            output.Write(entity);
                            output.Write(';');
                            continue;
                        }
                        ch = decoded;
                    }
                }
            }
            output.Write(ch);
        }
    }

    // Parses the body of a numeric reference ("#123" or "#x7B") into a code point.
    // Returns a negative value when the digits are malformed, overflow an int, or parse
    // to a negative number.
    private static int ParseNumericEntity(string entity)
    {
        try
        {
            bool isHex = entity[1] == 'x' || entity[1] == 'X';
            // Invariant culture: the result must not depend on the machine's locale.
            return isHex
                ? int.Parse(entity.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture)
                : int.Parse(entity.Substring(1), NumberStyles.Integer, CultureInfo.InvariantCulture);
        }
        catch (FormatException)
        {
            return -1;
        }
        catch (OverflowException)
        {
            // e.g. "&#99999999999;": too large for an int, so not a valid code point.
            return -1;
        }
        catch (ArgumentException)
        {
            return -1;
        }
    }
}
You could always port the code from Mono, sample VB code or sample Java code found with a simple web search.
I've used reflector in the past to find just the methods I need from the framework that aren't included and compile them directly into my compact.net project.