ByteArrayOutputStream in C# - c#

I have the following code in Java:
public static byte[] hex(String hex) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
int nexti = 0;
int nextb = 0;
boolean highoc = true;
outer:
while (true)
{
int number = -1;
while (number == -1) {
if (nexti == hex.length()) {
break outer;
}
char chr = hex.charAt(nexti);
if ((chr >= '0') && (chr <= '9'))
number = chr - '0';
else if ((chr >= 'a') && (chr <= 'f'))
number = chr - 'a' + 10;
else if ((chr >= 'A') && (chr <= 'F'))
number = chr - 'A' + 10;
else {
number = -1;
}
nexti++;
}
if (highoc) {
nextb = number << 4;
highoc = false;
} else {
nextb |= number;
highoc = true;
baos.write(nextb);
}
}
label161: return baos.toByteArray();
}
I'm trying to convert it to C#, and failing, because MemoryStream is the only option, and I don't have a buffer.
This is what I have now:
public static byte[] fromString(string hex)
{
MemoryStream baos = new MemoryStream();
int nexti = 0;
int nextb = 0;
bool highoc = true;
for (; ; )
{
int number = -1;
while (number == -1)
{
if (nexti == hex.Length)
{
goto END;
}
char chr = hex.ToCharArray()[nexti];
if (chr >= '0' && chr <= '9')
{
number = chr - '0';
}
else if (chr >= 'a' && chr <= 'f')
{
number = chr - 'a' + 10;
}
else if (chr >= 'A' && chr <= 'F')
{
number = chr - 'A' + 10;
}
else
{
number = -1;
}
nexti++;
}
if (highoc)
{
nextb = number << 4;
highoc = false;
}
else
{
nextb |= number;
highoc = true;
baos.Write(nextb);
}
}
END:
return baos.toByteArray();
}
What else can I do to make it work like the way in Java?.. Thanks.

Here is something similar
public static byte[] StringToByteArrayFastest(string hex) {
if (hex.Length % 2 == 1)
throw new Exception("The binary key cannot have an odd number of digits");
byte[] arr = new byte[hex.Length >> 1];
for (int i = 0; i < hex.Length >> 1; ++i)
{
arr[i] = (byte)((GetHexVal(hex[i << 1]) << 4) + (GetHexVal(hex[(i << 1) + 1])));
}
return arr;
}
public static int GetHexVal(char hex) {
int val = (int)hex;
//For uppercase A-F letters:
return val - (val < 58 ? 48 : 55);
//For lowercase a-f letters:
//return val - (val < 58 ? 48 : 87);
//Or the two combined, but a bit slower:
//return val - (val < 58 ? 48 : (val < 97 ? 55 : 87));
}
or
public static byte[] StringToByteArray(String hex)
{
int NumberChars = hex.Length;
byte[] bytes = new byte[NumberChars / 2];
for (int i = 0; i < NumberChars; i += 2)
bytes[i / 2] = Convert.ToByte(hex.Substring(i, 2), 16);
return bytes;
}
or
private byte[] HexStringToByteArray(string hexString)
{
int hexStringLength = hexString.Length;
byte[] b = new byte[hexStringLength / 2];
for (int i = 0; i < hexStringLength; i += 2)
{
int topChar = (hexString[i] > 0x40 ? hexString[i] - 0x37 : hexString[i] - 0x30) << 4;
int bottomChar = hexString[i + 1] > 0x40 ? hexString[i + 1] - 0x37 : hexString[i + 1] - 0x30;
b[i / 2] = Convert.ToByte(topChar + bottomChar);
}
return b;
}
Here is alot more of them.
How do you convert Byte Array to Hexadecimal String, and vice versa?

Related

How to write a code for my decrypt without special character

I am only able to encrypt, but i do not how to decrypt. Someone please help. Do I have to declare a bool variable?
Or is that any other better way to do it?
string UserInput = "";
int shift;
Shift OBSHIFT = new Shift();
Console.Write("\nType a string to encrypt:");
UserInput = Console.ReadLine();
Console.Write("How many chars would you like to shift?: ");
shift = int.Parse(Console.ReadLine());
Console.WriteLine("\nApplying Caesar cipher ... ");
Console.Write("Your encrypted string is: ");
Console.WriteLine(OBSHIFT.Cshift(UserInput, shift));
Console.Read();
}
}
class Shift
{
public string Cshift(string str, int shift )
{
string UserOutput = "";
char[] A = null;
A = str.ToCharArray();
int temp;
for (int i = 0; i < str.Length; i++)
{
char c = A[i];
if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
{
temp = (int)(A[i] + shift);
if ((c >= 'A' && c <= 'Z' && temp > 'Z') || (c >= 'a' && c <= 'z' && temp > 'z'))
temp = temp - 26;
else
temp = (int)(A[i] + (shift));
}
else
temp = c;
UserOutput += (char)temp;
}
return UserOutput;
}
}
}
}
Talking about Caesar cipher, you can simply negate the shift and get the original string.
I.e., cshift(cshift(string, x), -x) == string.
Using your Shift class:
int sh = 17;
string original = "abcdefgh";
string encrypted = shift.Cshift(original, sh);
string decrypted = shift.Cshift(shifted, -sh);
Console.WriteLine(decrypted == original); // true
For convenience, you can create a method Decrypt, which will do this:
class Shift
{
public string Encrypt(string originalString, int shift)
{
string userOutput = "";
char[] a = originalString.ToCharArray();
for (int i = 0; i < originalString.Length; i++)
{
char c = a[i];
int temp;
if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
{
temp = (int)(a[i] + shift);
if ((c >= 'A' && c <= 'Z' && temp > 'Z') || (c >= 'a' && c <= 'z' && temp > 'z'))
temp = temp - 26;
else
temp = (int)(a[i] + (shift));
}
else
temp = c;
userOutput += (char)temp;
}
return userOutput;
}
public string Decrypt(string cipherString, int shift)
{
return Encrypt(cipherString, -shift);
}
}
Note that I have also done some little code improvements like:
combined declaration and assignment of A
moved temp into the inner scope
gave the proper names to the local variables (lower-case)

find common substrings in 2 string in c#

I have strings like:
1) Cookie:ystat_tw_ss376223=9_16940400_234398;
2) Cookie:zynga_toolbar_fb_uid=1018132522
3) GET /2009/visuels/Metaboli_120x600_UK.gif HTTP/1.1
4) GET /2010/07/15/ipad-3hk-smv-price-hk/ HTTP/1.1
1 ad 2 have common substtring{cookie:}
3 and 4 have common substtring{GET /20, HTTP/1.1}
I want to find all common substrings that have the length more than three characters(contain space character) between 2 strings.(like 1 and 2)
i want to code in c#. i have a program but it has some problems.
Could anyone help me?
public static string[] MyMCS2(string a, string b)
{
string[] st = new string[100];
// List<string> st = new List<string>();
List<char> f = new List<char>();
int ctr = 0;
char[] str1 = a.ToCharArray();
char[] str2 = b.ToCharArray();
int m = 0;
int n = 0;
while (m < str1.Length)
{
for (n = 0; n < str2.Length; n++)
{
if (m < str1.Length)
{
if (str1[m] == str2[n])
{
if ((m > 1) && (n > 1) &&(str1[m - 1] == str2[n - 1]) && (str1[m - 2] == str2[n - 2]))
{
//f[m]= str1[m];
f.Add(str1[m]);
char[] ff = f.ToArray();
string aaa = new string(ff);
if (aaa.Length >= 3)
{
st[ctr] = aaa + "()";
//st.Add(aaa);
ctr++;
}
kk = m;
m++;
}
else if ((n == 0) ||(n == 1))
{
f.Add(str1[m]);
kk = m;
m++;
}
else
f.Clear();
}
//else if ((str1[m] == str2[n]) && (m == str1.Length - 1) && (n == str2.Length - 1))
//{
// f.Add(str1[m]);
// char[] ff = f.ToArray();
// string aaa = new string(ff);
// if (aaa.Length >= 3)
// {
// st[ctr] = aaa;
// ctr++;
// }
// // m++;
//}
else if ((str1[m] != str2[n]) && (n == (str2.Length - 1)))
{
m++;
}
else if ((m > 1) && (n > 1) && (str1[m] != str2[n]) && (str1[m - 1] == str2[n - 1]) && (str1[m - 2] == str2[n - 2]) && (str1[m - 3] == str2[n - 3]))
{
//
char[] ff = f.ToArray();
string aaa = new string(ff);
if (aaa.Length >= 3)
{
st[ctr] = aaa + "()" ;
//st.Add(aaa);
ctr++;
f.Clear();
}
//f.Clear();
//for (int h = 0; h < ff.Length; h++)
//{
// f[h] = '\0';
//}
}
else if (str1[m] != str2[n])
continue;
}
}
}
//int gb = st.Length;
return st;
}
This is an exact matching problem not a substring. You can solve it with aho-corasick algorithm. Use the first string and compute a finite state machine. Then process the search string. You can extend the aho-corasick algorithm to use a wildcard and search also for substrings. You can try this animated example: http://blog.ivank.net/aho-corasick-algorithm-in-as3.html

Compare string using bitwise shift operation

So i am new to csharp and i cant seem to find a logical error here in this program.i am learning the bitwise shift operators as i am new to these operators. I need help tracing a fault in my code. the program encodes an input String and decodes the encoded String after.After that i compare the string to see if they are equal.They seem to be equal to me but i keep getting a false when i compare them. Here is my code:
class Program
{
static char[] transcode = new char[64];
private static void prep()
{
for (int i = 0; i < transcode.Length; i++)
{
transcode[i] = (char)((int)'A' + i);
if (i > 25 && i <= 51)
{
transcode[i] = (char)((int)transcode[i] + 6);
}
else if (i > 51)
{
transcode[i] = (char)((int)transcode[i] - 0x4b);
}
}
transcode[transcode.Length - 3] = '+';
transcode[transcode.Length - 2] = '/';
transcode[transcode.Length - 1] = '=';
}
static void Main(string[] args)
{
prep();
string test_string = "a";
if (Convert.ToBoolean(String.Compare(test_string, decode(encode(test_string)))))
{
Console.WriteLine("Test succeeded");
}
else
{
Console.WriteLine("Test failed");
}
}
private static string encode(string input)
{
int l = input.Length;
int cb = (l / 3 + (Convert.ToBoolean(l % 3) ? 1 : 0)) * 4;// (0 +(1))*4 =4
char[] output = new char[cb];
for (int i = 0; i < cb; i++)
{
output[i] = '=';
}
int c = 0;
int reflex = 0;
const int s = 0x3f;
for (int j = 0; j < l; j++)
{
reflex <<= 8;
reflex &= 0x00ffff00;
reflex += input[j];
int x = ((j % 3) + 1) * 2;
int mask = s << x;
while (mask >= s)
{
int pivot = (reflex & mask) >> x;
output[c++] = transcode[pivot];
char alpha = transcode[pivot];
int invert = ~mask;
reflex &= invert;
mask >>= 6;
x -= 6; //-4
}
}
switch (l % 3)
{
case 1:
reflex <<= 4; //16
output[c++] = transcode[reflex];
char at16 = transcode[16];
// Console.WriteLine("Character at 16 is: " + at16);
break;
case 2:
reflex <<= 2;
output[c++] = transcode[reflex];
break;
}
return new string(output);//final value is: YQ== (Encoded String.)
}
private static string decode(string input)//input is YQ== which has a length of 4
{
int l = input.Length;
int cb = (l / 4 + ((Convert.ToBoolean(l % 4)) ? 1 : 0)) * 3 + 1; // (1 + (0))*4
char[] output = new char[cb]; //4 in length
int c = 0;
int bits = 0;
int reflex = 0;
for (int j = 0; j < l; j++)
{
reflex <<= 6;
bits += 6;
bool fTerminate = ('=' == input[j]);
if (!fTerminate)
{
reflex += indexOf(input[j]);
while (bits >= 8)
{
int mask = 0x000000ff << (bits % 8);
output[c++] = (char)((reflex & mask) >> (bits % 8)); //convert issue cannot implicitly convert to proper data type.so will have to explicitly convert.
int invert = ~mask;
reflex &= invert;
bits -= 8;
}
}
else
{
break;
}
}
return new string(output);
}
private static int indexOf(char ch)
{
int index;
for (index = 0; index < transcode.Length; index++)
if (ch == transcode[index])
break;
return index;
}
}
Read the docs for String.Compare then read the docs for Convert.ToBoolean. Pay particular attention to the value returned by String.Compare when two strings are equal. Then compare with how that value gets converted to a boolean by ToBoolean
String.Compare is designed for sorting strings. It returns 0 when two strings are equal. ToBoolean will convert that 0 to false. So when you strings are equal, your if evaluates to false and not true.
A simple change would be:
if (String.Compare(test_string, decode(encode(test_string)))==0)
{
Console.WriteLine("Test succeeded");
}
else
{
Console.WriteLine("Test failed");
}
#Tom's comment about the trailing nulls also applies, but it seems that String.Compare just ignores them.

Get longest substring between two strings

I have two words,
britanicaeng and
britanicahin
I need to find out the longest common word between these i.e, britanica.
How can I do this in C# ?
Try this method:
public static string FindLongestCommonSubstring(string s1, string s2)
{
int[,] a = new int[s1.Length + 1, s2.Length + 1];
int row = 0; // s1 index
int col = 0; // s2 index
for (var i = 0; i < s1.Length; i++)
for (var j = 0; j < s2.Length; j++)
if (s1[i] == s2[j])
{
int len = a[i + 1, j + 1] = a[i, j] + 1;
if (len > a[row, col])
{
row = i + 1;
col = j + 1;
}
}
return s1.Substring(row - a[row, col], a[row, col]);
}
Usage example:
Console.WriteLine(FindLongestCommonSubstring("britanicaeng", "britanicahin"));
I refactored the C++ code from Ashutosh Singh at https://iq.opengenus.org/longest-common-substring-using-rolling-hash/ to create a rolling hash approach in C# - this will find the substring in O(N * log(N)^2) time and O(N) space
using System;
using System.Collections.Generic;
public class RollingHash
{
private class RollingHashPowers
{
// _mod = prime modulus of polynomial hashing
// any prime number over a billion should suffice
internal const int _mod = (int)1e9 + 123;
// _hashBase = base (point of hashing)
// this should be a prime number larger than the number of characters used
// in my use case I am only interested in ASCII (256) characters
// for strings in languages using non-latin characters, this should be much larger
internal const long _hashBase = 257;
// _pow1 = powers of base modulo mod
internal readonly List<int> _pow1 = new List<int> { 1 };
// _pow2 = powers of base modulo 2^64
internal readonly List<long> _pow2 = new List<long> { 1L };
internal void EnsureLength(int length)
{
if (_pow1.Capacity < length)
{
_pow1.Capacity = _pow2.Capacity = length;
}
for (int currentIndx = _pow1.Count - 1; currentIndx < length; ++currentIndx)
{
_pow1.Add((int)(_pow1[currentIndx] * _hashBase % _mod));
_pow2.Add(_pow2[currentIndx] * _hashBase);
}
}
}
private class RollingHashedString
{
readonly RollingHashPowers _pows;
readonly int[] _pref1; // Hash on prefix modulo mod
readonly long[] _pref2; // Hash on prefix modulo 2^64
// Constructor from string:
internal RollingHashedString(RollingHashPowers pows, string s, bool caseInsensitive = false)
{
_pows = pows;
_pref1 = new int[s.Length + 1];
_pref2 = new long[s.Length + 1];
const long capAVal = 'A';
const long capZVal = 'Z';
const long aADif = 'a' - 'A';
unsafe
{
fixed (char* c = s)
{
// Fill arrays with polynomial hashes on prefix
for (int i = 0; i < s.Length; ++i)
{
long v = c[i];
if (caseInsensitive && capAVal <= v && v <= capZVal)
{
v += aADif;
}
_pref1[i + 1] = (int)((_pref1[i] + v * _pows._pow1[i]) % RollingHashPowers._mod);
_pref2[i + 1] = _pref2[i] + v * _pows._pow2[i];
}
}
}
}
// Rollingnomial hash of subsequence [pos, pos+len)
// If mxPow != 0, value automatically multiply on base in needed power.
// Finally base ^ mxPow
internal Tuple<int, long> Apply(int pos, int len, int mxPow = 0)
{
int hash1 = _pref1[pos + len] - _pref1[pos];
long hash2 = _pref2[pos + len] - _pref2[pos];
if (hash1 < 0)
{
hash1 += RollingHashPowers._mod;
}
if (mxPow != 0)
{
hash1 = (int)((long)hash1 * _pows._pow1[mxPow - (pos + len - 1)] % RollingHashPowers._mod);
hash2 *= _pows._pow2[mxPow - (pos + len - 1)];
}
return Tuple.Create(hash1, hash2);
}
}
private readonly RollingHashPowers _rhp;
public RollingHash(int longestLength = 0)
{
_rhp = new RollingHashPowers();
if (longestLength > 0)
{
_rhp.EnsureLength(longestLength);
}
}
public string FindCommonSubstring(string a, string b, bool caseInsensitive = false)
{
// Calculate max neede power of base:
int mxPow = Math.Max(a.Length, b.Length);
_rhp.EnsureLength(mxPow);
// Create hashing objects from strings:
RollingHashedString hash_a = new RollingHashedString(_rhp, a, caseInsensitive);
RollingHashedString hash_b = new RollingHashedString(_rhp, b, caseInsensitive);
// Binary search by length of same subsequence:
int pos = -1;
int low = 0;
int minLen = Math.Min(a.Length, b.Length);
int high = minLen + 1;
var tupleCompare = Comparer<Tuple<int, long>>.Default;
while (high - low > 1)
{
int mid = (low + high) / 2;
List<Tuple<int, long>> hashes = new List<Tuple<int, long>>(a.Length - mid + 1);
for (int i = 0; i + mid <= a.Length; ++i)
{
hashes.Add(hash_a.Apply(i, mid, mxPow));
}
hashes.Sort(tupleCompare);
int p = -1;
for (int i = 0; i + mid <= b.Length; ++i)
{
if (hashes.BinarySearch(hash_b.Apply(i, mid, mxPow), tupleCompare) >= 0)
{
p = i;
break;
}
}
if (p >= 0)
{
low = mid;
pos = p;
}
else
{
high = mid;
}
}
// Output answer:
return pos >= 0
? b.Substring(pos, low)
: string.Empty;
}
}

Faster parsing of numbers on .NET

I have written two functions that convert a string of whitespace-separated integers into an int array. The first function uses Substring and then applies System.Int32.Parse to convert the substring into an int value:
let intsOfString (s: string) =
let ints = ResizeArray()
let rec inside i j =
if j = s.Length then
ints.Add(s.Substring(i, j-i) |> System.Int32.Parse)
else
let c = s.[j]
if '0' <= c && c <= '9' then
inside i (j+1)
else
ints.Add(s.Substring(i, j-i) |> System.Int32.Parse)
outside (j+1)
and outside i =
if i < s.Length then
let c = s.[i]
if '0' <= c && c <= '9' then
inside i (i+1)
else
outside (i+1)
outside 0
ints.ToArray()
The second function traverses the characters of the string in-place accumulating the integer without creating a temporary substring:
let intsOfString (s: string) =
let ints = ResizeArray()
let rec inside n i =
if i = s.Length then
ints.Add n
else
let c = s.[i]
if '0' <= c && c <= '9' then
inside (10*n + int c - 48) (i+1)
else
ints.Add n
outside(i+1)
and outside i =
if i < s.Length then
let c = s.[i]
if '0' <= c && c <= '9' then
inside (int c - 48) (i+1)
else
outside (i+1)
outside 0
ints.ToArray()
Benchmarking on space-separated integers 1 to 1,000,000, the first version takes 1.5s whereas the second version takes 0.3s.
Parsing such values can be performance critical so leaving 5x performance on the table by using temporary substrings can be undesirable. Parsing integers is easy but parsing other values such as floating point numbers, decimals and dates is considerably harder.
So, are there built-in functions to parse directly from a substring within a string (i.e. using the given start and length of a string) in order to avoid generating a temporary string? If not, are there any libraries that provide efficient functions to do this?
System.Int32.Parse is slowlest, because it used CultureInfo, FormatInfo and etc; and performance reason is not in the temporary strings.
Code from reflection:
private unsafe static bool ParseNumber(ref char* str, NumberStyles options, ref Number.NumberBuffer number, NumberFormatInfo numfmt, bool parseDecimal)
{
number.scale = 0;
number.sign = false;
string text = null;
string text2 = null;
string str2 = null;
string str3 = null;
bool flag = false;
string str4;
string str5;
if ((options & NumberStyles.AllowCurrencySymbol) != NumberStyles.None)
{
text = numfmt.CurrencySymbol;
if (numfmt.ansiCurrencySymbol != null)
{
text2 = numfmt.ansiCurrencySymbol;
}
str2 = numfmt.NumberDecimalSeparator;
str3 = numfmt.NumberGroupSeparator;
str4 = numfmt.CurrencyDecimalSeparator;
str5 = numfmt.CurrencyGroupSeparator;
flag = true;
}
else
{
str4 = numfmt.NumberDecimalSeparator;
str5 = numfmt.NumberGroupSeparator;
}
int num = 0;
char* ptr = str;
char c = *ptr;
while (true)
{
if (!Number.IsWhite(c) || (options & NumberStyles.AllowLeadingWhite) == NumberStyles.None || ((num & 1) != 0 && ((num & 1) == 0 || ((num & 32) == 0 && numfmt.numberNegativePattern != 2))))
{
bool flag2;
char* ptr2;
if ((flag2 = (((options & NumberStyles.AllowLeadingSign) == NumberStyles.None) ? false : ((num & 1) == 0))) && (ptr2 = Number.MatchChars(ptr, numfmt.positiveSign)) != null)
{
num |= 1;
ptr = ptr2 - (IntPtr)2 / 2;
}
else
{
if (flag2 && (ptr2 = Number.MatchChars(ptr, numfmt.negativeSign)) != null)
{
num |= 1;
number.sign = true;
ptr = ptr2 - (IntPtr)2 / 2;
}
else
{
if (c == '(' && (options & NumberStyles.AllowParentheses) != NumberStyles.None && (num & 1) == 0)
{
num |= 3;
number.sign = true;
}
else
{
if ((text == null || (ptr2 = Number.MatchChars(ptr, text)) == null) && (text2 == null || (ptr2 = Number.MatchChars(ptr, text2)) == null))
{
break;
}
num |= 32;
text = null;
text2 = null;
ptr = ptr2 - (IntPtr)2 / 2;
}
}
}
}
c = *(ptr += (IntPtr)2 / 2);
}
int num2 = 0;
int num3 = 0;
while (true)
{
if ((c >= '0' && c <= '9') || ((options & NumberStyles.AllowHexSpecifier) != NumberStyles.None && ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))))
{
num |= 4;
if (c != '0' || (num & 8) != 0)
{
if (num2 < 50)
{
number.digits[(IntPtr)(num2++)] = c;
if (c != '0' || parseDecimal)
{
num3 = num2;
}
}
if ((num & 16) == 0)
{
number.scale++;
}
num |= 8;
}
else
{
if ((num & 16) != 0)
{
number.scale--;
}
}
}
else
{
char* ptr2;
if ((options & NumberStyles.AllowDecimalPoint) != NumberStyles.None && (num & 16) == 0 && ((ptr2 = Number.MatchChars(ptr, str4)) != null || (flag && (num & 32) == 0 && (ptr2 = Number.MatchChars(ptr, str2)) != null)))
{
num |= 16;
ptr = ptr2 - (IntPtr)2 / 2;
}
else
{
if ((options & NumberStyles.AllowThousands) == NumberStyles.None || (num & 4) == 0 || (num & 16) != 0 || ((ptr2 = Number.MatchChars(ptr, str5)) == null && (!flag || (num & 32) != 0 || (ptr2 = Number.MatchChars(ptr, str3)) == null)))
{
break;
}
ptr = ptr2 - (IntPtr)2 / 2;
}
}
c = *(ptr += (IntPtr)2 / 2);
}
bool flag3 = false;
number.precision = num3;
number.digits[(IntPtr)num3] = '\0';
if ((num & 4) != 0)
{
if ((c == 'E' || c == 'e') && (options & NumberStyles.AllowExponent) != NumberStyles.None)
{
char* ptr3 = ptr;
c = *(ptr += (IntPtr)2 / 2);
char* ptr2;
if ((ptr2 = Number.MatchChars(ptr, numfmt.positiveSign)) != null)
{
c = *(ptr = ptr2);
}
else
{
if ((ptr2 = Number.MatchChars(ptr, numfmt.negativeSign)) != null)
{
c = *(ptr = ptr2);
flag3 = true;
}
}
if (c >= '0' && c <= '9')
{
int num4 = 0;
do
{
num4 = num4 * 10 + (int)(c - '0');
c = *(ptr += (IntPtr)2 / 2);
if (num4 > 1000)
{
num4 = 9999;
while (c >= '0' && c <= '9')
{
c = *(ptr += (IntPtr)2 / 2);
}
}
}
while (c >= '0' && c <= '9');
if (flag3)
{
num4 = -num4;
}
number.scale += num4;
}
else
{
ptr = ptr3;
c = *ptr;
}
}
while (true)
{
if (!Number.IsWhite(c) || (options & NumberStyles.AllowTrailingWhite) == NumberStyles.None)
{
bool flag2;
char* ptr2;
if ((flag2 = (((options & NumberStyles.AllowTrailingSign) == NumberStyles.None) ? false : ((num & 1) == 0))) && (ptr2 = Number.MatchChars(ptr, numfmt.positiveSign)) != null)
{
num |= 1;
ptr = ptr2 - (IntPtr)2 / 2;
}
else
{
if (flag2 && (ptr2 = Number.MatchChars(ptr, numfmt.negativeSign)) != null)
{
num |= 1;
number.sign = true;
ptr = ptr2 - (IntPtr)2 / 2;
}
else
{
if (c == ')' && (num & 2) != 0)
{
num &= -3;
}
else
{
if ((text == null || (ptr2 = Number.MatchChars(ptr, text)) == null) && (text2 == null || (ptr2 = Number.MatchChars(ptr, text2)) == null))
{
break;
}
text = null;
text2 = null;
ptr = ptr2 - (IntPtr)2 / 2;
}
}
}
}
c = *(ptr += (IntPtr)2 / 2);
}
if ((num & 2) == 0)
{
if ((num & 8) == 0)
{
if (!parseDecimal)
{
number.scale = 0;
}
if ((num & 16) == 0)
{
number.sign = false;
}
}
str = ptr;
return true;
}
}
str = ptr;
return false;
}
public static int Parse(string s)
{
return Number.ParseInt32(s, NumberStyles.Integer, NumberFormatInfo.CurrentInfo);
}
internal unsafe static int ParseInt32(string s, NumberStyles style, NumberFormatInfo info)
{
byte* stackBuffer = stackalloc byte[1 * 114 / 1];
Number.NumberBuffer numberBuffer = new Number.NumberBuffer(stackBuffer);
int result = 0;
Number.StringToNumber(s, style, ref numberBuffer, info, false);
if ((style & NumberStyles.AllowHexSpecifier) != NumberStyles.None)
{
if (!Number.HexNumberToInt32(ref numberBuffer, ref result))
{
throw new OverflowException(Environment.GetResourceString("Overflow_Int32"));
}
}
else
{
if (!Number.NumberToInt32(ref numberBuffer, ref result))
{
throw new OverflowException(Environment.GetResourceString("Overflow_Int32"));
}
}
return result;
}
private unsafe static void StringToNumber(string str, NumberStyles options, ref Number.NumberBuffer number, NumberFormatInfo info, bool parseDecimal)
{
if (str == null)
{
throw new ArgumentNullException("String");
}
fixed (char* ptr = str)
{
char* ptr2 = ptr;
if (!Number.ParseNumber(ref ptr2, options, ref number, info, parseDecimal) || ((ptr2 - ptr / 2) / 2 < str.Length && !Number.TrailingZeros(str, (ptr2 - ptr / 2) / 2)))
{
throw new FormatException(Environment.GetResourceString("Format_InvalidString"));
}
}
}
I've written this one for doubles, that doesn't create a temporary substring. It's meant to be used inside a JSON parser so it limits itself to how doubles can be represented in JSON according to http://www.json.org/.
It's not optimal yet because it requires you to know where the number begins and ends (begin and end parameters), so you'll have to traverse the length of the number twice to find out where it ends. It's still around 10-15x faster than double.Parse and it could be fairly easily modified that it finds the end inside the function which is then returned as an out parameter to know where you have to resume parsing the main string.
Used like so:
Parsers.TryParseDoubleFastStream("1", 0, 1, out j);
Parsers.TryParseDoubleFastStream("2.0", 0, 3, out j);
Parsers.TryParseDoubleFastStream("3.5", 0, 3, out j);
Parsers.TryParseDoubleFastStream("-4.5", 0, 4, out j);
Parsers.TryParseDoubleFastStream("50.06", 0, 5, out j);
Parsers.TryParseDoubleFastStream("1000.65", 0, 7, out j);
Parsers.TryParseDoubleFastStream("-10000.8600", 0, 11, out j);
Code can be found here:
https://gist.github.com/3010984 (would be too lengthy to post here).
And StandardFunctions.IgnoreChar is for my purpose as simple as:
public static bool IgnoreChar(char c)
{
return c < 33;
}
Paste all this code into C# and call Test(). This is as close as you can get to operating directly on the string array to parse numbers using C#. It is built for speed, not elegance. The ParseInt and ParseFloat function were created for an OpenGL graphics engine to import vectors from text-based 3d models. Parsing floats is a significant bottleneck in that process. This was as fast as I could make it.
using System.Diagnostics;
private void Test()
{
Stopwatch sw = new Stopwatch();
StringBuilder sb = new StringBuilder();
int iterations = 1000;
// Build a string of 1000000 space separated numbers
for (var n = 0; n < iterations; n++)
{
if (n > 0)
sb.Append(' ');
sb.Append(n.ToString());
}
string numberString = sb.ToString();
// Time the process
sw.Start();
StringToInts(numberString, iterations);
//StringToFloats(numberString, iterations);
sw.Stop();
long proc1 = sw.ElapsedMilliseconds;
Console.WriteLine("iterations: {0} \t {1}ms", iterations, proc1);
}
private unsafe int[] StringToInts(string s, int length)
{
int[] ints = new int[length];
int index = 0;
int startpos = 0;
fixed (char* pStringBuffer = s)
{
fixed (int* pIntBuffer = ints)
{
for (int n = 0; n < s.Length; n++)
{
if (s[n] == ' ' || n == s.Length - 1)
{
if (n == s.Length - 1)
n++;
// pIntBuffer[index++] = int.Parse(new string(pStringBuffer, startpos, n - startpos));
pIntBuffer[index++] = ParseInt((pStringBuffer + startpos), n - startpos);
startpos = n + 1;
}
}
}
}
return ints;
}
private unsafe float[] StringToFloats(string s, int length)
{
float[] floats = new float[length];
int index = 0;
int startpos = 0;
fixed (char* pStringBuffer = s)
{
fixed (float* pFloatBuffer = floats)
{
for (int n = 0; n < s.Length; n++)
{
if (s[n] == ' ' || n == s.Length - 1)
{
if (n == s.Length - 1)
n++;
pFloatBuffer[index++] = ParseFloat((pStringBuffer + startpos), n - startpos); // int.Parse(new string(pStringBuffer, startpos, n - startpos));
startpos = n + 1;
}
}
}
}
return floats;
}
public static unsafe int ParseInt(char* input, int len)
{
int pos = 0; // read pointer position
int part = 0; // the current part (int, float and sci parts of the number)
bool neg = false; // true if part is a negative number
int* ret = stackalloc int[1];
while (pos < len && (*(input + pos) > '9' || *(input + pos) < '0') && *(input + pos) != '-')
pos++;
// sign
if (*(input + pos) == '-')
{
neg = true;
pos++;
}
// integer part
while (pos < len && !(input[pos] > '9' || input[pos] < '0'))
part = part * 10 + (input[pos++] - '0');
*ret = neg ? (part * -1) : part;
return *ret;
}
public static unsafe float ParseFloat(char* input, int len)
{
//float ret = 0f; // return value
int pos = 0; // read pointer position
int part = 0; // the current part (int, float and sci parts of the number)
bool neg = false; // true if part is a negative number
float* ret = stackalloc float[1];
// find start
while (pos < len && (input[pos] < '0' || input[pos] > '9') && input[pos] != '-' && input[pos] != '.')
pos++;
// sign
if (input[pos] == '-')
{
neg = true;
pos++;
}
// integer part
while (pos < len && !(input[pos] > '9' || input[pos] < '0'))
part = part * 10 + (input[pos++] - '0');
*ret = neg ? (float)(part * -1) : (float)part;
// float part
if (pos < len && input[pos] == '.')
{
pos++;
double mul = 1;
part = 0;
while (pos < len && !(input[pos] > '9' || input[pos] < '0'))
{
part = part * 10 + (input[pos] - '0');
mul *= 10;
pos++;
}
if (neg)
*ret -= (float)part / (float)mul;
else
*ret += (float)part / (float)mul;
}
// scientific part
if (pos < len && (input[pos] == 'e' || input[pos] == 'E'))
{
pos++;
neg = (input[pos] == '-'); pos++;
part = 0;
while (pos < len && !(input[pos] > '9' || input[pos] < '0'))
{
part = part * 10 + (input[pos++] - '0');
}
if (neg)
*ret /= (float)Math.Pow(10d, (double)part);
else
*ret *= (float)Math.Pow(10d, (double)part);
}
return (float)*ret;
}
So, are there built-in functions to parse directly from a substring within a string (i.e.
using the given start and length of a string) in order to avoid generating a temporary
string? If not, are there any libraries that provide efficient functions to do this?
It seems that you want to use a lexing buffer and a lexer, similar to what OCaml can provide with ocamllex and the Lexbuf buffer. (I cannot provide references for F#.)
If your benchmark involving a huge string of integers separated by other tokens is your typical case, it will work well. But in other situations, it could be impractical.
Not sure if this is any good, but have you tried something like:
var stringValues = input.split(" ");
var intValues = Array.ConvertAll(stringValues, s => int.Parse(s));

Categories

Resources