My current project handles large numbers of incoming radio messages (~5M per day) represented as strings that must be then divided into pre-determined sized chunks, ready for storage.
For example, a message will come in the following format:
mzIIIICCssss
Each different char represents a chunk, so this example holds 5 chunks (m, z, IIII, CC, ssss).
An example of a message using the format could be:
.91234NE0001 (., 9, 1234, NE, 0001)
I've used substring so far but have been told this is not as efficient as say regular expressions. If this is the case, how can I use regex to match at char positions, instead of a semantic pattern?
Substring is much faster than a regex. Since all you are trying to do is separate a string into fixed-size chunks, just use Substring.
cHao's comment gave me another idea. You could use the string(char[], int, int) constructor, somewhat like this:
string message = ".91234NE0001";
char[] messageArr = message.ToCharArray();
string chunk1 = new string(messageArr, 0, 1);
string chunk2 = new string(messageArr, 1, 1);
string chunk3 = new string(messageArr, 2, 4);
string chunk4 = new string(messageArr, 6, 2);
string chunk5 = new string(messageArr, 8, 4);
You can probably give the variables better names :)
This is the manual way of doing what Substring is doing. I thought it would be faster than the Substring method, but I was thinking of the wrong approach previously. It will probably be about the same speed.
I think the most efficient that you'll be able to achieve without dropping to native code is with unsafe code.
private static IEnumerable<string> ExtractChunksUnsafe(string format, string data)
{
if(format.Length != data.Length)
throw new ArgumentException("Format length must match Data length");
if(data.Length == 0)
throw new ArgumentException("Invalid Data length");
char prevFormat = '\0';
char currentFormat = format[0];
var chunks = new List<string>();
var builder = new StringBuilder();
unsafe
{
fixed(char * indexer = data)
{
var index = -1;
while(data.Length > ++index)
{
prevFormat = currentFormat;
currentFormat = format[index];
if(currentFormat != prevFormat)
{
chunks.Add(builder.ToString());
builder.Clear();
}
builder.Append((*(indexer + index)));
}
chunks.Add(builder.ToString());
builder.Clear();
}
}
return chunks;
}
Comparison:
private static IEnumerable<string> ExtractChunks(string format, string data)
{
if(format.Length != data.Length)
throw new ArgumentException("Format length must match Data length");
if(data.Length == 0)
throw new ArgumentException("Invalid Data length");
char prevFormat = '\0';
char currentFormat = format[0];
var prevIndex = 0;
var index = 1;
var message = data.ToCharArray();
var chunks = new List<string>();
while(data.Length > index)
{
prevFormat = currentFormat;
currentFormat = format[index];
if(currentFormat != prevFormat)
{
chunks.Add(new string(message, prevIndex, index - prevIndex));
prevIndex = index;
}
index++;
}
chunks.Add(new string(message, prevIndex, index - prevIndex));
return chunks;
}
Sample:
string format = "mzIIIICCssss";
string data = ".a9876NE9001";
var chunks = ExtractChunks(format, data);
foreach(var message in chunks)
{
Console.WriteLine(message);
}
Benchmark:
string format = "mzIIIICCssss";
string data = ".a9876NE9001";
// Warmup CLR
ExtractChunksUnsafe(format, data);
ExtractChunks(format, data);
TimeSpan unsafeCode;
TimeSpan safeCode;
var timer = Stopwatch.StartNew();
for(int i = 0; i < 10000000; i++)
{
ExtractChunksUnsafe(format, data);
}
unsafeCode = timer.Elapsed;
timer.Restart();
for(int i = 0; i < 10000000; i++)
{
ExtractChunks(format, data);
}
safeCode = timer.Elapsed;
timer.Stop();
Console.WriteLine("Unsafe time {0}", unsafeCode);
Console.WriteLine("Safe time {0}", safeCode);
Result:
Unsafe time 00:00:04.8551136
Safe time 00:00:03.1786573
Even modifying the Unsafe body:
unsafe
{
fixed(char * indexer = data)
{
var prevIndex = 0;
var index = 1;
while(data.Length > index)
{
prevFormat = currentFormat;
currentFormat = format[index];
if(currentFormat != prevFormat)
{
chunks.Add(new string(indexer, prevIndex, index - prevIndex));
prevIndex = index;
}
index++;
}
chunks.Add(new string(indexer, prevIndex, index - prevIndex));
}
}
Will still result in slower time Unsafe time 00:00:03.4565302.
Ignoring questions of which solution is most efficient, here is a regex that will match the format given in the question (mzIIIICCssss)
(?<m>.)(?<z>.)(?<IIII>.{4})(?<CC>.{2})(?<ssss>.{4})
This will capture one character in a group called "m", the next character in a group called "z", the next 4 characters in a group called "IIII", the next 2 in "CC", and the next 4 in "ssss".
As far as performance, if the code you have right now isn't fast enough, and you've determined the string handling is the problem by profiling it, then look for faster replacements.
Related
I know how to do a string split if there's a letter, number, that I want to replace.
But how could I do a string.Split() by 2 char counts without replacing any existing letters, number, etc...?
Example:
string MAC = "00122345"
I want that string to output: 00:12:23:45
You could create a LINQ extension method to give you an IEnumerable<string> of parts:
public static class Extensions
{
public static IEnumerable<string> SplitNthParts(this string source, int partSize)
{
if (string.IsNullOrEmpty(source))
{
throw new ArgumentException("String cannot be null or empty.", nameof(source));
}
if (partSize < 1)
{
throw new ArgumentException("Part size has to be greater than zero.", nameof(partSize));
}
return Enumerable
.Range(0, (source.Length + partSize - 1) / partSize)
.Select(pos => source
.Substring(pos * partSize,
Math.Min(partSize, source.Length - pos * partSize)));
}
}
Usage:
var strings = new string[] {
"00122345",
"001223453"
};
foreach (var str in strings)
{
Console.WriteLine(string.Join(":", str.SplitNthParts(2)));
}
// 00:12:23:45
// 00:12:23:45:3
Explanation:
Use Enumerable.Range to get number of positions to slice string. In this case its the length of the string + chunk size - 1, since we need to get a big enough range to also fit leftover chunk sizes.
Enumerable.Select each position of slicing and get the startIndex using String.Substring using the position multiplied by 2 to move down the string every 2 characters. You will have to use Math.Min to calculate the smallest size leftover size if the string doesn't have enough characters to fit another chunk. You can calculate this by the length of the string - current position * chunk size.
String.Join the final result with ":".
You could also replace the LINQ query with yield here to increase performance for larger strings since all the substrings won't be stored in memory at once:
for (var pos = 0; pos < source.Length; pos += partSize)
{
yield return source.Substring(pos, Math.Min(partSize, source.Length - pos));
}
You can use something like this:
string newStr= System.Text.RegularExpressions.Regex.Replace(MAC, ".{2}", "$0:");
To trim the last colon, you can use something like this.
newStr.TrimEnd(':');
Microsoft Document
Try this way.
string MAC = "00122345";
MAC = System.Text.RegularExpressions.Regex.Replace(MAC,".{2}", "$0:");
MAC = MAC.Substring(0,MAC.Length-1);
Console.WriteLine(MAC);
A quite fast solution, 8-10x faster than the current accepted answer (regex solution) and 3-4x faster than the LINQ solution
public static string Format(this string s, string separator, int length)
{
StringBuilder sb = new StringBuilder();
for (int i = 0; i < s.Length; i += length)
{
sb.Append(s.Substring(i, Math.Min(s.Length - i, length)));
if (i < s.Length - length)
{
sb.Append(separator);
}
}
return sb.ToString();
}
Usage:
string result = "12345678".Format(":", 2);
Here is a one (1) line alternative using LINQ Enumerable.Aggregate.
string result = MAC.Aggregate("", (acc, c) => acc.Length % 3 == 0 ? acc += c : acc += c + ":").TrimEnd(':');
An easy to understand and simple solution.
This is a simple fast modified answer in which you can easily change the split char.
This answer also checks if the number is even or odd , to make the suitable string.Split().
input : 00122345
output : 00:12:23:45
input : 0012234
output : 00:12:23:4
//The List that keeps the pairs
List<string> MACList = new List<string>();
//Split the even number into pairs
for (int i = 1; i <= MAC.Length; i++)
{
if (i % 2 == 0)
{
MACList.Add(MAC.Substring(i - 2, 2));
}
}
//Make the preferable output
string output = "";
for (int j = 0; j < MACList.Count; j++)
{
output = output + MACList[j] + ":";
}
//Checks if the input string is even number or odd number
if (MAC.Length % 2 == 0)
{
output = output.Trim(output.Last());
}
else
{
output += MAC.Last();
}
//input : 00122345
//output : 00:12:23:45
//input : 0012234
//output : 00:12:23:4
string num = db.SelectNums(id);
string[] numArr = num.Split('-').ToArray();
string num contain for an example "48030-48039";
string[] numArr will therefor contain (48030, 48039).
Now I have two elements, a high and low. I now need to get ALL the numbers from 48030 to 48039. The issue is that it has to be string since there will be telephone numbers with leading zeroes now and then.
Thats why I cannot use Enumerable.Range().ToArray() for this.
Any suggestions? The expected result should be 48030, 48031, 48032, ... , 48039
This should work with your leading zero requirement:
string num = db.SelectNums(id);
string[] split = num.Split('-');
long start = long.Parse(split[0]);
long end = long.Parse(split[1]);
bool includeLeadingZero = split[0].StartsWith("0");
List<string> results = new List<string>();
for(int i = start; i <= end; i++)
{
string result = includeLeadingZero ? "0" : "";
result += i.ToString();
results.Add(result);
}
string[] arrayResults = results.ToArray();
A few things to note:
It assumes your input will be 2 valid integers split by a single hyphen
I am giving you a string array result, personally I would prefer to work with a List<int> in the end
If the first number contains a single leading zero, then all results will contain a single leading zero
It uses long to cater for longer numbers, beware that the max number that will parse is 9,223,372,036,854,775,807, you could double this value (not length) by using ulong
Are you saying this?
int[] nums = new int[numArr.Length];
for (int i = 0; i < numArr.Length; i++)
{
nums[i] = Convert.ToInt32(numArr[i]);
}
Array.Sort(nums);
for (int n = nums[0]; n <= nums[nums.Length - 1]; n++)
{
Console.WriteLine(n);
}
here link
I am expecting your string always have valid two integers, so using Parse instead TryParse
string[] strList = "48030-48039".Split('-').ToArray();
var lst = strList.Select(int.Parse).ToList();
var min = lst.OrderBy(l => l).FirstOrDefault();
var max = lst.OrderByDescending(l => l).FirstOrDefault();
var diff = max - min;
//adding 1 here otherwise 48039 will not be there
var rng = Enumerable.Range(min,diff+1);
If you expecting invalid string
var num = 0;
var lst = (from s in strList where int.TryParse(s, out num) select num).ToList();
This is one way to do it:
public static string[] RangeTest()
{
Boolean leadingZero = false;
string num = "048030-48039"; //db.SelectNums(id);
if (num.StartsWith("0"))
leadingZero = true;
int min = int.Parse(num.Split('-').Min());
int count = int.Parse(num.Split('-').Max()) - min;
if (leadingZero)
return Enumerable.Range(min, count).Select(x => "0" + x.ToString()).ToArray();
else
return Enumerable.Range(min, count).Select(x => "" + x.ToString()).ToArray(); ;
}
You can use string.Format to ensure numbers are formatted with leading zeros. That will make the method work with arbitrary number of leading zeros.
private static string CreateRange(string num)
{
var tokens = num.Split('-').Select(s => s.Trim()).ToArray();
// use UInt64 to allow huge numbers
var start = UInt64.Parse(tokens[0]);
var end = UInt64.Parse(tokens[1]);
// if your start number is '000123', this will create
// a format string with 6 zeros ('000000')
var format = new string('0', tokens[0].Length);
// use StringBuilder to make GC happy.
// (if only there was a Enumerable.Range<ulong> overload...)
var sb = new StringBuilder();
for (var i = start; i <= end; i++)
{
// format ensures that your numbers are padded properly
sb.Append(i.ToString(format));
sb.Append(", ");
}
// trim trailing comma after the last element
if (sb.Length >= 2) sb.Length -= 2;
return sb.ToString();
}
Usage example:
// prints 0000012, 0000013, 0000014
Console.WriteLine( CreateRange("0000012-0000014") );
Three significant issues were brought up in comments:
The phone numbers have enough digits to exceed Int32.MaxValue so
converting to int isn't viable.
The phone numbers can have leading zeros (presumeably for some
international calling?)
The possible range of numbers can theoretically exceed the maximum size of an array (which may have memory issues, and I think may not be represented as a string)
As such, you may need to use long instead of int, and I would suggest using deferred execution if needed for very large ranges.
public static IEnumerable<string> EnumeratePhoneNumbers(string fromAndTo)
{
var split = fromAndTo.Split('-');
return EnumeratePhoneNumbers(split[0], split[1]);
}
public static IEnumerable<string> EnumeratePhoneNumbers(string fromString, string toString)
{
long from = long.Parse(fromString);
long to = long.Parse(toString);
int totalNumberLength = fromString.Length;
for (long phoneNumber = from; phoneNumber <= to; phoneNumber++)
{
yield return phoneNumber.ToString().PadLeft(totalNumberLength, '0');
}
}
This assumes that the padded zeros are already included in the lower bound fromString text. It will iterate and yield out numbers as you need them. This can be useful if you're churning out a lot of numbers and don't need to fill up memory with them, or if you just need the first 10 or 100. For example:
var first100Numbers =
EnumeratePhoneNumbers("0018155500-7018155510")
.Take(100)
.ToArray();
Normally that range would produce 7 billion results which cannot be stored in an array, and might run into memory issues (I'm not even sure if it can be stored in a string); by using deferred execution, you only create the 100 needed.
If you do have a small range, you can still join up your results into a string as you desired:
string numberRanges = String.Join(", ", EnumeratePhoneNumbers("0018155500-0018155510"));
And naturally you can put this array creation into your own helper method:
public static string GetPhoneNumbersListing(string fromAndTo)
{
return String.Join(", ", EnumeratePhoneNumbers("0018155500-0018155510"));
}
So your usage would be:
string numberRanges = GetPhoneNumbersListing("0018155500-0018155510");
A complete solution inspired by the answer by #Dan-o:
Inputs:
Start: 48030
End: 48039
Digits: 6
Expected String Output:
048030, 048031, 048032, 048033, 048034, 048035, 048036, 048037, 048038, 048039
Program:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
public class Program
{
public static void Main()
{
int first = 48030;
int last = 48039;
int digits = 6;
Console.WriteLine(CreateRange(first, last, digits));
}
public static string CreateRange(int first, int last, int numDigits)
{
string separator = ", ";
var sb = new StringBuilder();
sb.Append(first.ToString().PadLeft(numDigits, '0'));
foreach (int num in Enumerable.Range(first + 1, last - first))
{
sb.Append(separator + num.ToString().PadLeft(numDigits, '0'));
}
return sb.ToString();
}
}
For Each item In Enumerable.Range(min, count).ToArray()
something = item.PadLeft(5, "0")
Next
I have a string which contains binary digits. How to separate string after each 8 digit?
Suppose the string is:
string x = "111111110000000011111111000000001111111100000000";
I want to add a separator like ,(comma) after each 8 character.
output should be :
"11111111,00000000,11111111,00000000,11111111,00000000,"
Then I want to send it to a list<> last 8 char 1st then the previous 8 chars(excepting ,) and so on.
How can I do this?
Regex.Replace(myString, ".{8}", "$0,");
If you want an array of eight-character strings, then the following is probably easier:
Regex.Split(myString, "(?<=^(.{8})+)");
which will split the string only at points where a multiple of eight characters precede it.
Try this:
var s = "111111110000000011111111000000001111111100000000";
var list = Enumerable
.Range(0, s.Length/8)
.Select(i => s.Substring(i*8, 8));
var res = string.Join(",", list);
There's another Regex approach:
var str = "111111110000000011111111000000001111111100000000";
# for .NET 4
var res = String.Join(",",Regex.Matches(str, #"\d{8}").Cast<Match>());
# for .NET 3.5
var res = String.Join(",", Regex.Matches(str, #"\d{8}")
.OfType<Match>()
.Select(m => m.Value).ToArray());
...or old school:
public static List<string> splitter(string in, out string csv)
{
if (in.length % 8 != 0) throw new ArgumentException("in");
var lst = new List<string>(in/8);
for (int i=0; i < in.length / 8; i++) lst.Add(in.Substring(i*8,8));
csv = string.Join(",", lst); //This we want in input order (I believe)
lst.Reverse(); //As we want list in reverse order (I believe)
return lst;
}
Ugly but less garbage:
private string InsertStrings(string s, int insertEvery, char insert)
{
char[] ins = s.ToCharArray();
int length = s.Length + (s.Length / insertEvery);
if (ins.Length % insertEvery == 0)
{
length--;
}
var outs = new char[length];
long di = 0;
long si = 0;
while (si < s.Length - insertEvery)
{
Array.Copy(ins, si, outs, di, insertEvery);
si += insertEvery;
di += insertEvery;
outs[di] = insert;
di ++;
}
Array.Copy(ins, si, outs, di, ins.Length - si);
return new string(outs);
}
String overload:
private string InsertStrings(string s, int insertEvery, string insert)
{
char[] ins = s.ToCharArray();
char[] inserts = insert.ToCharArray();
int insertLength = inserts.Length;
int length = s.Length + (s.Length / insertEvery) * insert.Length;
if (ins.Length % insertEvery == 0)
{
length -= insert.Length;
}
var outs = new char[length];
long di = 0;
long si = 0;
while (si < s.Length - insertEvery)
{
Array.Copy(ins, si, outs, di, insertEvery);
si += insertEvery;
di += insertEvery;
Array.Copy(inserts, 0, outs, di, insertLength);
di += insertLength;
}
Array.Copy(ins, si, outs, di, ins.Length - si);
return new string(outs);
}
If I understand your last requirement correctly (it's not clear to me if you need the intermediate comma-delimited string or not), you could do this:
var enumerable = "111111110000000011111111000000001111111100000000".Batch(8).Reverse();
By utilizing morelinq.
Here my two little cents too. An implementation using StringBuilder:
public static string AddChunkSeparator (string str, int chunk_len, char separator)
{
if (str == null || str.Length < chunk_len) {
return str;
}
StringBuilder builder = new StringBuilder();
for (var index = 0; index < str.Length; index += chunk_len) {
builder.Append(str, index, chunk_len);
builder.Append(separator);
}
return builder.ToString();
}
You can call it like this:
string data = "111111110000000011111111000000001111111100000000";
string output = AddChunkSeparator(data, 8, ',');
One way using LINQ:
string data = "111111110000000011111111000000001111111100000000";
const int separateOnLength = 8;
string separated = new string(
data.Select((x,i) => i > 0 && i % separateOnLength == 0 ? new [] { ',', x } : new [] { x })
.SelectMany(x => x)
.ToArray()
);
I did it using Pattern & Matcher as following way:
fun addAnyCharacter(input: String, insertion: String, interval: Int): String {
val pattern = Pattern.compile("(.{$interval})", Pattern.DOTALL)
val matcher = pattern.matcher(input)
return matcher.replaceAll("$1$insertion")
}
Where:
input indicates Input string. Check results section.
insertion indicates Insert string between those characters. For example comma (,), start(*), hash(#).
interval indicates at which interval you want to add insertion character.
input indicates Input string. Check results section. Check results section; here I've added insertion at every 4th character.
Results:
I/P: 1234XXXXXXXX5678 O/P: 1234 XXXX XXXX 5678
I/P: 1234567812345678 O/P: 1234 5678 1234 5678
I/P: ABCDEFGHIJKLMNOP O/P: ABCD EFGH IJKL MNOP
Hope this helps.
As of .Net 6, you can simply use the IEnumerable.Chunk method (Which splits elements of a sequence into chunks) then reconcatenate the chunks using String.Join.
var text = "...";
string.Join(',', text.Chunk(size: 6).Select(x => new string(x)));
This is much faster without copying array (this version inserts space every 3 digits but you can adjust it to your needs)
public string GetString(double valueField)
{
char[] ins = valueField.ToString().ToCharArray();
int length = ins.Length + (ins.Length / 3);
if (ins.Length % 3 == 0)
{
length--;
}
char[] outs = new char[length];
int i = length - 1;
int j = ins.Length - 1;
int k = 0;
do
{
if (k == 3)
{
outs[i--] = ' ';
k = 0;
}
else
{
outs[i--] = ins[j--];
k++;
}
}
while (i >= 0);
return new string(outs);
}
For every 1 character, you could do this one-liner:
string.Join(".", "1234".ToArray()) //result: 1.2.3.4
If you intend to create your own function to acheive this without using regex or pattern matching methods, you can create a simple function like this:
String formatString(String key, String seperator, int afterEvery){
String formattedKey = "";
for(int i=0; i<key.length(); i++){
formattedKey += key.substring(i,i+1);
if((i+1)%afterEvery==0)
formattedKey += seperator;
}
if(formattedKey.endsWith("-"))
formattedKey = formattedKey.substring(0,formattedKey.length()-1);
return formattedKey;
}
Calling the mothod like this
formatString("ABCDEFGHIJKLMNOPQRST", "-", 4)
Would result in the return string as this
ABCD-EFGH-IJKL-MNOP-QRST
A little late to the party, but here's a simplified LINQ expression to break an input string x into groups of n separated by another string sep:
string sep = ",";
int n = 8;
string result = String.Join(sep, x.InSetsOf(n).Select(g => new String(g.ToArray())));
A quick rundown of what's happening here:
x is being treated as an IEnumerable<char>, which is where the InSetsOf extension method comes in.
InSetsOf(n) groups characters into an IEnumerable of IEnumerable -- each entry in the outer grouping contains an inner group of n characters.
Inside the Select method, each group of n characters is turned back into a string by using the String() constructor that takes an array of chars.
The result of Select is now an IEnumerable<string>, which is passed into String.Join to interleave the sep string, just like any other example.
I am more than late with my answer but you can use this one:
static string PutLineBreak(string str, int split)
{
for (int a = 1; a <= str.Length; a++)
{
if (a % split == 0)
str = str.Insert(a, "\n");
}
return str;
}
Suppose I have a string with the text: "THIS IS A TEST". How would I split it every n characters? So if n was 10, then it would display:
"THIS IS A "
"TEST"
..you get the idea. The reason is because I want to split a very big line into smaller lines, sort of like word wrap. I think I can use string.Split() for this, but I have no idea how and I'm confused.
Any help would be appreciated.
Let's borrow an implementation from my answer on code review. This inserts a line break every n characters:
public static string SpliceText(string text, int lineLength) {
return Regex.Replace(text, "(.{" + lineLength + "})", "$1" + Environment.NewLine);
}
Edit:
To return an array of strings instead:
public static string[] SpliceText(string text, int lineLength) {
return Regex.Matches(text, ".{1," + lineLength + "}").Cast<Match>().Select(m => m.Value).ToArray();
}
Maybe this can be used to handle efficiently extreme large files :
public IEnumerable<string> GetChunks(this string sourceString, int chunkLength)
{
using(var sr = new StringReader(sourceString))
{
var buffer = new char[chunkLength];
int read;
while((read= sr.Read(buffer, 0, chunkLength)) == chunkLength)
{
yield return new string(buffer, 0, read);
}
}
}
Actually, this works for any TextReader. StreamReader is the most common used TextReader. You can handle very large text files (IIS Log files, SharePoint Log files, etc) without having to load the whole file, but reading it line by line.
You should be able to use a regex for this. Here is an example:
//in this case n = 10 - adjust as needed
List<string> groups = (from Match m in Regex.Matches(str, ".{1,10}")
select m.Value).ToList();
string newString = String.Join(Environment.NewLine, lst.ToArray());
Refer to this question for details:
Splitting a string into chunks of a certain size
Probably not the most optimal way, but without regex:
string test = "my awesome line of text which will be split every n characters";
int nInterval = 10;
string res = String.Concat(test.Select((c, i) => i > 0 && (i % nInterval) == 0 ? c.ToString() + Environment.NewLine : c.ToString()));
Coming back to this after doing a code review, there's another way of doing the same without using Regex
public static IEnumerable<string> SplitText(string text, int length)
{
for (int i = 0; i < text.Length; i += length)
{
yield return text.Substring(i, Math.Min(length, text.Length - i));
}
}
Some code that I just wrote:
string[] SplitByLength(string line, int len, int IsB64=0) {
int i;
if (IsB64 == 1) {
// Only Allow Base64 Line Lengths without '=' padding
int mod64 = (len % 4);
if (mod64 != 0) {
len = len + (4 - mod64);
}
}
int parts = line.Length / len;
int frac = line.Length % len;
int extra = 0;
if (frac != 0) {
extra = 1;
}
string[] oline = new string[parts + extra];
for(i=0; i < parts; i++) {
oline[i] = line.Substring(0, len);
line = line.Substring(len);
}
if (extra == 1) {
oline[i] = line;
}
return oline;
}
string CRSplitByLength(string line, int len, int IsB64 = 0)
{
string[] lines = SplitByLength(line, len, IsB64);
return string.Join(System.Environment.NewLine, lines);
}
string m = "1234567890abcdefghijklmnopqrstuvwxhyz";
string[] r = SplitByLength(m, 6, 0);
foreach (string item in r) {
Console.WriteLine("{0}", item);
}
I am doing a security presentation for my Computer and Information Security course in a few weeks time, and in this presentation I will be demonstrating the pros and cons of different attacks (dictionary, rainbow and bruteforce). I am do the dictionary and rainbow attacks fine but I need to generate the bruteforce attack on the fly. I need to find an algorithm that will let me cycle though every combination of letter, symbol and number up to a certain character length.
So as an example, for a character length of 12, the first and last few generations will be:
a
ab
abc
abcd
...
...
zzzzzzzzzzzx
zzzzzzzzzzzy
zzzzzzzzzzzz
But it will also use numbers and symbols, so it's quite hard for me to explain... but I think you get the idea. Using only symbols from the ASCII table is fine.
I can kind of picture using an ASCII function to do this with a counter, but I just can't work it out in my head. If anyone could provide some source code (I'll probably be using C#) or even some pseudo code that I can program a function from that'd be great.
Thank you in advance. :)
A recursive function will let you run through all combinations of ValidChars:
int maxlength = 12;
string ValidChars;
private void Dive(string prefix, int level)
{
level += 1;
foreach (char c in ValidChars)
{
Console.WriteLine(prefix + c);
if (level < maxlength)
{
Dive(prefix + c, level);
}
}
}
Assign the set of valid characters to ValidChars, the maximum length of string you want to maxlength, then call Dive("", 0); and away you go.
You need to generate all combinations of characters from a set of valid characters ; let's call this set validChars. Basically, each set of combinations of length N is a cartesian product of validChars with itself, N times. That's pretty easy to do using Linq:
char[] validChars = ...;
var combinationsOfLength1 =
from c1 in validChars
select new[] { c1 };
var combinationsOfLength2 =
from c1 in validChars
from c2 in validChars
select new[] { c1, c2 };
...
var combinationsOfLength12 =
from c1 in validChars
from c2 in validChars
...
from c12 in validChars
select new[] { c1, c2 ... c12 };
var allCombinations =
combinationsOfLength1
.Concat(combinationsOfLength2)
...
.Concat(combinationsOfLength12);
Obviously, you don't want to manually write the code for each length, especially if you don't know in advance the maximum length...
Eric Lippert has an article about generating the cartesian product of an arbitrary number of sequences. Using the CartesianProduct extension method provided by the article, you can generate all combinations of length N as follows:
var combinationsOfLengthN = Enumerable.Repeat(validChars, N).CartesianProduct();
Since you want all combinations from length 1 to MAX, you can do something like that:
var allCombinations =
Enumerable
.Range(1, MAX)
.SelectMany(N => Enumerable.Repeat(validChars, N).CartesianProduct());
allCombinations is an IEnumerable<IEnumerable<char>>, if you want to get the results as a sequence of strings, you just need to add a projection:
var allCombinations =
Enumerable
.Range(1, MAX)
.SelectMany(N => Enumerable.Repeat(validChars, N).CartesianProduct())
.Select(combination => new string(combination.ToArray()));
Note that it's certainly not the most efficient solution, but at least it's short and readable...
You can try this code, that use recursion to print all possible strings of 0 to stringsLenght chars lenght, composed by all combination of chars from firstRangeChar to lastRangeChar.
class BruteWriter
{
static void Main(string[] args)
{
var bw = new BruteWriter();
bw.WriteBruteStrings("");
}
private void WriteBruteStrings(string prefix)
{
Console.WriteLine(prefix);
if (prefix.Length == stringsLenght)
return;
for (char c = firstRangeChar; c <= lastRangeChar; c++)
WriteBruteStrings(prefix + c);
}
char firstRangeChar='A';
char lastRangeChar='z';
int stringsLenght=10;
}
This look to be faster than the solution of #dthorpe.I've compared the algorthms using this code:
class BruteWriter
{
static void Main(string[] args)
{
var st = new Stopwatch();
var bw = new BruteWriter();
st.Start();
bw.WriteBruteStrings("");
Console.WriteLine("First method: " + st.ElapsedMilliseconds);
for (char c = bw.firstRangeChar; c <= bw.lastRangeChar; c++)
bw.ValidChars += c;
st.Start();
bw.Dive("", 0);
Console.WriteLine("Second method: " + st.ElapsedMilliseconds);
Console.ReadLine();
}
private void WriteBruteStrings(string prefix)
{
if (prefix.Length == stringsLenght)
return;
for (char c = firstRangeChar; c <= lastRangeChar; c++)
WriteBruteStrings(prefix + c);
}
char firstRangeChar='A';
char lastRangeChar='R';
int stringsLenght=5;
int maxlength = 5;
string ValidChars;
private void Dive(string prefix, int level)
{
level += 1;
foreach (char c in ValidChars)
{
if (level <= maxlength)
{
Dive(prefix + c, level);
}
}
}
}
and, on my pc, I get these results:
First method: 247
Second method: 910
public void BruteStrings(int maxlength)
{
for(var i=1;i<i<=maxlength;i++)
BruteStrings(Enumerable.Repeat((byte)0,i));
}
public void BruteStrings(byte[] bytes)
{
Console.WriteLine(bytes
.Cast<char>()
.Aggregate(new StringBuilder(),
(sb,c) => sb.Append(c))
.ToString());
if(bytes.All(b=>b.MaxValue)) return;
bytes.Increment();
BruteStrings(bytes);
}
public static void Increment(this byte[] bytes)
{
bytes.Last() += 1;
if(bytes.Last == byte.MinValue)
{
var lastByte = bytes.Last()
bytes = bytes.Take(bytes.Count() - 1).ToArray().Increment();
bytes = bytes.Concat(new[]{lastByte});
}
}
Another alternative i did, that return a string.
I did not care about the performance of the thing since it was not for a real world scenario.
private void BruteForcePass(int maxLength)
{
var tempPass = "";
while (tempPass.Length <= maxLength)
{
tempPass = GetNextString(tempPass);//Use char from 32 to 256
//Do what you want
}
}
private string GetNextString(string initialString, int minChar= 32, int maxChar = 256)
{
char nextChar;
if (initialString.Length == 0)
{
nextChar = (char)minChar;//the initialString Length will increase
}
else if (initialString.Last() == (char)maxChar)
{
nextChar = (char)minChar;
var tempString = initialString.Substring(0, initialString.Length -1);//we need to increment the char just before the last one
initialString = GetNextString(tempString, minChar, maxChar);
}
else
{
nextChar = (char)(initialString.Last() + 1);//Get the lash Char and increment it;
initialString= initialString.Remove(initialString.Length - 1);//Remove the last char.
}
return initialString + nextChar;
}