Unicode to Mazovia Encoding redundant char - c#
I've been dealing with this for a few hours. I'm saving a string containing Polish diacritics ąśółńźć etc. to a file, but the software I must use to read that file reads only in Mazovia encoding, a pretty old encoding and not supported by the Microsoft Encoding class.
A .NET string consists of UTF-16 characters, so I've been using this code to convert from Unicode to Mazovia.
// Drop the last character of the line and terminate the record with the platform newline.
string rekord = linia.Remove(linia.Length - 1) + Environment.NewLine;
// Remap the Polish diacritics to their Mazovia code values.
string rekordMazovia = Kodowanie.UnicodeNaMazovia(rekord);
// Append the remapped record to the target file.
File.AppendAllText(sciezka, rekordMazovia);
/// <summary>
/// Helpers for remapping Polish diacritics to their Mazovia code values.
/// </summary>
public static class Kodowanie {
    /// <summary>
    /// Returns a copy of <paramref name="tekst"/> in which every Polish diacritic
    /// is replaced by the character whose code equals its Mazovia value.
    /// All other characters are copied unchanged.
    /// </summary>
    /// <param name="tekst">Text to remap.</param>
    /// <returns>The remapped text, still as a .NET string.</returns>
    public static string UnicodeNaMazovia(string tekst) {
        char[] znaki = tekst.ToCharArray();
        for (int i = 0; i < znaki.Length; i++) {
            znaki[i] = NaMazovia(znaki[i]);
        }
        return new string(znaki);
    }

    /// <summary>Maps one character; characters without a mapping are returned as-is.</summary>
    private static char NaMazovia(char znak) {
        switch (znak) {
            case (char)0x104: return (char)0x8F; //Ą
            case (char)0x106: return (char)0x95; //Ć
            case (char)0x118: return (char)0x90; //Ę
            case (char)0x141: return (char)0x9C; //Ł
            case (char)0x143: return (char)0xA5; //Ń
            case (char)0xD3: return (char)0xA3; //Ó
            case (char)0x15A: return (char)0x98; //Ś
            case (char)0x179: return (char)0xA0; //Ź
            case (char)0x17B: return (char)0xA1; //Ż
            case (char)0x105: return (char)0x86; //ą
            case (char)0x107: return (char)0x8D; //ć
            case (char)0x119: return (char)0x91; //ę
            case (char)0x142: return (char)0x92; //ł
            case (char)0x144: return (char)0xA4; //ń
            case (char)0xF3: return (char)0xA2; //ó
            case (char)0x15B: return (char)0x9E; //ś
            case (char)0x17A: return (char)0xA6; //ź
            case (char)0x17C: return (char)0xA7; //ż
            default: return znak;
        }
    }
}
Everything would be fine except after reading the generated file in the application I get one redundant char > before every diacritic. It looks like this
How to get rid of it? How to do it better?
Mazovia encoding is like code page 437 but it has different letters at some positions so you can't use 437.
If you implement MazoviaEncoding, you can easily use
// Write the text through the custom encoding so every character becomes one Mazovia byte.
Encoding encoding = new MazoviaEncoding();
String output = "ąśółńźć";
File.WriteAllText(@"test.txt", output, encoding);
//File.AppendAllText(@"test.txt", output, encoding);
// will work just as well, just pass the encoding as 3rd parameter
The file will contain:
0x86 0x9E 0xA2 0x92 0xA4 0xA6 0x8D
Which is correct according to http://en.wikipedia.org/wiki/Mazovia_encoding
The implementation can then be used like other Encoding in C#. For instance, reading the file back works as well:
// Read the file back, decoding each byte through the same custom encoding.
Encoding encoding = new MazoviaEncoding();
String result = File.ReadAllText(@"test.txt", encoding);
Here's my implementation:
using System.Collections.Generic;
using System.Text;
namespace System.Text {
    /// <summary>
    /// Single-byte <see cref="Encoding"/> for the Mazovia code page: laid out like
    /// code page 437, but with Polish letters at some positions.
    /// Every character encodes to exactly one byte and every byte decodes to exactly one character.
    /// Characters that have no entry in the table are encoded as '?' (0x3F).
    /// </summary>
    class MazoviaEncoding : Encoding
    {
        /// <summary>Byte written for characters that are not present in the code page ('?').</summary>
        private const byte ReplacementByte = 0x3F;

        /// <summary>Decode table: the index is the byte value, the element is the character's code point.</summary>
        private static readonly int[] codePoints = {
            0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F
            ,0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F
            ,0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027,0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F
            ,0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F
            ,0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F
            ,0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F
            ,0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F
            ,0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F
            ,0x00C7,0x00FC,0x00E9,0x00E2,0x00E4,0x00E0,0x0105,0x00E7,0x00EA,0x00EB,0x00E8,0x00EF,0x00EE,0x0107,0x00C4,0x0104
            ,0x0118,0x0119,0x0142,0x00F4,0x00F6,0x0106,0x00FB,0x00F9,0x015A,0x00D6,0x00DC,0x00A2,0x0141,0x00A5,0x015B,0x0192
            ,0x0179,0x017B,0x00F3,0x00D3,0x0144,0x0143,0x017A,0x017C,0x00BF,0x2310,0x00AC,0x00BD,0x00BC,0x00A1,0x00AB,0x00BB
            ,0x2591,0x2592,0x2593,0x2502,0x2524,0x2561,0x2562,0x2556,0x2555,0x2563,0x2551,0x2557,0x255D,0x255C,0x255B,0x2510
            ,0x2514,0x2534,0x252C,0x251C,0x2500,0x253C,0x255E,0x255F,0x255A,0x2554,0x2569,0x2566,0x2560,0x2550,0x256C,0x2567
            ,0x2568,0x2564,0x2565,0x2559,0x2558,0x2552,0x2553,0x256B,0x256A,0x2518,0x250C,0x2588,0x2584,0x258C,0x2590,0x2580
            ,0x03B1,0x00DF,0x0393,0x03C0,0x03A3,0x03C3,0x00B5,0x03C4,0x03A6,0x0398,0x03A9,0x03B4,0x221E,0x03C6,0x03B5,0x2229
            ,0x2261,0x00B1,0x2265,0x2264,0x2320,0x2321,0x00F7,0x2248,0x00B0,0x2219,0x00B7,0x221A,0x207F,0x00B2,0x25A0,0x00A0
        };

        /// <summary>Encode table: the inverse of <see cref="codePoints"/>, built once at type initialisation.</summary>
        private static readonly Dictionary<char, byte> unicodeToByte;

        static MazoviaEncoding()
        {
            unicodeToByte = new Dictionary<char, byte>(codePoints.Length);
            for (int i = 0; i < codePoints.Length; ++i)
            {
                unicodeToByte.Add((char)codePoints[i], (byte)i);
            }
        }

        /// <summary>Returns the maximum number of bytes needed for <paramref name="charCount"/> characters (one byte each).</summary>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="charCount"/> is negative.</exception>
        public override int GetMaxByteCount(int charCount)
        {
            if (charCount < 0)
            {
                throw new ArgumentOutOfRangeException("charCount");
            }
            return charCount;
        }

        /// <summary>Returns the maximum number of characters produced by <paramref name="byteCount"/> bytes (one character each).</summary>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="byteCount"/> is negative.</exception>
        public override int GetMaxCharCount(int byteCount)
        {
            if (byteCount < 0)
            {
                throw new ArgumentOutOfRangeException("byteCount");
            }
            return byteCount;
        }

        /// <summary>
        /// Encodes <paramref name="charCount"/> characters starting at <paramref name="charIndex"/>
        /// into <paramref name="bytes"/> starting at <paramref name="byteIndex"/>.
        /// </summary>
        /// <returns>The number of bytes written, which equals <paramref name="charCount"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="chars"/> or <paramref name="bytes"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// An index or the count is negative, the source range lies outside <paramref name="chars"/>,
        /// or <paramref name="bytes"/> has too little room from <paramref name="byteIndex"/>.
        /// </exception>
        public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
        {
            if (chars == null)
            {
                throw new ArgumentNullException("chars");
            }
            if (bytes == null)
            {
                throw new ArgumentNullException("bytes");
            }
            // Bounds are compared by subtraction so that large arguments cannot overflow int
            // and slip past the checks.
            if (charIndex < 0 || charIndex > chars.Length)
            {
                throw new ArgumentOutOfRangeException("charIndex");
            }
            if (charCount < 0 || charCount > chars.Length - charIndex)
            {
                throw new ArgumentOutOfRangeException("charCount");
            }
            if (byteIndex < 0 || byteIndex > bytes.Length || charCount > bytes.Length - byteIndex)
            {
                throw new ArgumentOutOfRangeException("byteIndex");
            }

            for (int offset = 0; offset < charCount; ++offset)
            {
                byte asMazovia;
                if (!unicodeToByte.TryGetValue(chars[charIndex + offset], out asMazovia))
                {
                    asMazovia = ReplacementByte;
                }
                bytes[byteIndex + offset] = asMazovia;
            }
            return charCount;
        }

        /// <summary>
        /// Decodes <paramref name="byteCount"/> bytes starting at <paramref name="byteIndex"/>
        /// into <paramref name="chars"/> starting at <paramref name="charIndex"/>.
        /// </summary>
        /// <returns>The number of characters written, which equals <paramref name="byteCount"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bytes"/> or <paramref name="chars"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// An index or the count is negative, the source range lies outside <paramref name="bytes"/>,
        /// or <paramref name="chars"/> has too little room from <paramref name="charIndex"/>.
        /// </exception>
        public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
        {
            if (bytes == null)
            {
                throw new ArgumentNullException("bytes");
            }
            if (chars == null)
            {
                throw new ArgumentNullException("chars");
            }
            if (byteIndex < 0 || byteIndex > bytes.Length)
            {
                throw new ArgumentOutOfRangeException("byteIndex");
            }
            if (byteCount < 0 || byteCount > bytes.Length - byteIndex)
            {
                throw new ArgumentOutOfRangeException("byteCount");
            }
            if (charIndex < 0 || charIndex > chars.Length || byteCount > chars.Length - charIndex)
            {
                throw new ArgumentOutOfRangeException("charIndex");
            }

            for (int offset = 0; offset < byteCount; ++offset)
            {
                // The table has 256 entries, so every byte value is a valid index.
                chars[charIndex + offset] = (char)codePoints[bytes[byteIndex + offset]];
            }
            return byteCount;
        }

        /// <summary>Returns the number of bytes needed to encode the given character range (one per character).</summary>
        /// <exception cref="ArgumentNullException"><paramref name="charArray"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">The range is negative or lies outside <paramref name="charArray"/>.</exception>
        public override int GetByteCount(char[] charArray, int index, int count)
        {
            if (charArray == null)
            {
                throw new ArgumentNullException("charArray");
            }
            if (index < 0 || index > charArray.Length)
            {
                throw new ArgumentOutOfRangeException("index");
            }
            if (count < 0 || count > charArray.Length - index)
            {
                throw new ArgumentOutOfRangeException("count");
            }
            return count;
        }

        /// <summary>Returns the number of characters produced by decoding the given byte range (one per byte).</summary>
        /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">The range is negative or lies outside <paramref name="bytes"/>.</exception>
        public override int GetCharCount(byte[] bytes, int index, int count)
        {
            if (bytes == null)
            {
                throw new ArgumentNullException("bytes");
            }
            if (index < 0 || index > bytes.Length)
            {
                throw new ArgumentOutOfRangeException("index");
            }
            if (count < 0 || count > bytes.Length - index)
            {
                throw new ArgumentOutOfRangeException("count");
            }
            return count;
        }
    }
}
Related
C# RNGCryptoServiceProvider and special characters
I'm looking for a way to get random characters.I need a string must be contain at 2 least uppercase letters, at least 1 number and special characters. Here is my code: public static string CreateRandomPassword(int Length) { string _Chars = "abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ[_!23456790"; Byte[] randomBytes = new Byte[Length]; var rng = new RNGCryptoServiceProvider(); rng.GetBytes(randomBytes); var chars = new char[Length]; int Count = _Chars.Length; for(int i = 0;i<Length;i++) { chars[i] = _Chars[(int)randomBytes[i] % Count]; } return new string(chars); } some results: ZNQzvUPFKOL3x BQSEkKHXACGO They haven't special characters and numbers.
// Your code works great! I've just wrapped it with a function that validates your conditions.
// I've executed the following:

/// <summary>
/// Builds a string of <paramref name="Length"/> characters, each picked from a fixed alphabet
/// using bytes from a cryptographic random number generator.
/// </summary>
/// <param name="Length">Number of characters to generate.</param>
/// <returns>The generated string; no particular mix of character classes is guaranteed.</returns>
public static string CreateRandomPassword(int Length)
{
    string _Chars = "abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ[_!23456790";
    Byte[] randomBytes = new Byte[Length];
    // The provider is disposable; release it as soon as the bytes have been drawn.
    using (var rng = new RNGCryptoServiceProvider())
    {
        rng.GetBytes(randomBytes);
    }
    var chars = new char[Length];
    int Count = _Chars.Length;
    for (int i = 0; i < Length; i++)
    {
        // NOTE(review): reducing a byte modulo the alphabet length is slightly biased
        // whenever 256 is not a multiple of that length.
        chars[i] = _Chars[(int)randomBytes[i] % Count];
    }
    return new string(chars);
}

/// <summary>
/// Generates random passwords until one contains at least two uppercase letters,
/// at least one digit and at least one special character, and returns it.
/// </summary>
/// <param name="length">Password length; must be at least 4 so the requirements can be met.</param>
/// <returns>A password of the requested length that satisfies all three requirements.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is less than 4.</exception>
public static string CreateRandomPasswordWith2UpperAnd1NumberAnd1Special(int length)
{
    // Two uppercase + one digit + one special need four positions; anything shorter would loop forever.
    if (length < 4)
    {
        throw new ArgumentOutOfRangeException("length", "At least 4 characters are needed to satisfy the requirements.");
    }
    while (true)
    {
        var pass = CreateRandomPassword(length);
        int upper = 0, num = 0, special = 0;
        foreach (var c in pass)
        {
            // Range checks are inclusive so that 'A', 'Z', 'a', 'z', '0' and '9'
            // land in their own class and are not counted as special characters.
            if (c >= 'A' && c <= 'Z')
            {
                upper++;
            }
            else if (c >= 'a' && c <= 'z')
            {
                // Lowercase letters carry no requirement.
            }
            else if (c >= '0' && c <= '9')
            {
                num++;
            }
            else
            {
                special++;
            }
        }
        if (upper >= 2 && num >= 1 && special >= 1)
        {
            return pass;
        }
    }
}

[Test]
public void CreateRandomPassword_Length13_RandomPasswordWithNumbers()
{
    var random = CreateRandomPasswordWith2UpperAnd1NumberAnd1Special(13);
    Assert.IsTrue(random.Length == 13);
}
Largest substring composed of identical characters
I want to develop method that will return the length of largest substring composed of identical characters form string that is passed as argument, but without using any of .NET libraries. For example if we pass aaacccccdefffgg as parameter the biggest substring is ccccc and method should return 5. Here is my working solution : public static int GetMaxSubstringLenght(char[] myArray) { int max = 0; for (int i = 0; i < myArray.Length-1; i++) { if (myArray.Length == 0) { return 0; } else { int j = i + 1; int currentMax = 1; // string has some value, so we start with 1 while (myArray[i] == myArray[j]) { currentMax++; if (max < currentMax) { max = currentMax; } j++; } } } return max; } The code above will return expected result, but there will be some unnecessary iteration in for loop that I want to avoid. In first iteration when i=0it will compare it until j=2 and then will get out of while loop and start second iteration in for loop comparing the one at [1] index with [2], which we already did in previous iteration.So basically, when first iteration is completed, next one should start from the last value of j. How can I achieve that ? Thank You in advance.
Since you want "Largest substring..." let's take String as argument and return String public static String GetMaxSubstring(String value) { if (String.IsNullOrEmpty(value)) return ""; int bestCount = 0; char bestChar = '\0'; int currentCount = 0; char current = '\0'; for (int i = 0; i < value.Length; ++i) { if ((i == 0) || (value[i] != current)) currentCount = 0; currentCount += 1; current = value[i]; if (currentCount > bestCount) { bestCount = currentCount; bestChar = current; } } return new String(bestChar, bestCount); } .... // "ccccc" String result = GetMaxSubstring("aaacccccdefffgg"); // 5 int length = result.Length;
Another approach: public static int MaxSubstringLength(string s) { if (string.IsNullOrEmpty(s)) return 0; int max = 0, cur = 1; for (int i = 1; i < s.Length; ++i, ++cur) { if (s[i] != s[i-1]) { max = cur > max ? cur : max; cur = 0; } } return cur > max ? cur : max; } [EDIT] Simplified the code. [EDIT2] Simplified the code further.
// You can also do it with one loop:

/// <summary>
/// Returns the length of the longest run of identical consecutive characters in <paramref name="myArray"/>.
/// </summary>
/// <param name="myArray">Characters to scan.</param>
/// <returns>The longest run length, or 0 when the array is null or empty.</returns>
public static int GetMaxSubstringLenght(char[] myArray)
{
    // Nothing to scan: also avoids reading myArray[0] on an empty array.
    if (myArray == null || myArray.Length == 0)
    {
        return 0;
    }
    int max = 0;
    int count = 0;
    char currentchar = myArray[0];
    foreach (char c in myArray)
    {
        if (currentchar != c)
        {
            // A new run starts with this character.
            count = 0;
            currentchar = c;
        }
        count++;
        if (count > max)
        {
            max = count;
        }
    }
    return max;
}
// I changed the code... now this code does not use Math.Max and I think I eliminated the mistake...
// I've no IDE at the moment to test it.
/// <summary>
/// Returns the length of the longest run of identical consecutive characters in <paramref name="myArray"/>.
/// </summary>
/// <param name="myArray">Characters to scan.</param>
/// <returns>The longest run length; 0 for an empty array.</returns>
public static int GetMaxSubstringLenght(char[] myArray)
{
    if (myArray.Length == 0)
    {
        return 0;
    }
    int max = 1;
    int localMax = 1;
    // Every adjacent pair must be compared. Cutting the scan short by the best length
    // found so far ignores the run currently being counted and misses a longer run
    // near the end (e.g. "aabbb").
    for (int i = 0; i < myArray.Length - 1; i++)
    {
        if (myArray[i] == myArray[i + 1])
        {
            localMax++;
        }
        else
        {
            max = Math.Max(max, localMax);
            localMax = 1;
        }
    }
    // The final run is never closed by a differing character, so account for it here.
    return Math.Max(max, localMax);
}
/// <summary>
/// Returns the length of the longest run of identical consecutive characters in <paramref name="s"/>.
/// </summary>
/// <param name="s">Text to scan; may be null.</param>
/// <returns>The longest run length, or 0 when <paramref name="s"/> is null or empty.</returns>
static int LongestCharSequence(string s)
{
    if (string.IsNullOrEmpty(s))
    {
        return 0;
    }

    int best = 1;
    int run = 1;
    for (int i = 1; i < s.Length; i++)
    {
        // Extend the current run while the character repeats; otherwise start a new one.
        run = s[i] == s[i - 1] ? run + 1 : 1;
        if (run > best)
        {
            best = run;
        }
    }
    return best;
}
recursion! static int LongestCharSequence(string s) { int i = (s?.Length ?? 0) == 0 ? 0 : 1; for (; i < s?.Length; i++) if (s[i] != s[i - 1]) return Math.Max(i, LongestCharSequence(s.Substring(i))); return i; }
Another solution using my favorite nested loop technique: public static int MaxSubstringLength(string s) { int maxLength = 0; for (int length = s != null ? s.Length : 0, pos = 0; pos < length;) { int start = pos; while (++pos < length && s[pos] == s[start]) { } maxLength = Math.Max(maxLength, pos - start); } return maxLength; }
Dynamic Bit Shifting / Unshifting
i'd like to store multiple values of blocks into an integer in the same way as i can convert an ip into an int. My problem is that i need to do this for n blocks, not only 4. Also i need to specifiy the maximum value of each block, which is allways the same for all blocks. So, for the Example below, if i'd like to store the whole IP range my BlockCount is 4 and my BlockSize is 255, which is the max value of each block. But it seems not to work if i lower my BlockSize and/or BlockCount. This is my first try, but it's not working correctly: const int BlockSize = 100; const int BlockCount = 3; int shiftedValues = Shift(BlockSize, BlockCount); for (int shiftedValue = 1; shiftedValue <= shiftedValues; shiftedValue++) { for (int index = 0; index <= BlockCount; index++) { int blockValue = Unshift(index, shiftedValue); } } private static int Shift(int blockSize, int blockCount) { int result = 0; for (int i = 0; i < blockCount; i++) { result += ( blockSize << 8 * i ); } return result; } private static int Unshift(int blockIndex, int shiftedValue) { return ( shiftedValue >> ( blockIndex * 8 ) ) & 0xFF; }
The code above is my solution, it's pretty simple code, but feel free to ask clarification about it. class Program { static void Main(string[] args) { int [] items = { 150 , 78 , 44 } ; int x = Program.Pack ( items , 150 ) ; int [] unpacked = Program.UnPack ( x , 150 , 3 ) ; } public static int Pack ( int[] blocks , int blockSize ) { int size = (int)Math.Ceiling(Math.Log(blockSize, 2)); int len = size * blocks.Length; if (len > 32) throw new Exception("Int Limit Exceeded"); if ( blocks.Any ( x => x > blockSize ) ) throw new Exception ( "There are some blocks that exceede the maximum block size" ); List<bool> bools = new List<bool>(); bools = bools.InitBoolArray(32); int i = 0 ; foreach (int block in blocks) { BitArray temp = block.ToBinary().Take(size); for ( int j = 0 ; j < size ; i++ , j++ ) bools[i] = temp.Get(j); } return (new BitArray ( bools.ToArray() ) ).ToNumeral() ; } public static int[] UnPack ( int entry , int blockSize , int blockCount ) { BitArray number = entry.ToBinary(); int size = (int)Math.Ceiling(Math.Log(blockSize, 2)); if (size > 32) throw new Exception("Int Limit Exceeded"); List<int> result = new List<int>(); for (int i = 0; i < blockCount; i++) { BitArray temp = number.Take(size); number = number.Shift (size ); result.Add(temp.FitSize(32).ToNumeral()); } return result.ToArray() ; } } There extension method used public static class BinaryConverter { public static BitArray ToBinary(this int numeral) { return new BitArray(new[] { numeral }); } public static int ToNumeral(this BitArray binary) { if (binary == null) throw new ArgumentNullException("binary"); if (binary.Length > 32) throw new ArgumentException("must be at most 32 bits long"); var result = new int[1]; binary.CopyTo(result, 0); return result[0]; } public static BitArray Take (this BitArray current, int length ) { if (current.Length < length) throw new Exception("Invalid length parameter"); List<bool> taken = new List<bool>(); for (int i = 0; i < length; i++) 
taken.Add(current.Get(i)); return new BitArray(taken.ToArray()); } public static BitArray Shift (this BitArray current, int length ) { if (current.Length < length) throw new Exception("Invalid length parameter"); List<bool> shifted = new List<bool>(); for (int i = 0; i < current.Length - length; i++) shifted.Add(current.Get(length + i)); return new BitArray(shifted.ToArray()); } public static BitArray FitSize (this BitArray current, int size) { List<bool> bools = new List<bool>() ; bools = bools.InitBoolArray(size); for (int i = 0; i < current.Count; i++) bools[i] = current.Get(i) ; return new BitArray(bools.ToArray()); } public static List<bool> InitBoolArray(this List<bool> current, int size) { List<bool> bools = new List<bool> (); for (int i = 0; i < size; i++) bools.Add(false); return bools ; } }
Determine the unique string from a repeating string in C#
I need to develop an efficient algorithm for determining the unique (repeated) string given a string with repeating content (and only repeating content)... For example: "AbcdAbcdAbcdAbcd" => "Abcd" "Hello" => "Hello" I'm having some trouble coming up with an algorithm that is fairly efficient; any input would be appreciated. Clarification: I want the shortest string that, when repeated enough times, is equal to the total string.
/// <summary>
/// Finds the shortest prefix of <paramref name="value"/> that reproduces the whole string
/// when repeated; returns <paramref name="value"/> itself when no shorter prefix does.
/// </summary>
/// <param name="value">Text to analyse.</param>
/// <returns>The shortest repeating prefix, or the input when it does not repeat.</returns>
/// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
private static string FindShortestRepeatingString(string value)
{
    if (value == null)
    {
        throw new ArgumentNullException("value", "The value paramter is null.");
    }

    // A repeating unit occurs at least twice, so it is at most half the string long.
    int longestCandidate = value.Length / 2;
    int candidate = 1;
    while (candidate <= longestCandidate)
    {
        if (IsRepeatingStringOfLength(value, candidate))
        {
            return value.Substring(0, candidate);
        }
        candidate++;
    }
    return value;
}

/// <summary>
/// Tells whether <paramref name="value"/> consists solely of repetitions of its first
/// <paramref name="substringLength"/> characters.
/// </summary>
private static bool IsRepeatingStringOfLength(string value, int substringLength)
{
    // The unit must tile the string exactly.
    if (value.Length % substringLength != 0)
    {
        return false;
    }

    int repetitions = value.Length / substringLength;
    // Compare every later copy, position by position, against the first copy.
    for (int copy = 1; copy < repetitions; copy++)
    {
        int copyStart = copy * substringLength;
        for (int offset = 0; offset < substringLength; offset++)
        {
            if (value[copyStart + offset] != value[offset])
            {
                return false;
            }
        }
    }
    return true;
}
Maybe this can work: static string FindShortestSubstringPeriod(string input) { if (string.IsNullOrEmpty(input)) return input; for (int length = 1; length <= input.Length / 2; ++length) { int remainder; int repetitions = Math.DivRem(input.Length, length, out remainder); if (remainder != 0) continue; string candidate = input.Remove(length); if (String.Concat(Enumerable.Repeat(candidate, repetitions)) == input) return candidate; } return input; }
// Something like this:

/// <summary>
/// Returns the shortest prefix of <paramref name="str"/> that, repeated, rebuilds the whole string;
/// returns <paramref name="str"/> itself when no shorter prefix does.
/// </summary>
/// <param name="str">Text to analyse.</param>
/// <returns>The shortest repeating prefix, or the input when it does not repeat.</returns>
public string ShortestRepeating(string str)
{
    for (int len = 1; len <= str.Length / 2; len++)
    {
        if (str.Length % len == 0)
        {
            string sub = str.Substring(0, len);
            StringBuilder builder = new StringBuilder(str.Length);
            // Rebuild a string of the original length from copies of the candidate.
            while (builder.Length < str.Length)
            {
                builder.Append(sub);
            }
            if (str == builder.ToString())
            {
                return sub;
            }
        }
    }
    return str;
}
// This just starts looking at substrings starting at the beginning and then repeats them to see
// if they match. It also skips any candidate whose length does not evenly divide the original
// string's length, and only goes up to length / 2 since anything over that cannot be a candidate
// for repeating.
// I'd go with something like this:

/// <summary>
/// Returns the shortest prefix of <paramref name="str"/> whose repetition equals the whole string;
/// returns <paramref name="str"/> itself when no shorter prefix does.
/// </summary>
/// <param name="str">Text to analyse; null or empty input is returned unchanged.</param>
/// <returns>The shortest repeating prefix, or the input when it does not repeat.</returns>
private static string FindRepeat(string str)
{
    // Without this guard an empty string would request a range with a negative count.
    if (string.IsNullOrEmpty(str))
    {
        return str;
    }

    // Only lengths that divide the string evenly can tile it.
    var lengths = Enumerable.Range(1, str.Length - 1)
                            .Where(len => str.Length % len == 0);
    foreach (int len in lengths)
    {
        bool matched = true;
        // Compare each chunk of the candidate length against the first chunk.
        for (int index = 0; matched && index < str.Length; index += len)
        {
            for (int i = index; i < index + len; ++i)
            {
                if (str[i - index] != str[i])
                {
                    matched = false;
                    break;
                }
            }
        }
        if (matched)
        {
            return str.Substring(0, len);
        }
    }
    return str;
}
Try this regular expression: ^(\w*?)\1*$ It captures as few characters as possible where the captured sequence (and only the captured sequence) repeat 0 or more times. You can get the text of the shortest match from the capture afterwards, as per Jacob's answer.
// You could use a regular expression with back-references.
// `input` is the string to analyse. The lazy group captures the shortest prefix,
// and \1* requires the rest of the string to consist of copies of that prefix.
Match match = Regex.Match(input, @"^(.*?)\1*$");
// Group 0 is the whole match; the captured repeating unit is group 1.
String smallestRepeat = match.Groups[1].Value;
Very interested in the implementation of Array.Reverse [duplicate]
This question already has answers here: Closed 10 years ago. Possible Duplicate: Logic behind the Array.Reverse method I cannot find the source of this method. Does anyone know how it is implemented? char[] inputstream = source.ToCharArray(); Array.Reverse(inputstream);
Using .NET Reflector :) public static void Reverse(Array array, int index, int length) { if (array == null) { throw new ArgumentNullException("array"); } if ((index < array.GetLowerBound(0)) || (length < 0)) { throw new ArgumentOutOfRangeException((index < 0) ? "index" : "length", Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum")); } if ((array.Length - (index - array.GetLowerBound(0))) < length) { throw new ArgumentException(Environment.GetResourceString("Argument_InvalidOffLen")); } if (array.Rank != 1) { throw new RankException(Environment.GetResourceString("Rank_MultiDimNotSupported")); } if (!TrySZReverse(array, index, length)) { int num = index; int num2 = (index + length) - 1; object[] objArray = array as object[]; if (objArray == null) { while (num < num2) { object obj3 = array.GetValue(num); array.SetValue(array.GetValue(num2), num); array.SetValue(obj3, num2); num++; num2--; } } else { while (num < num2) { object obj2 = objArray[num]; objArray[num] = objArray[num2]; objArray[num2] = obj2; num++; num2--; } } } }