pdf content stream parsing - c#

I need help with parsing a PDF.
The PDF was built in Illustrator; it has 4 layers, and each layer has one graphic path object.
What I want to do is get all 4 graphic paths and draw them in another PDF file that has the same width and height as this PDF, and I want to draw them in the same positions.
This is the code I started to write:
/// <summary>
/// Splits the page's content stream into one <see cref="PDFMask"/> per entry of the
/// page's /Resources /Properties dictionary. Each mask receives the bytes of one
/// marked-content section, i.e. everything up to and including the next "EMC " marker.
/// </summary>
/// <param name="page">The page whose /Contents stream and /Properties are read.</param>
/// <returns>
/// The masks in property-key order. Fewer masks than keys are returned when the stream
/// contains fewer "EMC " markers than there are keys.
/// </returns>
public static List<PDFMask> GetMasksFromPage(PdfPage page)
{
    List<PDFMask> masks = new List<PDFMask>();
    PdfDictionary contents = page.Elements.GetDictionary("/Contents");
    PdfDictionary resources = page.Elements.GetDictionary("/Resources");
    PdfDictionary properties = resources.Elements.GetDictionary("/Properties");
    PdfName[] keys = properties.Elements.KeyNames;

    // Read the unfiltered stream and build the marker once instead of on every search.
    byte[] stream = contents.Stream.UnfilteredValue;
    byte[] endMarker = Encoding.ASCII.GetBytes("EMC ");

    int dataStartPointer = 0;
    for (int i = 0; i < keys.Length && dataStartPointer < stream.Length; i++)
    {
        int dataEndPointer = Utils.Bytes.IndexOf(stream, endMarker, dataStartPointer);
        if (dataEndPointer < 0)
        {
            // NOTE(review): assumes Utils.Bytes.IndexOf returns a negative value when the
            // marker is not found - confirm against the helper.
            break;
        }

        // The section runs from the current start up to and including the marker.
        int dataCount = dataEndPointer + endMarker.Length - dataStartPointer;

        PdfDictionary mc = properties.Elements.GetDictionary(keys[i].Value);
        PDFMask mask = new PDFMask();
        mask.name = mc.Elements.GetString("/Title");
        mask.key = keys[i].Value;

        // Copy into the freshly allocated local buffer; mask.data is still null here.
        byte[] data = new byte[dataCount];
        Array.Copy(stream, dataStartPointer, data, 0, dataCount);
        mask.parseData(data);
        masks.Add(mask);

        // Skip the one byte that follows the marker before searching for the next section.
        dataStartPointer += dataCount + 1;
    }
    return masks;
}
The code above is used to get all the layers' data and separate it into 4 objects.
PdfDictionary.PdfStream contentsStream = contents.Stream;
This line gives me the binary graphics data of the 4 layers.
This is the PDFMask class that represents one layer:
/// <summary>
/// Holds the raw content-stream bytes of a single layer together with its name and key.
/// </summary>
public class PDFMask
{
    /// <summary>Layer name.</summary>
    public string name;

    /// <summary>Key identifying the layer.</summary>
    public string key;

    /// <summary>Raw bytes handed to <see cref="parseData"/>.</summary>
    public byte[] data;

    /// <summary>
    /// Stores the given bytes unchanged. No parsing happens yet; turning the bytes into a
    /// drawable graphics object is still an open question.
    /// </summary>
    /// <param name="data">Raw content-stream bytes for this layer.</param>
    public void parseData(byte[] data) => this.data = data;
}
This is what the data source looks like:
/Layer /MC0 BDC
0.75 0.68 0.67 0.902 k
/GS0 gs
q 1 0 0 1 396.4473 1835.6143 cm
0 0 m
76.497 -132.515 l
-17.184 -159.051 l
76.496 -185.607 l
-0.003 -318.119 l
-72.563 -252.047 l
-50.486 -349.178 l
-202.179 -349.182 l
-180.097 -252.046 l
-252.658 -318.116 l
-329.154 -185.603 l
-235.473 -159.048 l
-329.154 -132.511 l
-252.654 0.002 l
-180.094 -66.07 l
-202.175 31.087 l
-50.482 31.081 l
-72.563 -66.072 l
h
f
Q
EMC
I am looking for a parser (I would prefer a PDFsharp parser)
that can parse this data into a graphics object that I could use in another PDF document.

OK, what I did to solve this was to build my own parser for my own needs. I will show the code here; I am sure it will help someone someday...
/// <summary>
/// One path-construction directive. <c>t</c> is the operator byte ('m', 'l' or 'c');
/// <c>x</c>/<c>y</c> hold the last two operands, and for 'c' the first four operands are
/// stored in <c>a</c>, <c>b</c>, <c>c</c>, <c>d</c>.
/// </summary>
public struct GD { public double x, y, a, b, c, d; public byte t; }

/// <summary>
/// The six numeric operands of a "q ... cm" line, stored in the order they appear.
/// </summary>
public struct Coordinate { public double locX, locY, oriX, oriY, xAxis, yAxis; }

/// <summary>
/// One layer of the PDF: its name, key, raw content-stream bytes and the path directives
/// and transform parsed from those bytes.
/// </summary>
public class PDFMask
{
    private string _name;

    /// <summary>Text from the first '{' onwards of the last name assigned that contained one.</summary>
    public string fun;

    /// <summary>
    /// Layer name. When the assigned value contains '{', the part before it becomes the
    /// name and the remainder (including the brace) is stored in <see cref="fun"/>.
    /// </summary>
    public string name
    {
        get
        {
            return _name;
        }
        set
        {
            int braceIndex = value.IndexOf('{');
            if (braceIndex >= 0)
            {
                _name = value.Substring(0, braceIndex);
                fun = value.Substring(braceIndex);
            }
            else
            {
                _name = value;
            }
        }
    }

    public string key;
    public byte[] data;
    public GD[] graphicsDirectives;
    public Coordinate coordinate;

    /// <summary>
    /// Parses the layer's content-stream bytes line by line. Lines ending in " m", " l" or
    /// " c" become entries of <see cref="graphicsDirectives"/>; a line starting with 'q'
    /// and ending in "cm" sets <see cref="coordinate"/>. All other lines are ignored.
    /// </summary>
    /// <param name="data">Raw ASCII content-stream bytes.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="data"/> is null.</exception>
    public void parseData(byte[] data)
    {
        if (data == null)
        {
            throw new ArgumentNullException(nameof(data));
        }

        this.data = data;

        // A growable list replaces the fixed 100-entry array, which overflowed on long paths.
        var directives = new System.Collections.Generic.List<GD>();
        int lineStart = 0;

        // Iterate one past the end so a final line without a trailing newline is processed too.
        for (int i = 0; i <= data.Length; i++)
        {
            bool atEnd = i == data.Length;
            if (!atEnd && data[i] != (byte)'\n' && data[i] != (byte)'\r')
            {
                continue;
            }

            int length = i - lineStart;

            // Ignore trailing blanks so "0 0 m " is recognised like "0 0 m".
            while (length > 0 && (data[lineStart + length - 1] == (byte)' ' || data[lineStart + length - 1] == (byte)'\t'))
            {
                length--;
            }

            if (length > 0)
            {
                byte[] line = new byte[length];
                Array.Copy(data, lineStart, line, 0, length);

                if (IsPathDirective(line))
                {
                    directives.Add(parseDataWriteGD(line, length));
                }
                else if (IsTransform(line))
                {
                    coordinate = parseDataWriteCoordinate(line, length);
                }
            }

            lineStart = i + 1;
        }

        graphicsDirectives = directives.ToArray();
    }

    /// <summary>
    /// Parses a "q a b c d e f cm" line into a <see cref="Coordinate"/>.
    /// </summary>
    /// <param name="bytes">Buffer holding the ASCII line.</param>
    /// <param name="count">Number of valid bytes at the start of <paramref name="bytes"/>.</param>
    /// <returns>The six operands following the leading "q" token, in order.</returns>
    public Coordinate parseDataWriteCoordinate(byte[] bytes, int count)
    {
        string[] values = SplitTokens(bytes, count);
        Coordinate c = new Coordinate();
        // values[0] is the "q" operator; the six matrix operands follow it.
        c.locX = ParseNumber(values[1]);
        c.locY = ParseNumber(values[2]);
        c.oriX = ParseNumber(values[3]);
        c.oriY = ParseNumber(values[4]);
        c.xAxis = ParseNumber(values[5]);
        c.yAxis = ParseNumber(values[6]);
        return c;
    }

    /// <summary>
    /// Parses a path line ("x y m", "x y l" or "a b c d x y c") into a <see cref="GD"/>.
    /// </summary>
    /// <param name="bytes">Buffer holding the ASCII line.</param>
    /// <param name="count">Number of valid bytes at the start of <paramref name="bytes"/>.</param>
    /// <returns>The directive with its operator byte and operands filled in.</returns>
    public GD parseDataWriteGD(byte[] bytes, int count)
    {
        string[] values = SplitTokens(bytes, count);
        GD gd = new GD();
        gd.t = (byte)values[values.Length - 1][0];
        if (gd.t == 'c')
        {
            gd.a = ParseNumber(values[0]);
            gd.b = ParseNumber(values[1]);
            gd.c = ParseNumber(values[2]);
            gd.d = ParseNumber(values[3]);
            gd.x = ParseNumber(values[4]);
            gd.y = ParseNumber(values[5]);
        }
        else
        {
            gd.x = ParseNumber(values[0]);
            gd.y = ParseNumber(values[1]);
        }
        return gd;
    }

    // True when the line ends with a space followed by one of the operators c, l or m.
    private static bool IsPathDirective(byte[] line)
    {
        if (line.Length <= 2 || line[line.Length - 2] != (byte)' ')
        {
            return false;
        }
        byte op = line[line.Length - 1];
        return op == (byte)'c' || op == (byte)'l' || op == (byte)'m';
    }

    // True when the line starts with 'q' and ends with the "cm" operator.
    private static bool IsTransform(byte[] line)
    {
        return line.Length > 3
            && line[0] == (byte)'q'
            && line[line.Length - 2] == (byte)'c'
            && line[line.Length - 1] == (byte)'m';
    }

    // Decodes the first count bytes as ASCII and splits them on spaces.
    private static string[] SplitTokens(byte[] bytes, int count)
    {
        return Encoding.ASCII.GetString(bytes, 0, count)
            .Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    }

    // PDF numbers always use '.' as the decimal separator, so parse culture-independently.
    private static double ParseNumber(string text)
    {
        return double.Parse(text, System.Globalization.CultureInfo.InvariantCulture);
    }
}

Related

C# All value of before elements be assign by last element in my class array even though I not access that before elements twice

This is my class I created:
/// <summary>
/// A pair of character arrays: a left-hand side and a right-hand side.
/// </summary>
public class FD
{
    /// <summary>Left-hand characters; null until assigned.</summary>
    public char[] myL { get; set; }

    /// <summary>Right-hand characters; null until assigned.</summary>
    public char[] myR { get; set; }
}
and my function:
/// <summary>
/// Splits <paramref name="element"/> into one <c>FD</c> per right-hand character: every
/// result keeps a copy of the left-hand side and exactly one character of the right-hand side.
/// </summary>
/// <param name="element">The source pair; it is not modified.</param>
/// <returns>
/// An array with one entry per character of <c>element.myR</c> (empty when that array is null).
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="element"/> is null.</exception>
public FD[] DetachedRight(FD element)
{
    if (element == null)
    {
        throw new ArgumentNullException(nameof(element));
    }

    char[] right = element.myR ?? new char[0];
    FD[] FDtemp = new FD[right.Length];

    for (int i = 0; i < right.Length; i++)
    {
        // A new FD and a new one-element array per slot: assigning `element` itself (or a
        // shared buffer) would make every slot alias the same object, so the last write
        // would show up in all of them.
        FDtemp[i] = new FD
        {
            myL = element.myL == null ? null : (char[])element.myL.Clone(),
            myR = new[] { right[i] }
        };
    }

    for (int j = 0; j < FDtemp.Length; j++)
    {
        // myL/myR are properties, not methods; show left ==> right for each piece.
        MessageBox.Show(new string(FDtemp[j].myL) + "==>" + new string(FDtemp[j].myR), "Result");
    }

    return FDtemp;
}
This is what I want:
Example: input (element parameter in code) with value element.myL = FD and element.myR = XYZ
I need: output is an array type FD, in example, it has 3 element in array:
FDtemp[0].myL = FD and FDtemp[0].myR = X;
FDtemp[1].myL = FD and FDtemp[1].myR = Y;
FDtemp[2].myL = FD and FDtemp[2].myR = Z;
In fact, the result I get is that FDtemp[0], FDtemp[1] and FDtemp[2] are all the same: every .myL = FD and every .myR = Z (the value from the last loop iteration).
Could you help me explain and solve this?
Also: earlier, when I did not use char[] temp = element.myR; in my function and OneChar[0] = temp[i]; was replaced by OneChar[0] = element.myR[i];, the total length of element.myR was already 1 on the second iteration of the for loop. In my example element.myR was "X", and "XYZ" no longer existed. Why? I do not understand; please help me explain that.

C# Function to translate Binary-Code

Right now I am trying to write a C# program to translate 8-bit binary into text.
But I guess I am not experienced enough with C# to really make it work.
I think the code I came up with should, from a logical point of view, more or less do what I want, but the syntax isn't right, since I don't know any better.
This is what I have so far:
using System;
using System.Linq;
using System.Text;
/// <summary>
/// Translates a string of '0'/'1' characters into text, eight bits per character.
/// </summary>
class binaryTranslate
{
    /// <summary>
    /// Converts each complete group of eight binary digits (most significant bit first)
    /// into the character with that code. Trailing digits that do not fill a whole group
    /// are ignored.
    /// </summary>
    /// <param name="CodeInput">Binary digits without separators, e.g. "0100100001100001".</param>
    /// <returns>The decoded text.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="CodeInput"/> is null.</exception>
    /// <exception cref="System.FormatException">A character other than '0' or '1' is found.</exception>
    public string convertBin(string CodeInput)
    {
        if (CodeInput == null)
        {
            throw new ArgumentNullException(nameof(CodeInput));
        }

        const int BitsPerByte = 8;
        int EightBaseSegAmount = CodeInput.Length / BitsPerByte;
        StringBuilder translated = new StringBuilder(EightBaseSegAmount);

        for (int i = 0; i < EightBaseSegAmount; i++)
        {
            int value = 0;
            for (int k = 0; k < BitsPerByte; k++)
            {
                char bit = CodeInput[i * BitsPerByte + k];
                if (bit != '0' && bit != '1')
                {
                    throw new FormatException("Binary input may only contain '0' and '1'.");
                }

                // Shift the accumulated value left and append the new bit (MSB first).
                value = (value << 1) | (bit - '0');
            }
            translated.Append((char)value);
        }

        return translated.ToString();
    }

    public static int Main()
    {
        // convertBin is an instance method, so an instance is needed from static Main.
        Console.WriteLine(new binaryTranslate().convertBin("010010000110000101101100011011000110111100100001"));
        return 0;
    }
}
First of all, your code won't compile. Here are the errors/mistakes.
The first one is, at the first line of your function, you are converting the input string to an array using String.ToArray(), which returns a char[] but your try to assign it to a variable (code) typed int[]. You can solve this by replacing the int[] with either char[] or var.
The second one is inside the second for loop (k = 0; k > 7): you use Math.Pow() and assign its return value to an int variable (Squared), but Math.Pow returns double. You can solve this by casting the return value of Math.Pow to int, like this: int Squared = (int)Math.Pow(2, k);
The last thing is not easily solvable like the first two because, your code is not exactly correct. You are trying to return something called translated, which is a variable of type StringBuilder. But your function is defined to return an int.
Now these were compile errors. There are a bunch of logical and decision errors/mistakes. Your algorithm also isn't very correct.
Here is a sample code you can use/examine. I'd like to help you further, why your code was incorrect, what was your design mistakes etc. if you want to.
/// <summary>
/// Converts strings of '0'/'1' characters into bytes, eight digits per byte.
/// </summary>
class binaryTranslate
{
    /// <summary>What to do with a final segment that has fewer than eight digits.</summary>
    public enum IncompleteSegmentBehavior
    {
        /// <summary>Do not convert the segment; its slot in the result stays 0.</summary>
        Skip = 0,
        /// <summary>Pad with zeros on the left (segment becomes the low-order bits).</summary>
        ZerosToStart = 1,
        /// <summary>Pad with zeros on the right (segment becomes the high-order bits).</summary>
        ZerosToEnd = 2
    }

    // Converts exactly eight '0'/'1' characters (MSB first) to a byte.
    // Returns 0 for null/empty input, wrong length, or any other character.
    private byte ConvertBinstrToByte(string sequence)
    {
        if (string.IsNullOrEmpty(sequence))
            return 0; // Throw?
        if (sequence.Length != sizeof(byte) * 8)
            return 0; // Throw?
        const char zero = '0';
        const char one = '1';
        byte value = 0;
        for (int i = 0; i < sequence.Length; i++)
        {
            if (sequence[i] != zero && sequence[i] != one)
                return 0; // Throw
            value |= (byte)((sequence[i] - zero) << (7 - i));
        }
        return value;
    }

    // Pads a short segment to segmentSize according to behavior.
    // Returns null when the segment should be skipped.
    private string HandleIncompleteSegment(string segment, int segmentSize, IncompleteSegmentBehavior behavior)
    {
        switch (behavior)
        {
            case IncompleteSegmentBehavior.ZerosToStart:
                // Pad the segment itself; padding the (null) result dropped the digits.
                return segment.PadLeft(segmentSize, '0');
            case IncompleteSegmentBehavior.ZerosToEnd:
                return segment.PadRight(segmentSize, '0');
            default:
                return null;
        }
    }

    /// <summary>
    /// Converts a binary-digit string into bytes, most significant bit first.
    /// </summary>
    /// <param name="binarySequence">Digits '0' and '1' without separators.</param>
    /// <param name="behavior">How to treat a final group shorter than eight digits.</param>
    /// <returns>
    /// One byte per group of eight digits, rounded up. A group that is skipped or that
    /// contains invalid characters yields 0.
    /// </returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="binarySequence"/> is null.</exception>
    public byte[] ConvertBinstrToBytes(string binarySequence, IncompleteSegmentBehavior behavior = IncompleteSegmentBehavior.Skip)
    {
        if (binarySequence == null)
        {
            throw new ArgumentNullException(nameof(binarySequence));
        }

        var segmentSize = sizeof(byte) * 8;
        var sequenceLength = binarySequence.Length;
        var numberOfBytes = (int)Math.Ceiling((double)sequenceLength / segmentSize);
        var bytes = new byte[numberOfBytes];
        for (int i = 0; i < numberOfBytes; i++)
        {
            var charactersLeft = sequenceLength - i * segmentSize;
            var segmentLength = (charactersLeft < segmentSize ? charactersLeft : segmentSize);
            var segment = binarySequence.Substring(i * segmentSize, segmentLength);
            if (charactersLeft < segmentSize)
            {
                segment = HandleIncompleteSegment(segment, segmentSize, behavior);
                if (segment == null)
                    continue;
            }
            bytes[i] = ConvertBinstrToByte(segment);
        }
        return bytes;
    }
}
This code passes these assertions.
// Assert.Equal takes the expected value first and the actual value second; keeping that
// order makes failure messages report the right "expected" and "actual".

// All zeros.
var bytes = new binaryTranslate()
    .ConvertBinstrToBytes("00000000");
Assert.Equal(1, bytes.Length);
Assert.Equal(0b00000000, bytes[0]);

// Only the most significant bit set.
bytes = new binaryTranslate()
    .ConvertBinstrToBytes("10000000");
Assert.Equal(1, bytes.Length);
Assert.Equal(0b10000000, bytes[0]);

// All ones.
bytes = new binaryTranslate()
    .ConvertBinstrToBytes("11111111");
Assert.Equal(1, bytes.Length);
Assert.Equal(0b11111111, bytes[0]);

// Only the least significant bit set.
bytes = new binaryTranslate()
    .ConvertBinstrToBytes("00000001");
Assert.Equal(1, bytes.Length);
Assert.Equal(0b00000001, bytes[0]);

// Two complete bytes.
bytes = new binaryTranslate()
    .ConvertBinstrToBytes("1100110000110011");
Assert.Equal(2, bytes.Length);
Assert.Equal(0b11001100, bytes[0]);
Assert.Equal(0b00110011, bytes[1]);
If you are really converting to a string the code should look like this
namespace binaryTranslate
{
    class Program
    {
        static void Main(string[] args)
        {
            // Sample input: six raw bytes; the result is computed but not displayed.
            byte[] sample = { 0x44, 0x61, 0x6c, 0x6c, 0x6f, 0x21 };
            string results = BinaryTranslate.convertBin(sample);
        }
    }

    /// <summary>Formats byte arrays as text.</summary>
    public class BinaryTranslate
    {
        /// <summary>
        /// Renders every byte as two upper-case hexadecimal digits and concatenates them.
        /// </summary>
        /// <param name="CodeInput">Bytes to format.</param>
        /// <returns>The hex digits of all bytes, in order, with no separators.</returns>
        public static string convertBin(byte[] CodeInput)
        {
            string[] hexPairs = System.Array.ConvertAll(CodeInput, value => value.ToString("X2"));
            return string.Concat(hexPairs);
        }
    }
}
this should do the trick.
/// <summary>
/// Decodes a string of binary digits into text, eight bits (most significant bit first)
/// per character. Spaces are ignored, and the input is right-padded with '0' to a
/// multiple of eight digits.
/// </summary>
/// <param name="binary">Binary digits, optionally separated by spaces.</param>
/// <returns>The decoded characters. Any digit other than '1' counts as 0.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="binary"/> is null.</exception>
public static string FromBinary(string binary)
{
    if (binary == null)
    {
        throw new ArgumentNullException(nameof(binary));
    }

    const int WordLength = 8;

    // The string overload is required: there is no empty char literal in C#.
    binary = binary.Replace(" ", String.Empty);

    int remainder = binary.Length % WordLength;
    if (remainder != 0)
    {
        binary = binary.PadRight(binary.Length + WordLength - remainder, '0');
    }

    var output = new System.Text.StringBuilder(binary.Length / WordLength);
    for (int offset = 0; offset < binary.Length; offset += WordLength)
    {
        int tmp = 0;
        // Read all eight bits; the leftmost digit is the most significant one.
        for (int i = 0; i < WordLength; i++)
        {
            tmp <<= 1;
            if (binary[offset + i] == '1')
            {
                tmp |= 1;
            }
        }
        output.Append((char)tmp);
    }
    return output.ToString();
}

C# RNGCryptoServiceProvider and special characters

I'm looking for a way to get random characters. I need a string that contains at least 2 uppercase letters, at least 1 number and at least 1 special character.
Here is my code:
/// <summary>
/// Creates a random string of the requested length from a fixed 61-character alphabet,
/// using a cryptographic random number generator.
/// </summary>
/// <param name="Length">Number of characters to generate.</param>
/// <returns>A random string of exactly <paramref name="Length"/> characters.</returns>
/// <exception cref="System.ArgumentOutOfRangeException"><paramref name="Length"/> is negative.</exception>
public static string CreateRandomPassword(int Length)
{
    if (Length < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(Length));
    }
    if (Length == 0)
    {
        return string.Empty;
    }

    const string _Chars = "abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ[_!23456790";
    int Count = _Chars.Length;

    // 256 is not a multiple of the alphabet size, so a plain `b % Count` would favour the
    // first few characters. Bytes at or above this limit are discarded instead.
    int limit = 256 - (256 % Count);

    var chars = new char[Length];
    Byte[] randomBytes = new Byte[Length];
    int filled = 0;

    using (var rng = RandomNumberGenerator.Create())
    {
        while (filled < Length)
        {
            rng.GetBytes(randomBytes);
            for (int i = 0; i < randomBytes.Length && filled < Length; i++)
            {
                if (randomBytes[i] < limit)
                {
                    chars[filled++] = _Chars[randomBytes[i] % Count];
                }
            }
        }
    }
    return new string(chars);
}
some results:
ZNQzvUPFKOL3x
BQSEkKHXACGO
They don't have special characters or numbers.
Your code works great! I've just wrapped it with a function that validates your conditions.
I executed the following:
/// <summary>
/// Creates a random string of the requested length from a fixed 61-character alphabet,
/// using a cryptographic random number generator.
/// </summary>
/// <param name="Length">Number of characters to generate.</param>
/// <returns>A random string of exactly <paramref name="Length"/> characters.</returns>
/// <exception cref="System.ArgumentOutOfRangeException"><paramref name="Length"/> is negative.</exception>
public static string CreateRandomPassword(int Length)
{
    if (Length < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(Length));
    }
    if (Length == 0)
    {
        return string.Empty;
    }

    const string _Chars = "abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ[_!23456790";
    int Count = _Chars.Length;

    // Reject bytes that would make the modulo uneven (256 is not a multiple of Count),
    // so every character of the alphabet is equally likely.
    int limit = 256 - (256 % Count);

    var chars = new char[Length];
    Byte[] randomBytes = new Byte[Length];
    int filled = 0;

    // The generator is disposable; release it as soon as the bytes are drawn.
    using (var rng = RandomNumberGenerator.Create())
    {
        while (filled < Length)
        {
            rng.GetBytes(randomBytes);
            for (int i = 0; i < randomBytes.Length && filled < Length; i++)
            {
                if (randomBytes[i] < limit)
                {
                    chars[filled++] = _Chars[randomBytes[i] % Count];
                }
            }
        }
    }
    return new string(chars);
}
/// <summary>
/// Generates random passwords until one contains at least two upper-case letters, at
/// least one digit and at least one special (non-alphanumeric) character.
/// </summary>
/// <param name="length">Password length; must be at least 4 to fit all required characters.</param>
/// <returns>A password that satisfies all three requirements.</returns>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="length"/> is below 4, in which case no password could ever qualify.
/// </exception>
public static string CreateRandomPasswordWith2UpperAnd1NumberAnd1Special(int length)
{
    // 2 upper-case + 1 digit + 1 special; anything shorter would loop forever.
    if (length < 4)
    {
        throw new ArgumentOutOfRangeException(nameof(length), "Length must be at least 4.");
    }

    while (true)
    {
        var pass = CreateRandomPassword(length);
        int upper = 0, num = 0, special = 0;
        foreach (var c in pass)
        {
            // Inclusive bounds: 'A', 'Z', 'a', 'z', '0' and '9' belong to their classes.
            if (c >= 'A' && c <= 'Z')
            {
                upper++;
            }
            else if (c >= 'a' && c <= 'z')
            {
                // Lower-case letters carry no requirement; they just must not count as special.
            }
            else if (c >= '0' && c <= '9')
            {
                num++;
            }
            else
            {
                special++;
            }
        }
        if (upper >= 2 && num >= 1 && special >= 1)
        {
            return pass;
        }
    }
}
/// <summary>
/// Checks that a generated 13-character password really has the required composition:
/// at least two upper-case letters, one digit and one special character.
/// </summary>
[Test]
public void CreateRandomPassword_Length13_RandomPasswordWithNumbers()
{
    const int expectedLength = 13;

    var random = CreateRandomPasswordWith2UpperAnd1NumberAnd1Special(expectedLength);

    int upper = 0, digits = 0, special = 0;
    foreach (char c in random)
    {
        if (c >= 'A' && c <= 'Z')
        {
            upper++;
        }
        else if (c >= '0' && c <= '9')
        {
            digits++;
        }
        else if (c < 'a' || c > 'z')
        {
            special++;
        }
    }

    // Assert on the actual content instead of the always-true Assert.IsTrue(true).
    Assert.IsTrue(random.Length == expectedLength, "unexpected password length");
    Assert.IsTrue(upper >= 2, "expected at least two upper-case letters");
    Assert.IsTrue(digits >= 1, "expected at least one digit");
    Assert.IsTrue(special >= 1, "expected at least one special character");
}

Convert byte/int to List<int> reversed and vice versa

Was wondering how can I convert an int to a List in reverse order padded with zeroes and vice versa?
Have a byte that represents List(8), sometimes 2 bytes for List(16), 8 bytes for List(64); so looking for a good solution to handle converting to an int list, manipulate then back again.
e.g. Input of 3 to a List of 1,1,0,0,0,0,0,0
Or input of 42 to a List of 0,1,0,1,0,1,0,0
And vice-versa, take a List of 1,1,0,0,0,0,0,0 and return 3 or List of 0,1,0,1,0,1,0,0 and return 42
What I have done at present is build a couple of functions to handle both scenarios. It all works fine; I'm just wondering if there is a better / more elegant solution that I've completely overlooked.
/// <summary>
/// Converts a number into a list of its binary digits: the binary text is left-padded
/// with '0' to eight characters, passed through <c>ReverseString</c>, and each resulting
/// character is converted to an int.
/// </summary>
/// <param name="_Input">The number to convert.</param>
/// <returns>One list entry per character of the reversed binary text.</returns>
private List<int> IntToList(int _Input)
{
    string padded = Convert.ToString(_Input, 2).PadLeft(8, '0');
    string reversed = ReverseString(padded);

    List<int> digits = new List<int>(8);
    foreach (char digit in reversed)
    {
        digits.Add(Convert.ToInt32(digit.ToString()));
    }
    return digits;
}
/// <summary>
/// Rebuilds a number from the first eight list entries, where entry 0 is the least
/// significant binary digit.
/// </summary>
/// <param name="_List">List with at least eight binary digits.</param>
/// <returns>The value of the eight digits read from index 7 down to index 0 as base-2 text.</returns>
private int IntsToByte(List<int> _List)
{
    string[] digits = new string[8];
    for (int k = 0; k < digits.Length; k++)
    {
        // Walk the list from index 7 down to 0 so the text starts with the highest bit.
        digits[k] = _List[7 - k].ToString();
    }
    return Convert.ToInt32(string.Concat(digits), 2);
}
You can work with bitwise operations. They might be fast.
Warning : Be aware of Little/Big Endian (More here)
The following code works :
/// <summary>
/// Converts a number into a list of its bits, least significant bit first.
/// </summary>
/// <param name="_Input">The number whose bits are extracted.</param>
/// <param name="_MaxSize">
/// Number of list entries to produce. Positions beyond the 32 bits of an int are
/// filled with 0.
/// </param>
/// <returns>A list of <paramref name="_MaxSize"/> entries, each 0 or 1.</returns>
private List<int> IntToList(int _Input, int _MaxSize = 8)
{
    List<int> resultList = new List<int>(_MaxSize);

    // Iterate over bit positions rather than comparing against 1 << _MaxSize: that
    // expression overflows for sizes of 31 and above and made the loop exit immediately.
    for (int bit = 0; bit < _MaxSize; bit++)
    {
        resultList.Add(bit < 32 ? (_Input >> bit) & 1 : 0);
    }
    return resultList;
}
/// <summary>
/// Combines a list of bit values into a number, treating entry 0 as the least
/// significant bit.
/// </summary>
/// <param name="_List">Bit values in ascending bit order.</param>
/// <returns>The bitwise OR of every entry shifted left by its index.</returns>
private int IntsToByte(List<int> _List)
{
    int combined = 0;
    for (int position = 0; position < _List.Count; position++)
    {
        combined |= _List[position] << position;
    }
    return combined;
}
This should work
int number = 42;
// Binary text left-padded to 8 digits, then reversed in place so the least
// significant bit comes first.
char[] reverse = Convert.ToString(number, 2).PadLeft(8, '0').ToCharArray();
Array.Reverse(reverse);
Try this
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
namespace ConsoleApplication1
{
// Demo program: converts a list of bytes into a list of ulong values with ReadList and
// back into a list of bytes with WriteList.
class Program
{
// Runs ReadList followed by WriteList on two sample inputs, both with size 1.
// The results are stored in locals only; nothing is printed.
static void Main(string[] args)
{
List<ulong> results = null;
List<byte> output = null;
List<byte> input1 = new List<byte>() { 1, 1, 0, 0, 0, 0, 0, 0 };
results = ReadList(input1, 1);
output = WriteList(results,1);
List<byte> input2 = new List<byte>() { 0, 1, 0, 1, 0, 1, 0, 0 };
results = ReadList(input2, 1);
output = WriteList(results,1);
}
// Reads the bytes of input through a BinaryReader and groups them into ulong values.
// input: the source list; note that it is reversed in place before reading.
// size:  selects the read width (1 = ReadByte, 2 = ReadInt16) and also the number of
//        reads accumulated into one result value.
// Returns the accumulated values in the order they were completed.
static List<ulong> ReadList(List<byte> input, int size)
{
List<ulong> results = new List<ulong>();
// List<T>.Reverse mutates the caller's list.
input.Reverse();
MemoryStream stream = new MemoryStream(input.ToArray());
BinaryReader reader = new BinaryReader(stream);
int count = 0;
ulong newValue = 0;
// Loop until PeekChar reports that no more data is available.
while (reader.PeekChar() != -1)
{
// Only sizes 1 and 2 have a case; for any other size nothing is read here.
switch (size)
{
case 1:
// Scale the running value by 2^size and add the next byte.
newValue = ((ulong)Math.Pow(2, size) * newValue) + (ulong)reader.ReadByte();
break;
case 2:
// Scale the running value by 2^size and add the next 16-bit value.
newValue = ((ulong)Math.Pow(2, size) * newValue) + (ulong)reader.ReadInt16();
break;
}
// After `size` reads the value is complete: store it and start over.
if (++count == size)
{
results.Add(newValue);
newValue = 0;
count = 0;
}
}
return results;
}
// Splits every value of input into `size` output bytes and returns them in reverse order.
// input: the values to split.
// size:  number of bytes produced per value; each byte is the value modulo 2^size.
static List<byte> WriteList(List<ulong> input, int size)
{
List<byte> results = new List<byte>();
foreach (ulong num in input)
{
ulong result = num;
for (int count = 0; count < size; count++)
{
if (result > 0)
{
// Emit the low part (value modulo 2^size), then divide it away.
byte bit = (byte)(result % Math.Pow(2, size));
results.Add(bit);
result = (ulong)(result / Math.Pow(2, size));
}
else
{
// Nothing left of the value: pad with zero.
results.Add(0);
}
}
}
// The bytes were produced low part first; reverse the whole list before returning.
results.Reverse();
return results;
}
}
}
​
Solution from OP.
Have gone with Jean Bob's suggestion of using BitWise.
For anyone elses benefit, here is my modified version to read / write in blocks of 8 to/from the list.
/// <summary>
/// Appends the eight low-order bits of a number to an existing list, least significant
/// bit first.
/// </summary>
/// <param name="_List">The list that receives the eight new entries.</param>
/// <param name="_Input">The number whose low eight bits are appended.</param>
/// <returns>The same list instance that was passed in.</returns>
private List<int> IntToList(List<int> _List, int _Input)
{
    for (int bit = 0; bit < 8; bit++)
    {
        int mask = 1 << bit;
        int value = (_Input & mask) != 0 ? 1 : 0;
        _List.Add(value);
    }
    return _List;
}
/// <summary>
/// Reads eight consecutive bit values from a list and combines them into a number,
/// treating the first one as the least significant bit.
/// </summary>
/// <param name="_List">List of bit values.</param>
/// <param name="l">Index of the first of the eight entries to read.</param>
/// <returns>The bitwise OR of the eight entries, each shifted by its offset from <paramref name="l"/>.</returns>
private int IntsToByte(List<int> _List, int l)
{
    int combined = 0;
    for (int offset = 0; offset < 8; offset++)
    {
        combined |= _List[l + offset] << offset;
    }
    return combined;
}

Get longest substring between two strings

I have two words,
britanicaeng and
britanicahin.
I need to find the longest common substring of the two, i.e. britanica.
How can I do this in C#?
Try this method:
/// <summary>
/// Finds the longest run of characters that occurs contiguously in both strings, using
/// a table of common-suffix lengths.
/// </summary>
/// <param name="s1">First string; the result is cut from this one.</param>
/// <param name="s2">Second string.</param>
/// <returns>
/// The longest common substring, or an empty string when the inputs share no character.
/// When several candidates have the same length, the first one found while scanning
/// <paramref name="s1"/> from left to right is returned.
/// </returns>
public static string FindLongestCommonSubstring(string s1, string s2)
{
    // runLengths[i, j] = length of the common run ending at s1[i - 1] and s2[j - 1].
    int[,] runLengths = new int[s1.Length + 1, s2.Length + 1];
    int bestLength = 0;
    int bestEndInS1 = 0; // exclusive end index of the best run within s1

    for (int i = 1; i <= s1.Length; i++)
    {
        for (int j = 1; j <= s2.Length; j++)
        {
            if (s1[i - 1] != s2[j - 1])
            {
                continue;
            }

            int current = runLengths[i - 1, j - 1] + 1;
            runLengths[i, j] = current;

            // Strictly greater: the earliest run of a given length wins.
            if (current > bestLength)
            {
                bestLength = current;
                bestEndInS1 = i;
            }
        }
    }

    return s1.Substring(bestEndInS1 - bestLength, bestLength);
}
Usage example:
Console.WriteLine(FindLongestCommonSubstring("britanicaeng", "britanicahin"));
I refactored the C++ code from Ashutosh Singh at https://iq.opengenus.org/longest-common-substring-using-rolling-hash/ to create a rolling hash approach in C# - this will find the substring in O(N * log(N)^2) time and O(N) space
using System;
using System.Collections.Generic;
/// <summary>
/// Finds a longest common substring of two strings by binary-searching the length and
/// comparing polynomial (rolling) hashes of all substrings of that length.
/// Two independent hashes are combined (one modulo a prime, one modulo 2^64); matches
/// are decided by hash equality only and are not re-verified character by character.
/// </summary>
public class RollingHash
{
    /// <summary>Shared, lazily grown tables of powers of the hash base.</summary>
    private class RollingHashPowers
    {
        // _mod = prime modulus of polynomial hashing
        // any prime number over a billion should suffice
        internal const int _mod = (int)1e9 + 123;
        // _hashBase = base (point of hashing)
        // this should be a prime number larger than the number of characters used
        // in my use case I am only interested in ASCII (256) characters
        // for strings in languages using non-latin characters, this should be much larger
        internal const long _hashBase = 257;
        // _pow1 = powers of base modulo mod
        internal readonly List<int> _pow1 = new List<int> { 1 };
        // _pow2 = powers of base modulo 2^64
        internal readonly List<long> _pow2 = new List<long> { 1L };

        /// <summary>Makes sure powers 0..length (inclusive) are available.</summary>
        internal void EnsureLength(int length)
        {
            // The tables end up with length + 1 entries, so reserve exactly that.
            if (_pow1.Capacity < length + 1)
            {
                _pow1.Capacity = _pow2.Capacity = length + 1;
            }
            for (int currentIndx = _pow1.Count - 1; currentIndx < length; ++currentIndx)
            {
                _pow1.Add((int)(_pow1[currentIndx] * _hashBase % _mod));
                // Wrap-around is the intended "modulo 2^64"; make it independent of the
                // project's checked/unchecked setting.
                _pow2.Add(unchecked(_pow2[currentIndx] * _hashBase));
            }
        }
    }

    /// <summary>Prefix hashes of one string, allowing O(1) hashes of any substring.</summary>
    private class RollingHashedString
    {
        readonly RollingHashPowers _pows;
        readonly int[] _pref1; // Hash on prefix modulo mod
        readonly long[] _pref2; // Hash on prefix modulo 2^64

        // Constructor from string:
        internal RollingHashedString(RollingHashPowers pows, string s, bool caseInsensitive = false)
        {
            _pows = pows;
            // The loop below indexes the power tables up to s.Length - 1.
            _pows.EnsureLength(s.Length);
            _pref1 = new int[s.Length + 1];
            _pref2 = new long[s.Length + 1];
            const long capAVal = 'A';
            const long capZVal = 'Z';
            const long aADif = 'a' - 'A';

            // Fill arrays with polynomial hashes on prefix.
            // Plain string indexing is used; no unsafe code is needed for this.
            for (int i = 0; i < s.Length; ++i)
            {
                long v = s[i];
                if (caseInsensitive && capAVal <= v && v <= capZVal)
                {
                    // Fold ASCII upper-case letters onto their lower-case form.
                    v += aADif;
                }
                _pref1[i + 1] = (int)((_pref1[i] + v * _pows._pow1[i]) % RollingHashPowers._mod);
                _pref2[i + 1] = unchecked(_pref2[i] + v * _pows._pow2[i]);
            }
        }

        // Polynomial hash of subsequence [pos, pos+len)
        // If mxPow != 0, the value is additionally multiplied by the power of the base
        // that lifts the substring's highest term to base ^ mxPow, so that hashes of
        // substrings at different positions become directly comparable.
        internal Tuple<int, long> Apply(int pos, int len, int mxPow = 0)
        {
            int hash1 = _pref1[pos + len] - _pref1[pos];
            long hash2 = unchecked(_pref2[pos + len] - _pref2[pos]);
            if (hash1 < 0)
            {
                hash1 += RollingHashPowers._mod;
            }
            if (mxPow != 0)
            {
                int shift = mxPow - (pos + len - 1);
                hash1 = (int)((long)hash1 * _pows._pow1[shift] % RollingHashPowers._mod);
                hash2 = unchecked(hash2 * _pows._pow2[shift]);
            }
            return Tuple.Create(hash1, hash2);
        }
    }

    private readonly RollingHashPowers _rhp;

    /// <summary>Creates a finder, optionally pre-computing powers for strings up to a given length.</summary>
    /// <param name="longestLength">Expected maximum string length; 0 to grow the tables on demand.</param>
    public RollingHash(int longestLength = 0)
    {
        _rhp = new RollingHashPowers();
        if (longestLength > 0)
        {
            _rhp.EnsureLength(longestLength);
        }
    }

    /// <summary>
    /// Returns a longest substring that occurs in both <paramref name="a"/> and <paramref name="b"/>.
    /// </summary>
    /// <param name="a">First string.</param>
    /// <param name="b">Second string; the returned text is cut from this one.</param>
    /// <param name="caseInsensitive">When true, ASCII letters A-Z are compared as a-z.</param>
    /// <returns>The common substring as it appears in <paramref name="b"/>, or an empty string if there is none.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="a"/> or <paramref name="b"/> is null.</exception>
    public string FindCommonSubstring(string a, string b, bool caseInsensitive = false)
    {
        if (a == null)
        {
            throw new ArgumentNullException(nameof(a));
        }
        if (b == null)
        {
            throw new ArgumentNullException(nameof(b));
        }

        // Calculate max needed power of base:
        int mxPow = Math.Max(a.Length, b.Length);
        _rhp.EnsureLength(mxPow);
        // Create hashing objects from strings:
        RollingHashedString hash_a = new RollingHashedString(_rhp, a, caseInsensitive);
        RollingHashedString hash_b = new RollingHashedString(_rhp, b, caseInsensitive);
        // Binary search by length of same subsequence:
        // invariant: a common substring of length `low` exists, none of length `high`.
        int pos = -1;
        int low = 0;
        int minLen = Math.Min(a.Length, b.Length);
        int high = minLen + 1;
        var tupleCompare = Comparer<Tuple<int, long>>.Default;
        while (high - low > 1)
        {
            int mid = (low + high) / 2;
            // Hash every substring of a with length mid and sort for binary search.
            List<Tuple<int, long>> hashes = new List<Tuple<int, long>>(a.Length - mid + 1);
            for (int i = 0; i + mid <= a.Length; ++i)
            {
                hashes.Add(hash_a.Apply(i, mid, mxPow));
            }
            hashes.Sort(tupleCompare);
            // Look for the first substring of b with a matching hash.
            int p = -1;
            for (int i = 0; i + mid <= b.Length; ++i)
            {
                if (hashes.BinarySearch(hash_b.Apply(i, mid, mxPow), tupleCompare) >= 0)
                {
                    p = i;
                    break;
                }
            }
            if (p >= 0)
            {
                low = mid;
                pos = p;
            }
            else
            {
                high = mid;
            }
        }
        // Output answer:
        return pos >= 0
            ? b.Substring(pos, low)
            : string.Empty;
    }
}

Categories

Resources