Faster modulus in C/C#?

Is there a trick for creating a faster integer modulus than the standard % operator for particular bases?
For my program, I'd be looking for around 1000-4000 (e.g. n%2048). Is there a quicker way to perform n modulus 2048 than simply: n%2048?

If the denominator is known at compile time to be a power of 2, like your example of 2048, you could subtract 1 and do a bitwise-and.
That is:
n % m == n & (m - 1)
...where m is a power of 2.
For example:
22 % 8 == 22 - 16 == 6
 Dec                  Bin
 -----                -----
 22                 = 10110
  8                 = 01000
  8 - 1             = 00111
 22 & (8 - 1)       = 10110
                    & 00111
                      -----
  6                 = 00110
Bear in mind that a good compiler will have its own optimizations for %, maybe even enough to be as fast as the above technique. Arithmetic operators tend to be pretty heavily optimized.
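For the asker's 2048 case specifically, here is a minimal C# sketch of that identity (the method name Mod2048 is just illustrative):
// Works because 2048 is a power of two: 2048 - 1 = 2047 = 0x7FF masks off
// everything except the low 11 bits, which is exactly the remainder mod 2048.
static int Mod2048(int n)
{
    // Caveat: for negative n this differs from n % 2048, which can return a
    // negative remainder in C#; the mask form always yields 0..2047.
    return n & 2047;
}
// Usage: Mod2048(5000) == 904, and 5000 % 2048 == 904 for non-negative n.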

For powers of two 2^n, all you have to do is zero out all bits except the last n bits.
For example (assuming 32 bit integers):
x%2 is equivalent to x & 0x00000001
x%4 is equivalent to x & 0x00000003
In general x % (2^n) is equal to x & (2^n-1). Written out in C, this would be x & ((1<<n)-1).
This is because 2^n gives you a 1 in the n+1th bit (from the right). So 2^n-1 will give you n ones on the right, and zeros on the left.
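A tiny C# sketch (my own illustration) that prints the masks described above, showing the "n ones on the right" pattern:
// Assumes a using System; context.
for (int n = 1; n <= 4; n++)
{
    int modulus = 1 << n;       // 2^n
    int mask = modulus - 1;     // n ones on the right
    Console.WriteLine($"n={n}  2^n={modulus}  mask={Convert.ToString(mask, 2).PadLeft(8, '0')}");
}
// Output masks: 00000001, 00000011, 00000111, 00001111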

You could zero out the high-order bits, i.e.:
x = 11 = 1011
x % 4 = 3 = 0011
so for x % 4 you can just take the last 2 bits. I'm not sure what would happen if negative numbers were used, though.
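To see what actually happens with negative numbers, here is a quick C# check (my own illustration of standard C# remainder semantics, not part of the answer above):
int x = -3;
Console.WriteLine(x % 4);   // -3  (C#'s % keeps the sign of the dividend)
Console.WriteLine(x & 3);   //  1  (masking works on the two's-complement bits)
// So the mask trick only matches % for non-negative values.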

Here are a few techniques that replicate the modulus operation.
Of those benchmarked, this was the fastest (modified here to fit your 2048 scenario). As long as your "max" isn't in the millions and stays in the 1000-4000 range you mentioned, it may work faster for you too:
int threshold = 2048; //the number to mod by
int max = 1000; //the number on the left. Ex: 1000 % 2048
int total = 0;
int y = 0;
for (int x = 0; x < max; x++)
{
if (y > (threshold - 1))
{
y = 0;
total += x;
}
y += 1;
}
return total;
Give it a go. It performed faster on the author's machine at various settings, so it should perform well for you too.

Branchless non-power-of-two modulus is possible by precomputing magic constants at run-time, to implement division using a multiply-add-shift.
This is roughly 2x faster than the built-in modulo operator % on my Intel Core i5.
I'm surprised it's not more dramatic, as x86 CPU div instructions can have latencies as high as 80-90 cycles for 64-bit division on some CPUs, compared to mul at 3 cycles and bitwise ops at 1 cycle each.
Proof of concept and timings shown below. series_len refers to the number of modulus ops performed in series on a single var. That's to prevent the CPU from hiding latencies through parallelization.
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/time.h>
typedef int32_t s32;
typedef uint32_t u32;
typedef uint64_t u64;
#define NUM_NUMS 1024
#define NUM_RUNS 500
#define MAX_NUM UINT32_MAX
#define MAX_DEN 1024
struct fastdiv {
u32 mul;
u32 add;
s32 shift;
u32 _odiv; /* save original divisor for modulo calc */
};
static u32 num[NUM_NUMS];
static u32 den[NUM_NUMS];
static struct fastdiv fd[NUM_NUMS];
/* hash of results to prevent gcc from optimizing out our ops */
static u32 cookie = 0;
/* required for magic constant generation */
u32 ulog2(u32 v) {
u32 r, shift;
r = (v > 0xFFFF) << 4; v >>= r;
shift = (v > 0xFF ) << 3; v >>= shift; r |= shift;
shift = (v > 0xF ) << 2; v >>= shift; r |= shift;
shift = (v > 0x3 ) << 1; v >>= shift; r |= shift;
r |= (v >> 1);
return r;
}
/* generate constants for implementing a division with multiply-add-shift */
void fastdiv_make(struct fastdiv *d, u32 divisor) {
u32 l, r, e;
u64 m;
d->_odiv = divisor;
l = ulog2(divisor);
if (divisor & (divisor - 1)) {
m = 1ULL << (l + 32);
d->mul = (u32)(m / divisor);
r = (u32)m - d->mul * divisor;
e = divisor - r;
if (e < (1UL << l)) {
++d->mul;
d->add = 0;
} else {
d->add = d->mul;
}
d->shift = l;
} else {
if (divisor == 1) {
d->mul = 0xffffffff;
d->add = 0xffffffff;
d->shift = 0;
} else {
d->mul = 0x80000000;
d->add = 0;
d->shift = l-1;
}
}
}
/* 0: use function that checks for a power-of-2 modulus (speedup for POTs)
* 1: use inline macro */
#define FASTMOD_BRANCHLESS 0
#define fastdiv(v,d) ((u32)(((u64)(v)*(d)->mul + (d)->add) >> 32) >> (d)->shift)
#define _fastmod(v,d) ((v) - fastdiv((v),(d)) * (d)->_odiv)
#if FASTMOD_BRANCHLESS
#define fastmod(v,d) _fastmod((v),(d))
#else
u32 fastmod(u32 v, struct fastdiv *d) {
if (d->mul == 0x80000000) {
return (v & ((1 << d->shift) - 1));
}
return _fastmod(v,d);
}
#endif
u32 random32(u32 upper_bound) {
return arc4random_uniform(upper_bound);
}
u32 random32_range(u32 lower_bound, u32 upper_bound) {
return random32(upper_bound - lower_bound) + lower_bound;
}
void fill_arrays() {
int i;
for (i = 0; i < NUM_NUMS; ++i) {
num[i] = random32_range(MAX_DEN, MAX_NUM);
den[i] = random32_range(1, MAX_DEN);
fastdiv_make(&fd[i], den[i]);
}
}
void fill_arrays_pot() {
u32 log_bound, rand_log;
int i;
log_bound = ulog2(MAX_DEN);
for (i = 0; i < NUM_NUMS; ++i) {
num[i] = random32_range(MAX_DEN, MAX_NUM);
rand_log = random32(log_bound) + 1;
den[i] = 1 << rand_log;
fastdiv_make(&fd[i], den[i]);
}
}
u64 clock_ns() {
struct timeval tv;
gettimeofday(&tv, NULL);
return tv.tv_sec*1000000000 + tv.tv_usec*1000;
}
void use_value(u32 v) {
cookie += v;
}
int main(int argc, char **arg) {
u64 builtin_npot_ns;
u64 builtin_pot_ns;
u64 branching_npot_ns;
u64 branching_pot_ns;
u64 branchless_npot_ns;
u64 branchless_pot_ns;
u64 t0, t1;
u32 v;
int s, r, i, j;
int series_len;
builtin_npot_ns = builtin_pot_ns = 0;
branching_npot_ns = branching_pot_ns = 0;
branchless_npot_ns = branchless_pot_ns = 0;
for (s = 5; s >= 0; --s) {
series_len = 1 << s;
for (r = 0; r < NUM_RUNS; ++r) {
/* built-in NPOT */
fill_arrays();
t0 = clock_ns();
for (i = 0; i < NUM_NUMS; ++i) {
v = num[i];
for (j = 0; j < series_len; ++j) {
v /= den[i];
}
use_value(v);
}
t1 = clock_ns();
builtin_npot_ns += (t1 - t0) / NUM_NUMS;
/* built-in POT */
fill_arrays_pot();
t0 = clock_ns();
for (i = 0; i < NUM_NUMS; ++i) {
v = num[i];
for (j = 0; j < series_len; ++j) {
v /= den[i];
}
use_value(v);
}
t1 = clock_ns();
builtin_pot_ns += (t1 - t0) / NUM_NUMS;
/* branching NPOT */
fill_arrays();
t0 = clock_ns();
for (i = 0; i < NUM_NUMS; ++i) {
v = num[i];
for (j = 0; j < series_len; ++j) {
v = fastmod(v, fd+i);
}
use_value(v);
}
t1 = clock_ns();
branching_npot_ns += (t1 - t0) / NUM_NUMS;
/* branching POT */
fill_arrays_pot();
t0 = clock_ns();
for (i = 0; i < NUM_NUMS; ++i) {
v = num[i];
for (j = 0; j < series_len; ++j) {
v = fastmod(v, fd+i);
}
use_value(v);
}
t1 = clock_ns();
branching_pot_ns += (t1 - t0) / NUM_NUMS;
/* branchless NPOT */
fill_arrays();
t0 = clock_ns();
for (i = 0; i < NUM_NUMS; ++i) {
v = num[i];
for (j = 0; j < series_len; ++j) {
v = _fastmod(v, fd+i);
}
use_value(v);
}
t1 = clock_ns();
branchless_npot_ns += (t1 - t0) / NUM_NUMS;
/* branchless POT */
fill_arrays_pot();
t0 = clock_ns();
for (i = 0; i < NUM_NUMS; ++i) {
v = num[i];
for (j = 0; j < series_len; ++j) {
v = _fastmod(v, fd+i);
}
use_value(v);
}
t1 = clock_ns();
branchless_pot_ns += (t1 - t0) / NUM_NUMS;
}
builtin_npot_ns /= NUM_RUNS;
builtin_pot_ns /= NUM_RUNS;
branching_npot_ns /= NUM_RUNS;
branching_pot_ns /= NUM_RUNS;
branchless_npot_ns /= NUM_RUNS;
branchless_pot_ns /= NUM_RUNS;
printf("series_len = %d\n", series_len);
printf("----------------------------\n");
printf("builtin_npot_ns : %llu ns\n", builtin_npot_ns);
printf("builtin_pot_ns : %llu ns\n", builtin_pot_ns);
printf("branching_npot_ns : %llu ns\n", branching_npot_ns);
printf("branching_pot_ns : %llu ns\n", branching_pot_ns);
printf("branchless_npot_ns : %llu ns\n", branchless_npot_ns);
printf("branchless_pot_ns : %llu ns\n\n", branchless_pot_ns);
}
printf("cookie=%u\n", cookie);
}
Results
Intel Core i5 (MacBookAir7,2), macOS 10.11.6, clang 8.0.0
series_len = 32
----------------------------
builtin_npot_ns : 218 ns
builtin_pot_ns : 225 ns
branching_npot_ns : 115 ns
branching_pot_ns : 42 ns
branchless_npot_ns : 110 ns
branchless_pot_ns : 110 ns
series_len = 16
----------------------------
builtin_npot_ns : 87 ns
builtin_pot_ns : 89 ns
branching_npot_ns : 47 ns
branching_pot_ns : 19 ns
branchless_npot_ns : 45 ns
branchless_pot_ns : 45 ns
series_len = 8
----------------------------
builtin_npot_ns : 32 ns
builtin_pot_ns : 34 ns
branching_npot_ns : 18 ns
branching_pot_ns : 10 ns
branchless_npot_ns : 17 ns
branchless_pot_ns : 17 ns
series_len = 4
----------------------------
builtin_npot_ns : 15 ns
builtin_pot_ns : 16 ns
branching_npot_ns : 8 ns
branching_pot_ns : 3 ns
branchless_npot_ns : 7 ns
branchless_pot_ns : 7 ns
series_len = 2
----------------------------
builtin_npot_ns : 8 ns
builtin_pot_ns : 7 ns
branching_npot_ns : 4 ns
branching_pot_ns : 2 ns
branchless_npot_ns : 2 ns
branchless_pot_ns : 2 ns
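Since the question is tagged C#, here is a rough C# port sketch of the same multiply-add-shift constants (my own translation of the C code above, not from the original answer; the FastDiv/Make/Div/Mod names are illustrative, and BitOperations.Log2 assumes .NET Core 3.0 or later):
using System.Numerics;

struct FastDiv
{
    public uint Mul, Add, Divisor;
    public int Shift;

    public static FastDiv Make(uint divisor)    // divisor must be >= 1
    {
        var d = new FastDiv { Divisor = divisor };
        int l = BitOperations.Log2(divisor);            // floor(log2(divisor))
        if ((divisor & (divisor - 1)) != 0)             // not a power of two
        {
            ulong m = 1UL << (l + 32);
            d.Mul = (uint)(m / divisor);
            // wrap-around subtraction gives the remainder of m / divisor
            uint r = unchecked((uint)m - d.Mul * divisor);
            if (divisor - r < (1u << l)) { d.Mul++; d.Add = 0; } else { d.Add = d.Mul; }
            d.Shift = l;
        }
        else if (divisor == 1) { d.Mul = 0xffffffff; d.Add = 0xffffffff; d.Shift = 0; }
        else { d.Mul = 0x80000000; d.Add = 0; d.Shift = l - 1; }
        return d;
    }

    public uint Div(uint v) => (uint)(((ulong)v * Mul + Add) >> 32) >> Shift;
    public uint Mod(uint v) => v - Div(v) * Divisor;    // v - (v / d) * d
}

// Usage sketch: var fd = FastDiv.Make(3000); uint m = fd.Mod(123456);  // 456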

The fastest way to multiply or divide unsigned integers is by bit-shifting them left or right. Shift operations map directly to CPU instructions. For example, 3 << 1 == 6, while 4 >> 1 == 2.
You can use the same trick to calculate the modulo: shift an integer far enough to the left so that only the remainder bits are left, then shift it back right so you can read off the remainder value.
On the other hand, integer modulo also exists as a CPU instruction. If the integer modulo operator maps to this instruction in optimized builds, you will not see any improvement from the bit-shift trick.
The following code calculates 7%4 by shifting far enough that only the last 2 bits are left (since 4 = 2^2). This means that we need to shift by 30 bits:
uint i=7;
var modulo=((i<<30)>>30);
The result is 3.
EDIT:
I just read all the solutions proposing to simply erase the higher-order bits. It has the same effect, but is a lot simpler and more direct.

If you are dividing by literals that are powers of two, then the answer is probably No: Any decent compiler will automatically turn such expressions into a variation of an AND operation, which is pretty close to optimal.

Related

Calculating e number C#

I am trying to calculate the number e with
e = 1 + (1/1! + 1/2! + 1/3! + ...)
The user selects the number of trials on the form:
int trialNumber = Convert.ToInt32(Math.Round(trialNumberForm.Value, 0));
int factorial = trialNumber;
float factResult = 0;
for (int i = 1; i < trialNumber; i++)
{
for (int b = 1; b < i; b++) //calculates x! here.
{
factorial = factorial * b;
}
factResult = factResult + (1 / factorial);
}
factResult++;
MessageBox.Show(factResult.ToString());
It calculates the result as 1, whichever number you select! I've tried changing the variable type from double to float, but that didn't fix it. How do I compute the formula I wrote above correctly?
You have no need for the factorial (with its integer division and integer overflow problems) at all, since
1/(n+1)! == (1/n!)/(n+1)
You can implement the computation of e as easily as
double factResult = 1; // turn double into float if you want
double item = 1; // turn double into float if you want
for (int i = 1; i < trialNumber; ++i)
factResult += (item /= i);
...
MessageBox.Show(factResult.ToString());
Outcomes:
trial number | e
-------------------------------
           1 | 1
           2 | 2
           3 | 2.5
           4 | 2.666666...
           5 | 2.708333...
          10 | 2.71828152557319
          15 | 2.71828182845823
          20 | 2.71828182845905
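For reference, a self-contained console sketch of the loop above that reproduces the table (the class name and the hard-coded trialNumber are only for illustration):
using System;

class EDemo
{
    static void Main()
    {
        int trialNumber = 20;
        double factResult = 1;   // running sum, starts at the leading 1
        double item = 1;         // holds 1/i! and is updated incrementally
        for (int i = 1; i < trialNumber; ++i)
            factResult += (item /= i);
        Console.WriteLine(factResult);   // ~2.71828182845905
    }
}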
As @kabdulla and @ScottChamberlain said, you are doing integer division where you need floating-point division:
for (int b = 1; b < i; b++) //calculates x! here.
{
factorial = factorial * b;
}
factResult = factResult + (1 / factorial);
Should be
for (int b = 2; b < i; b++) //calculates x! here.
{
factorial = factorial * b;
}
factResult = factResult + (1.0 / factorial);
Plus I started the for loop at b = 2 because multiplying by 1 is useless.

Reversing a long in c#

I know that in Java you can simply reverse a long (101010100000001) using Long.reverse (giving 100000001010101). However, is there anything like this that exists in C#?
The answer to your question is no. However, it is achievable in code.
How about this...
public static long RevLong(long l)
{
long tmp = l;
long r = 0L;
if (tmp < 0)
tmp *= -1;
while (tmp > 0)
{
r = (r * 10) + (tmp - ((tmp / 10)) * 10);
tmp = tmp / 10;
}
return r * (l < 0 ? -1 : 1);
}
How about...
public static ulong Bit(ulong x, int n)
{
return (x & (1UL << n)) >> n; // 1UL so the mask is valid for bits 32-63
}
public static ulong ReverseBits(ulong x)
{
ulong result = 0;
for (int i = 0; i < 64; i++)
result = result | (Bit(x, 63 - i) << i); // 63 - i keeps the bit index in range
return result;
}
Another approach for reversing a long is:
long num = 123456789;
long reversed = 0;
while (num > 0)
{
reversed = (reversed * 10) + (num % 10);
num /= 10;
}
or
long num = 123456789;
long reversed = 0;
while (num > 0)
{
reversed = (reversed << 1) + (reversed << 3) + (num & 1);
num >>= 1;
}
There are some interesting examples here. You could adapt one of these into an extension method, like so:
public static class LongExtension
{
public static ulong Reverse(this ulong value)
{
return (value & 0x00000000000000FFUL) << 56 | (value & 0x000000000000FF00UL) << 40 |
(value & 0x0000000000FF0000UL) << 24 | (value & 0x00000000FF000000UL) << 8 |
(value & 0x000000FF00000000UL) >> 8 | (value & 0x0000FF0000000000UL) >> 24 |
(value & 0x00FF000000000000UL) >> 40 | (value & 0xFF00000000000000UL) >> 56;
}
}
Then you can call it like this:
ulong myLong = 3L;
ulong reversed = myLong.Reverse();
Hope this will work:
string s = 101010100000001L.ToString();
char[] charArray = s.ToCharArray();
Array.Reverse(charArray);
return new string(charArray);

Despite conventional wisdom, using + instead of | to combine bytes into an int always works?

Conventional wisdom has it that when you are ORing bytes together to make an int, you should use the | operator rather than the + operator, otherwise you could have problems with the sign bit.
But this doesn't appear to be the case in C#. It looks like you can happily use the + operator, and it still works even for negative results.
My questions:
Is this really true?
If so, why does it work? (And why do a lot of people think it shouldn't - including me! ;)
Here's a test program which I believe tests every possible combination of four bytes using the + operator and the | operator, and verifies that both approaches yield the same results.
Here's the test code:
using System;
using System.Diagnostics;
namespace Demo
{
class Program
{
int Convert1(byte b1, byte b2, byte b3, byte b4)
{
return b1 + (b2 << 8) + (b3 << 16) + (b4 << 24);
}
int Convert2(byte b1, byte b2, byte b3, byte b4)
{
return b1 | (b2 << 8) | (b3 << 16) | (b4 << 24);
}
void Run()
{
byte b = 0xff;
Trace.Assert(Convert1(b, b, b, b) == -1); // Sanity check.
Trace.Assert(Convert2(b, b, b, b) == -1);
for (int i = 0; i < 256; ++i)
{
Console.WriteLine(i);
byte b1 = (byte) i;
for (int j = 0; j < 256; ++j)
{
byte b2 = (byte) j;
for (int k = 0; k < 256; ++k)
{
byte b3 = (byte) k;
for (int l = 0; l < 256; ++l)
{
byte b4 = (byte) l;
Trace.Assert(Convert1(b1, b2, b3, b4) == Convert2(b1, b2, b3, b4));
}
}
}
}
Console.WriteLine("Done.");
}
static void Main()
{
new Program().Run();
}
}
}
[EDIT]
To see how this works, consider this:
byte b = 0xff;
int i1 = b;
int i2 = (b << 8);
int i3 = (b << 16);
int i4 = (b << 24);
Console.WriteLine(i1);
Console.WriteLine(i2);
Console.WriteLine(i3);
Console.WriteLine(i4);
int total = i1 + i2 + i3 + i4;
Console.WriteLine(total);
This prints:
255
65280
16711680
-16777216
-1
Aha! Each shifted byte lands in its own, non-overlapping group of bit positions, so none of the additions ever produces a carry, which is why + and | give identical results here.
Differences:
When bits overlap, | and + will produce different results:
2 | 3 = 3
2 + 3 = 5
When actually using signed bytes, the result will be different:
-2 | -3 = -1
-2 + (-3) = -5
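A small C# check of that second point (my own illustration, not part of the answer):
sbyte lowByte = -1;                   // bit pattern 0xFF, but value -1
int viaOr  = lowByte | (1 << 8);      // sign-extension: 0xFFFFFFFF | 0x100 = -1
int viaAdd = lowByte + (1 << 8);      // arithmetic: -1 + 256 = 255
// viaOr == -1, viaAdd == 255.
// An unsigned byte with the same bit pattern (0xFF) gives 511 either way,
// because after zero-extension no bits overlap -- which is the case in the
// test program above.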

Calculate square root of a BigInteger (System.Numerics.BigInteger)

.NET 4.0 provides the System.Numerics.BigInteger type for arbitrarily-large integers. I need to compute the square root (or a reasonable approximation -- e.g., integer square root) of a BigInteger. So that I don't have to reimplement the wheel, does anyone have a nice extension method for this?
Check if BigInteger is not a perfect square has code to compute the integer square root of a Java BigInteger. Here it is translated into C#, as an extension method.
public static BigInteger Sqrt(this BigInteger n)
{
if (n == 0) return 0;
if (n > 0)
{
int bitLength = Convert.ToInt32(Math.Ceiling(BigInteger.Log(n, 2)));
BigInteger root = BigInteger.One << (bitLength / 2);
while (!isSqrt(n, root))
{
root += n / root;
root /= 2;
}
return root;
}
throw new ArithmeticException("NaN");
}
private static Boolean isSqrt(BigInteger n, BigInteger root)
{
BigInteger lowerBound = root*root;
BigInteger upperBound = (root + 1)*(root + 1);
return (n >= lowerBound && n < upperBound);
}
Informal testing indicates that this is about 75X slower than Math.Sqrt, for small integers. The VS profiler points to the multiplications in isSqrt as the hotspots.
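One way to shave off some of that cost is to reuse root*root for the upper bound instead of computing (root+1)*(root+1); this is the same trick that shows up as "bound opti." in the benchmark answer further down, shown here as a sketch rather than as part of this answer:
// (root + 1)^2 = root^2 + 2*root + 1, so "n < (root+1)^2" can be tested as
// "n <= root^2 + 2*root", saving one BigInteger multiplication per call.
private static bool IsSqrt(BigInteger n, BigInteger root)
{
    BigInteger lowerBound = root * root;
    return n >= lowerBound && n <= lowerBound + (root << 1);
}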
I am not sure if Newton's Method is the best way to compute bignum square roots, because it involves divisions which are slow for bignums. You can use a CORDIC method, which uses only addition and shifts (shown here for unsigned ints)
static uint isqrt(uint x)
{
int b=15; // this is the next bit we try
uint r=0; // r will contain the result
uint r2=0; // here we maintain r squared
while(b>=0)
{
uint sr2=r2;
uint sr=r;
// compute (r+(1<<b))**2, we have r**2 already.
r2+=(uint)((r<<(1+b))+(1<<(b+b)));
r+=(uint)(1<<b);
if (r2>x)
{
r=sr;
r2=sr2;
}
b--;
}
return r;
}
There's a similar method which uses only addition and shifts, called 'Dijkstra's square root', explained for example here:
http://lib.tkk.fi/Diss/2005/isbn9512275279/article3.pdf
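For readers who don't want to chase the link, here is a common adds-and-shifts integer square root in the same spirit (a C# sketch of the classic shift-and-subtract method; the name IntSqrt is illustrative and the code is not taken from the paper):
static uint IntSqrt(uint x)
{
    uint result = 0;
    uint bit = 1u << 30;            // highest power of four that fits in a uint
    while (bit > x) bit >>= 2;      // start at the largest power of four <= x
    while (bit != 0)
    {
        if (x >= result + bit)
        {
            x -= result + bit;
            result = (result >> 1) + bit;
        }
        else
        {
            result >>= 1;
        }
        bit >>= 2;
    }
    return result;                  // floor(sqrt(original x))
}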
Ok, first a few speed tests of some variants posted here. (I only considered methods which give exact results and are at least suitable for BigInteger):
+------------------------------+-------+------+------+-------+-------+--------+--------+--------+
| variant - 1000x times        |   2e5 | 2e10 | 2e15 |  2e25 |  2e50 |  2e100 |  2e250 |  2e500 |
+------------------------------+-------+------+------+-------+-------+--------+--------+--------+
| my version                   |  0.03 | 0.04 | 0.04 |  0.76 |  1.44 |   2.23 |   4.84 |  23.05 |
| RedGreenCode (bound opti.)   |  0.56 | 1.20 | 1.80 |  2.21 |  3.71 |   6.10 |  14.53 |  51.48 |
| RedGreenCode (newton method) |  0.80 | 1.21 | 2.12 |  2.79 |  5.23 |   8.09 |  19.90 |  65.36 |
| Nordic Mainframe (CORDIC)    |  2.38 | 5.52 | 9.65 | 19.80 | 46.69 |  90.16 | 262.76 | 637.82 |
| Sunsetquest (without divs)   |  2.37 | 5.48 | 9.11 | 24.50 | 56.83 | 145.52 | 839.08 | 4.62 s |
| Jeremy Kahan (js-port)       | 46.53 | #.## | #.## |  #.## |  #.## |   #.## |   #.## |   #.## |
+------------------------------+-------+------+------+-------+-------+--------+--------+--------+
+------------------------------+--------+--------+--------+---------+---------+--------+--------+
| variant - single             | 2e1000 | 2e2500 | 2e5000 | 2e10000 | 2e25000 |  2e50k | 2e100k |
+------------------------------+--------+--------+--------+---------+---------+--------+--------+
| my version                   |   0.10 |   0.77 |   3.46 |   14.97 |  105.19 | 455.68 | 1.98 s |
| RedGreenCode (bound opti.)   |   0.26 |   1.41 |   6.53 |   25.36 |  182.68 | 777.39 | 3.30 s |
| RedGreenCode (newton method) |   0.33 |   1.73 |   8.08 |   32.07 |  228.50 | 974.40 | 4.15 s |
| Nordic Mainframe (CORDIC)    |   1.83 |   7.73 |  26.86 |   94.55 |  561.03 | 2.25 s | 10.3 s |
| Sunsetquest (without divs)   |  31.84 | 450.80 | 3.48 s |  27.5 s |    #.## |   #.## |   #.## |
| Jeremy Kahan (js-port)       |   #.## |   #.## |   #.## |    #.## |    #.## |   #.## |   #.## |
+------------------------------+--------+--------+--------+---------+---------+--------+--------+
- value example: 2e10 = 20000000000 (result: 141421)
- times in milliseconds or with "s" in seconds
- #.##: need more than 5 minutes (timeout)
Descriptions:
Jeremy Kahan (js-port)
Jeremy's simple algorithm works, but because of the simple adding/subtracting the computational effort grows with the square root of the input itself, i.e. exponentially in the number of digits... :)
Sunsetquest (without divs)
The approach without dividing is good, but due to the divide-and-conquer (bisection) variant the result converges relatively slowly (especially with large numbers).
Nordic Mainframe (CORDIC)
The CORDIC algorithm is already quite powerful, although the bit-by-bit operations on immutable BigIntegers add a lot of overhead.
I have calculated the required bits this way: int b = Convert.ToInt32(Math.Ceiling(BigInteger.Log(x, 2))) / 2 + 1;
RedGreenCode (newton method)
The proven Newton method shows that something old does not have to be slow. In particular, its fast convergence for large numbers can hardly be topped.
RedGreenCode (bound opti.)
Jesan Fafon's proposal to save a multiplication brought a lot here.
my version
First: small numbers are calculated directly with Math.Sqrt(), and as soon as the accuracy of double is no longer sufficient, the Newton algorithm takes over. However, I try to pre-calculate as many digits as possible with Math.Sqrt(), which makes the Newton algorithm converge much faster.
Here the source:
static readonly BigInteger FastSqrtSmallNumber = 4503599761588223UL; // as static readonly = reduce compare overhead
static BigInteger SqrtFast(BigInteger value)
{
if (value <= FastSqrtSmallNumber) // small enough for Math.Sqrt() or negative?
{
if (value.Sign < 0) throw new ArgumentException("Negative argument.");
return (ulong)Math.Sqrt((ulong)value);
}
BigInteger root; // now filled with an approximate value
int byteLen = value.ToByteArray().Length;
if (byteLen < 128) // small enough for direct double conversion?
{
root = (BigInteger)Math.Sqrt((double)value);
}
else // large: reduce with bitshifting, then convert to double (and back)
{
root = (BigInteger)Math.Sqrt((double)(value >> (byteLen - 127) * 8)) << (byteLen - 127) * 4;
}
for (; ; )
{
var root2 = value / root + root >> 1;
if ((root2 == root || root2 == root + 1) && IsSqrt(value, root)) return root;
root = value / root2 + root2 >> 1;
if ((root == root2 || root == root2 + 1) && IsSqrt(value, root2)) return root2;
}
}
static bool IsSqrt(BigInteger value, BigInteger root)
{
var lowerBound = root * root;
return value >= lowerBound && value <= lowerBound + (root << 1);
}
Full benchmark source:
using System;
using System.Numerics;
using System.Diagnostics;
namespace MathTest
{
class Program
{
static readonly BigInteger FastSqrtSmallNumber = 4503599761588223UL; // as static readonly = reduce compare overhead
static BigInteger SqrtMax(BigInteger value)
{
if (value <= FastSqrtSmallNumber) // small enough for Math.Sqrt() or negative?
{
if (value.Sign < 0) throw new ArgumentException("Negative argument.");
return (ulong)Math.Sqrt((ulong)value);
}
BigInteger root; // now filled with an approximate value
int byteLen = value.ToByteArray().Length;
if (byteLen < 128) // small enough for direct double conversion?
{
root = (BigInteger)Math.Sqrt((double)value);
}
else // large: reduce with bitshifting, then convert to double (and back)
{
root = (BigInteger)Math.Sqrt((double)(value >> (byteLen - 127) * 8)) << (byteLen - 127) * 4;
}
for (; ; )
{
var root2 = value / root + root >> 1;
if ((root2 == root || root2 == root + 1) && IsSqrt(value, root)) return root;
root = value / root2 + root2 >> 1;
if ((root == root2 || root == root2 + 1) && IsSqrt(value, root2)) return root2;
}
}
static bool IsSqrt(BigInteger value, BigInteger root)
{
var lowerBound = root * root;
return value >= lowerBound && value <= lowerBound + (root << 1);
}
// newton method
public static BigInteger SqrtRedGreenCode(BigInteger n)
{
if (n == 0) return 0;
if (n > 0)
{
int bitLength = Convert.ToInt32(Math.Ceiling(BigInteger.Log(n, 2)));
BigInteger root = BigInteger.One << (bitLength / 2);
while (!isSqrtRedGreenCode(n, root))
{
root += n / root;
root /= 2;
}
return root;
}
throw new ArithmeticException("NaN");
}
private static bool isSqrtRedGreenCode(BigInteger n, BigInteger root)
{
BigInteger lowerBound = root * root;
//BigInteger upperBound = (root + 1) * (root + 1);
return n >= lowerBound && n <= lowerBound + root + root;
//return (n >= lowerBound && n < upperBound);
}
// without divisions
public static BigInteger SqrtSunsetquest(BigInteger number)
{
if (number < 9)
{
if (number == 0)
return 0;
if (number < 4)
return 1;
else
return 2;
}
BigInteger n = 0, p = 0;
var high = number >> 1;
var low = BigInteger.Zero;
while (high > low + 1)
{
n = (high + low) >> 1;
p = n * n;
if (number < p)
{
high = n;
}
else if (number > p)
{
low = n;
}
else
{
break;
}
}
return number == p ? n : low;
}
// javascript port
public static BigInteger SqrtJeremyKahan(BigInteger n)
{
var oddNumber = BigInteger.One;
var result = BigInteger.Zero;
while (n >= oddNumber)
{
n -= oddNumber;
oddNumber += 2;
result++;
}
return result;
}
// CORDIC
public static BigInteger SqrtNordicMainframe(BigInteger x)
{
int b = Convert.ToInt32(Math.Ceiling(BigInteger.Log(x, 2))) / 2 + 1;
BigInteger r = 0; // r will contain the result
BigInteger r2 = 0; // here we maintain r squared
while (b >= 0)
{
var sr2 = r2;
var sr = r;
// compute (r+(1<<b))**2, we have r**2 already.
r2 += (r << 1 + b) + (BigInteger.One << b + b);
r += BigInteger.One << b;
if (r2 > x)
{
r = sr;
r2 = sr2;
}
b--;
}
return r;
}
static void Main(string[] args)
{
var t2 = BigInteger.Parse("2" + new string('0', 10000));
//var q1 = SqrtRedGreenCode(t2);
var q2 = SqrtSunsetquest(t2);
//var q3 = SqrtJeremyKahan(t2);
//var q4 = SqrtNordicMainframe(t2);
var q5 = SqrtMax(t2);
//if (q5 != q1) throw new Exception();
if (q5 != q2) throw new Exception();
//if (q5 != q3) throw new Exception();
//if (q5 != q4) throw new Exception();
for (int r = 0; r < 5; r++)
{
var mess = Stopwatch.StartNew();
//for (int i = 0; i < 1000; i++)
{
//var q = SqrtRedGreenCode(t2);
var q = SqrtSunsetquest(t2);
//var q = SqrtJeremyKahan(t2);
//var q = SqrtNordicMainframe(t2);
//var q = SqrtMax(t2);
}
mess.Stop();
Console.WriteLine((mess.ElapsedTicks * 1000.0 / Stopwatch.Frequency).ToString("N2") + " ms");
}
}
}
}
Short answer: (but beware, see below for more details)
Math.Pow(Math.E, BigInteger.Log(pd) / 2)
Where pd represents the BigInteger on which you want to perform the square root operation.
Long answer and explanation:
Another way to understand this problem is to understand how square roots and logs work.
If you have the equation 5^x = 25, to solve for x we must use logs. In this example, I will use natural logs (logs in other bases are also possible, but the natural log is the easy way).
5^x = 25
Rewriting, we have:
x(ln 5) = ln 25
To isolate x, we have
x = ln 25 / ln 5
We see this results in x = 2. But since we already know x (x = 2, in 5^2), let's change what we don't know and write a new equation and solve for the new unknown. Let x be the result of the square root operation. This gives us
2 = ln 25 / ln x
Rewriting to isolate x, we have
ln x = (ln 25) / 2
To remove the log, we now use a special identity of the natural log and the special number e. Specifically, e^ln x = x. Rewriting the equation now gives us
e^ln x = e^((ln 25) / 2)
Simplifying the left hand side, we have
x = e^((ln 25) / 2)
where x will be the square root of 25. You could also extend this idea to any root or number, and the general formula for the yth root of x becomes e^((ln x) / y).
Now to apply this specifically to C#, BigIntegers, and this question specifically, we simply implement the formula. WARNING: Although the math is correct, there are finite limits. This method will only get you in the neighborhood, with a large unknown range (depending on how big of a number you operate on). Perhaps this is why Microsoft did not implement such a method.
// A sample generated public key modulus
var pd = BigInteger.Parse("101017638707436133903821306341466727228541580658758890103412581005475252078199915929932968020619524277851873319243238741901729414629681623307196829081607677830881341203504364437688722228526603134919021724454060938836833023076773093013126674662502999661052433082827512395099052335602854935571690613335742455727");
var sqrt = Math.Pow(Math.E, BigInteger.Log(pd) / 2);
Console.WriteLine(sqrt);
NOTE: The BigInteger.Log() method returns a double, so two concerns arise. 1) The number is imprecise, and 2) there is an upper limit on what Log() can handle for BigInteger inputs. To examine the upper limit, we can look at the normal form for the natural log, that is ln x = y. In other words, e^y = x. Since double is the return type of BigInteger.Log(), it would stand to reason that the largest BigInteger would then be e raised to double.MaxValue. On my computer, that would be e^1.79769313486232E+308. The imprecision is unhandled. Anyone want to implement BigDecimal and update BigInteger.Log()?
Consumer beware, but it will get you in the neighborhood, and squaring the result does produce a number similar to the original input, accurate only to so many digits and not as precise as RedGreenCode's answer. Happy (square) rooting! ;)
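Following the same idea, the general y-th root formula from the explanation above can be written as a small sketch (my own illustration with a made-up name, NthRootApprox, and subject to the same double-precision caveats):
// Approximate y-th root of a BigInteger using e^((ln x) / y).
// Only accurate to double precision -- fine for "in the neighborhood" estimates.
static double NthRootApprox(BigInteger x, int y)
{
    return Math.Pow(Math.E, BigInteger.Log(x) / y);
}
// Usage: NthRootApprox(BigInteger.Parse("1000000000000"), 3) is roughly 10000.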
You can convert this to the language and variable types of your choice. Here is a truncated square root in JavaScript (freshest for me) that takes advantage of the fact that 1 + 3 + 5 + ... + (the nth odd number) = n^2. All the variables are integers, and it only adds and subtracts.
var truncSqrt = function(n) {
var oddNumber = 1;
var result = 0;
while (n >= oddNumber) {
n -= oddNumber;
oddNumber += 2;
result++;
}
return result;
};
Update: For best performance, use the Newton Plus version.
That one is hundreds of times faster. I am leaving this one for reference, however, as an alternative way.
// Source: http://mjs5.com/2016/01/20/c-biginteger-square-root-function/ Michael Steiner, Jan 2016
// Slightly modified to correct error below 6. (thank you M Ktsis D)
public static BigInteger SteinerSqrt(BigInteger number)
{
if (number < 9)
{
if (number == 0)
return 0;
if (number < 4)
return 1;
else
return 2;
}
BigInteger n = 0, p = 0;
var high = number >> 1;
var low = BigInteger.Zero;
while (high > low + 1)
{
n = (high + low) >> 1;
p = n * n;
if (number < p)
{
high = n;
}
else if (number > p)
{
low = n;
}
else
{
break;
}
}
return number == p ? n : low;
}
Update: Thank you to M Ktsis D for finding a bug in this. It has been corrected with a guard clause.
The two methods below use the Babylonian method to calculate the square root of the provided number. The Sqrt method returns a BigInteger and therefore only provides the answer down to the last whole number (no decimal places).
The method uses 15 iterations. After a few tests I found that 12-13 iterations are enough for 80+ digit numbers, but I decided to keep it at 15 just in case.
As the Babylonian square root approximation method requires us to pick a starting number that is half the length of the number whose square root we want, the RandomBigIntegerOfLength() method provides that number.
RandomBigIntegerOfLength() takes the desired number of digits as an argument and returns a randomly generated number of that length. The digits are generated with the Next() method of the Random class; a separate Next(1, 10) call produces the first digit so the number cannot start with 0 (something like 041657180501613764193159871), as that causes a DivideByZeroException. Note that the number is generated digit by digit as a string and only then converted to BigInteger.
The Sqrt method uses RandomBigIntegerOfLength to obtain a random number of half the length of the provided argument "number" and then calculates the square root using the Babylonian method with 15 iterations. The number of iterations can be decreased or increased as you like. Since the Babylonian method cannot compute the square root of 0 (it would require dividing by 0), the method returns 0 if 0 is passed as the argument.
//Copy the two methods
public static BigInteger Sqrt(BigInteger number)
{
BigInteger _x = RandomBigIntegerOfLength((number.ToString().ToCharArray().Length / 2));
try
{
for (int i = 0; i < 15; i++)
{
_x = (_x + number / _x) / 2;
}
return _x;
}
catch (DivideByZeroException)
{
return 0;
}
}
// Copy this method as well
private static BigInteger RandomBigIntegerOfLength(int length)
{
Random rand = new Random();
string _randomNumber = "";
_randomNumber = String.Concat(_randomNumber, rand.Next(1, 10));
for (int i = 0; i < length-1; i++)
{
_randomNumber = String.Concat(_randomNumber,rand.Next(10).ToString());
}
if (String.IsNullOrEmpty(_randomNumber) == false) return BigInteger.Parse(_randomNumber);
else return 0;
}
*** World's fastest BigInteger Sqrt for Java/C# !!!!***
Write-Up: https://www.codeproject.com/Articles/5321399/NewtonPlus-A-Fast-Big-Number-Square-Root-Function
Github: https://github.com/SunsetQuest/NewtonPlus-Fast-BigInteger-and-BigFloat-Square-Root
public static BigInteger NewtonPlusSqrt(BigInteger x)
{
if (x < 144838757784765629) // 1.448e17 = ~1<<57
{
uint vInt = (uint)Math.Sqrt((ulong)x);
if ((x >= 4503599761588224) && ((ulong)vInt * vInt > (ulong)x)) // 4.5e15 = ~1<<52
{
vInt--;
}
return vInt;
}
double xAsDub = (double)x;
if (xAsDub < 8.5e37) // long.max*long.max
{
ulong vInt = (ulong)Math.Sqrt(xAsDub);
BigInteger v = (vInt + ((ulong)(x / vInt))) >> 1;
return (v * v <= x) ? v : v - 1;
}
if (xAsDub < 4.3322e127)
{
BigInteger v = (BigInteger)Math.Sqrt(xAsDub);
v = (v + (x / v)) >> 1;
if (xAsDub > 2e63)
{
v = (v + (x / v)) >> 1;
}
return (v * v <= x) ? v : v - 1;
}
int xLen = (int)x.GetBitLength();
int wantedPrecision = (xLen + 1) / 2;
int xLenMod = xLen + (xLen & 1) + 1;
//////// Do the first Sqrt on hardware ////////
long tempX = (long)(x >> (xLenMod - 63));
double tempSqrt1 = Math.Sqrt(tempX);
ulong valLong = (ulong)BitConverter.DoubleToInt64Bits(tempSqrt1) & 0x1fffffffffffffL;
if (valLong == 0)
{
valLong = 1UL << 53;
}
//////// Classic Newton Iterations ////////
BigInteger val = ((BigInteger)valLong << 52) + (x >> xLenMod - (3 * 53)) / valLong;
int size = 106;
for (; size < 256; size <<= 1)
{
val = (val << (size - 1)) + (x >> xLenMod - (3 * size)) / val;
}
if (xAsDub > 4e254) // 4e254 = 1<<845.76973610139
{
int numOfNewtonSteps = BitOperations.Log2((uint)(wantedPrecision / size)) + 2;
////// Apply Starting Size ////////
int wantedSize = (wantedPrecision >> numOfNewtonSteps) + 2;
int needToShiftBy = size - wantedSize;
val >>= needToShiftBy;
size = wantedSize;
do
{
//////// Newton Plus Iterations ////////
int shiftX = xLenMod - (3 * size);
BigInteger valSqrd = (val * val) << (size - 1);
BigInteger valSU = (x >> shiftX) - valSqrd;
val = (val << size) + (valSU / val);
size *= 2;
} while (size < wantedPrecision);
}
/////// There are a few extra digits here, let's save them ///////
int oversidedBy = size - wantedPrecision;
BigInteger saveDroppedDigitsBI = val & ((BigInteger.One << oversidedBy) - 1);
int downby = (oversidedBy < 64) ? (oversidedBy >> 2) + 1 : (oversidedBy - 32);
ulong saveDroppedDigits = (ulong)(saveDroppedDigitsBI >> downby);
//////// Shrink result to wanted Precision ////////
val >>= oversidedBy;
//////// Detect a round-up ////////
if ((saveDroppedDigits == 0) && (val * val > x))
{
val--;
}
////////// Error Detection ////////
//// I believe the above has no errors but to guarantee the following can be added.
//// If an error is found, please report it.
//BigInteger tmp = val * val;
//if (tmp > x)
//{
// Console.WriteLine($"Missed , {ToolsForOther.ToBinaryString(saveDroppedDigitsBI, oversidedBy)}, {oversidedBy}, {size}, {wantedPrecision}, {saveDroppedDigitsBI.GetBitLength()}");
// if (saveDroppedDigitsBI.GetBitLength() >= 6)
// Console.WriteLine($"val^2 ({tmp}) < x({x}) off%:{((double)(tmp)) / (double)x}");
// //throw new Exception("Sqrt function had internal error - value too high");
//}
//if ((tmp + 2 * val + 1) <= x)
//{
// Console.WriteLine($"(val+1)^2({((val + 1) * (val + 1))}) >= x({x})");
// //throw new Exception("Sqrt function had internal error - value too low");
//}
return val;
}
(The original post shows a log-scale performance chart here.) Please note that on a log scale a small difference is a huge difference in performance. All implementations are in C# except GMP (C++/Asm), which was added for comparison. Java's version (ported to C#) has also been added.

Converting a range into a bit array

I'm writing a time-critical piece of code in C# that requires me to convert two unsigned integers that define an inclusive range into a bit field. Ex:
uint x1 = 3;
uint x2 = 9;
//defines the range [3-9]
//                             98 7654 3
//must be converted to: 0000 0011 1111 1000
It may help to visualize the bits in reverse order
The maximum value for this range is a parameter given at run-time which we'll call max_val. Therefore, the bit field variable ought to be defined as a UInt32 array with size equal to max_val/32:
UInt32 MAX_DIV_32 = max_val / 32;
UInt32[] bitArray = new UInt32[MAX_DIV_32];
Given a range defined by the variables x1 and x2, what is the fastest way to perform this conversion?
Try this. Calculate the range of array items that must be filled with all ones and do this by iterating over this range. Finally set the items at both borders.
Int32 startIndex = (Int32)(x1 >> 5);
Int32 endIndex = (Int32)(x2 >> 5);
bitArray[startIndex] = UInt32.MaxValue << (Int32)(x1 & 31);
for (Int32 i = startIndex + 1; i <= endIndex; i++)
{
bitArray[i] = UInt32.MaxValue;
}
bitArray[endIndex] &= UInt32.MaxValue >> (Int32)(31 - (x2 & 31));
Maybe the code is not 100% correct, but the idea should work.
I just tested it and found three bugs: the calculation at the start index required a mod 32, at the end index the 32 must be 31, and a logical AND is needed instead of an assignment to handle the case of the start and end index being the same. It should be quite fast.
Just benchmarked it with equal distribution of x1 and x2 over the array.
Intel Core 2 Duo E8400 3.0 GHz, MS VirtualPC with Server 2003 R2 on Windows XP host.
Array length [bits]          320          160          64
Performance [executions/s]   33 million   43 million   54 million
One more optimization: x % 32 == x & 31, but I am unable to measure a performance gain. With only 10,000,000 iterations in my test the fluctuations are quite high, and I am running in VirtualPC, making the situation even more unpredictable.
My solution for setting a whole range of bits in a BitArray to true or false:
public static BitArray SetRange(BitArray bitArray, Int32 offset, Int32 length, Boolean value)
{
Int32[] ints = new Int32[(bitArray.Count >> 5) + 1];
bitArray.CopyTo(ints, 0);
var firstInt = offset >> 5;
var lastInt = (offset + length) >> 5;
Int32 mask = 0;
if (value)
{
// set first and last int
mask = (-1 << (offset & 31));
if (lastInt != firstInt)
ints[lastInt] |= ~(-1 << ((offset + length) & 31));
else
mask &= ~(-1 << ((offset + length) & 31));
ints[firstInt] |= mask;
// set all ints in between
for (Int32 i = firstInt + 1; i < lastInt; i++)
ints[i] = -1;
}
else
{
// set first and last int
mask = ~(-1 << (offset & 31));
if (lastInt != firstInt)
ints[lastInt] &= -1 << ((offset + length) & 31);
else
mask |= -1 << ((offset + length) & 31);
ints[firstInt] &= mask;
// set all ints in between
for (Int32 i = firstInt + 1; i < lastInt; i++)
ints[i] = 0;
}
return new BitArray(ints) { Length = bitArray.Length };
}
You could try:
UInt32 x1 = 3;
UInt32 x2 = 9;
UInt32 newInteger = (UInt32)(Math.Pow(2, x2 + 1) - 1) &
~(UInt32)(Math.Pow(2, x1)-1);
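The same mask can be built with shifts instead of Math.Pow; a sketch for the question's values (valid as long as x2 < 31, so the shift doesn't wrap around a UInt32):
UInt32 x1 = 3;
UInt32 x2 = 9;
// Bits 0..x2 set, minus bits 0..x1-1 set, leaves exactly bits x1..x2.
UInt32 newInteger = ((1u << (int)(x2 + 1)) - 1) & ~((1u << (int)x1) - 1);
// newInteger == 0x3F8 == 0000 0011 1111 1000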
Is there a reason not to use the System.Collections.BitArray class instead of a UInt32[]? Otherwise, I'd try something like this:
int minIndex = (int)x1/32;
int maxIndex = (int)x2/32;
// first handle the all zero regions and the all one region (if any)
for (int i = 0; i < minIndex; i++) {
bitArray[i] = 0;
}
for (int i = minIndex + 1; i < maxIndex; i++) {
bitArray[i] = UInt32.MaxValue; // set to all 1s
}
for (int i = maxIndex + 1; i < MAX_DIV_32; i++) {
bitArray[i] = 0;
}
// now handle the tricky parts
uint maxBits = (2u << ((int)x2 - 32 * maxIndex)) - 1; // set to 1s up to max
uint minBits = ~((1u << ((int)x1 - 32 * minIndex)) - 1); // set to 1s after min
if (minIndex == maxIndex) {
bitArray[minIndex] = maxBits & minBits;
}
else {
bitArray[minIndex] = minBits;
bitArray[maxIndex] = maxBits;
}
I was bored enough to try doing it with a char array and using Convert.ToUInt32(string, int) to convert to a uint from base 2.
uint Range(int l, int h)
{
char[] buffer = new char[h];
for (int i = 0; i < buffer.Length; i++)
{
buffer[i] = i < h - l ? '1' : '0';
}
return Convert.ToUInt32(new string(buffer), 2);
}
A simple benchmark shows that my method is about 5% faster than Angrey Jim's (even if you replace the second Pow with a bit shift).
It is probably easiest to convert it to produce a uint array if the upper bound is too big to fit into a single uint. It's a little cryptic, but I believe it works.
uint[] Range(int l, int h)
{
char[] buffer = new char[h];
for (int i = 0; i < buffer.Length; i++)
{
buffer[i] = i < h - l ? '1' : '0';
}
int bitsInUInt = sizeof(uint) * 8;
int numNeededUInts = (int)Math.Ceiling((decimal)buffer.Length /
(decimal)bitsInUInt);
uint[] uints = new uint[numNeededUInts];
for (int j = uints.Length - 1, s = buffer.Length - bitsInUInt;
j >= 0 && s >= 0;
j--, s -= bitsInUInt)
{
uints[j] = Convert.ToUInt32(new string(buffer, s, bitsInUInt), 2);
}
int remainder = buffer.Length % bitsInUInt;
if (remainder > 0)
{
uints[0] = Convert.ToUInt32(new string(buffer, 0, remainder), 2);
}
return uints;
}
Try this:
uint x1 = 3;
uint x2 = 9;
int cbToShift = (int)(x2 - x1) + 1; // 7 bits in the inclusive range [3, 9]
int nResult = ((1 << cbToShift) - 1) << (int)x1;
/*
(1<<7)-1 gives you 127 = 1111111, then you shift it left by 3 bits
*/
