I'm trying to profile a function. The function is meant to convert a structure to an array. I have two different approaches, using marshaling or BitConverter. Of course the marshaling method makes a single function that can work with nearly all structures given certain conditions. The BitConverter requires a custom function for each structure. My initial thought was that the BitConverter would be faster but my testing results are not consistent.
Here is a cut and paste of the benchmark.
I've tried the below benchmark in a number of different forms.
When I do the benchmark for the BitConverter function first it tends to be faster.
When I do the benchmark for the Marshaling function first it tends to be faster.
What am I missing?
Summary to show flow. This is not the actual code just how the benchmark flows.
main()
{
Stopwatch watch = new Stopwatch;
// To take care of JIT
bitConverterFunction();
marshalingFunction();
//Thread.Sleep(0); // I've tried this thinking it had to do with context switching issues but the results were basically the same.
watch.Start();
for(i=0; i<iterations; i++)
{
bitConverterFunction();
}
watch.Stop();
Timespan bitConverterTime = watch.Elapsed;
//Thread.Sleep(0); // I've tried this thinking it had to do with context switching issues
watch.Restart();
for(i=0; i<iterations; i++)
{
marshalingFunction();
}
watch.Stop();
Timespan marshalingTime = watch.Elapsed;
// it seems that whichever function is run first, tends to be the quickest.
}
Real Code if you want to test
using System;
using BenchmarkTool;
namespace BenchmarkConsole
{
class Program
{
static void Main(string[] args)
{
Benchmarks.StructToArrayConversion(100);
Benchmarks.StructToArrayConversion(1000);
Benchmarks.StructToArrayConversion(10000);
Benchmarks.StructToArrayConversion(100000);
Console.WriteLine("Press any key to continue.");
Console.ReadKey();
}
}
}
using System;
using System.Diagnostics;
using System.Runtime.InteropServices;
using NUnit.Framework;
namespace BenchmarkTool
{
[TestFixture]
public static class Benchmarks
{
[TestCase(100)]
[TestCase(1000)]
[TestCase(10000)]
[TestCase(100000)]
[TestCase(1000000)]
public static void StructToArrayConversion(int iteration = 100)
{
Stopwatch watch = new Stopwatch();
EntityStatePDU6 state = new EntityStatePDU6()
{
Version = 0,
ExerciseID = 0x01,
PDUType = 0x02,
Family = 0x03,
Timestamp = 0x07060504,
Length = 0x0908,
Site = 0x0D0C,
Application = 0X0F0E,
Entity = 0X1110,
NumArticulationParams = 0X13,
VelocityX = BitConverter.ToSingle(new byte[] {0x14, 0x15, 0x16, 0x17}, 0),
VelocityY = BitConverter.ToSingle(new byte[] {0x18, 0x19, 0x1A, 0x1B}, 0),
VelocityZ = BitConverter.ToSingle(new byte[] {0x1C, 0x1D, 0x1E, 0x1F}, 0),
LocationX = BitConverter.ToSingle(new byte[] {0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27}, 0),
LocationY = BitConverter.ToSingle(new byte[] {0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F}, 0),
LocationZ = BitConverter.ToSingle(new byte[] {0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37}, 0),
Roll = BitConverter.ToSingle(new byte[] {0x38, 0x39, 0x3A, 0x3B}, 0),
Pitch = BitConverter.ToSingle(new byte[] {0x3C, 0x3D, 0x3E, 0x3F}, 0),
Heading = BitConverter.ToSingle(new byte[] {0x40, 0x41, 0x42, 0x43}, 0),
Appearance = 0X47464544
};
// To take care of JIT
ToArrayBitConverter(state);
state.ToByteArray();
Console.WriteLine("*** Benchmark Start ***");
Console.WriteLine("BitConverter Benchmark");
byte[] bitconverterArray = ToArrayBitConverter(state);
//Thread.Sleep(0);
watch.Start();
for(int i = 0; i < iteration; i++)
{
bitconverterArray = ToArrayBitConverter(state);
}
watch.Stop();
TimeSpan bitConverterTime = watch.Elapsed;
Console.WriteLine("{0} Iterations: {1}", iteration, watch.Elapsed.TotalSeconds.ToString("0.0000000"));
Console.WriteLine();
Console.WriteLine("Marshal StructToPtr Benchmark");
byte[] marshalArray = null;
//Thread.Sleep(0);
watch.Restart();
for (int i = 0; i < iteration; i++)
{
marshalArray = state.ToByteArray();
}
watch.Stop();
TimeSpan marshalTime = watch.Elapsed;
Console.WriteLine("{0} Iterations: {1}", iteration, watch.Elapsed.TotalSeconds.ToString("0.0000000"));
Console.WriteLine();
Console.WriteLine("Results");
Console.WriteLine("{0} Faster", marshalTime < bitConverterTime ? "Marshaling" : "BitConverter");
Console.WriteLine("Speed Ratio: {0}", marshalTime < bitConverterTime ? bitConverterTime.TotalSeconds / marshalTime.TotalSeconds : marshalTime.TotalSeconds / bitConverterTime.TotalSeconds);
Console.WriteLine("**********************************");
Console.WriteLine();
Assert.AreEqual(bitconverterArray.Length, marshalArray.Length);
for(int i = 0; i < bitconverterArray.Length; i++)
{
Assert.AreEqual(marshalArray[i],bitconverterArray[i], "#index " + i);
}
}
public static byte[] ToArrayBitConverter(EntityStatePDU6 entity)
{
int size = Marshal.SizeOf(typeof (EntityStatePDU6));
byte[] array = new byte[size];
array[0] = entity.Version;
array[1] = entity.ExerciseID;
array[2] = entity.PDUType;
array[3] = entity.Family;
array[4] = (byte)((0xFF & entity.Timestamp));
array[5] = (byte)((0xFF00 & entity.Timestamp) >> 8);
array[6] = (byte)((0xFF0000 & entity.Timestamp) >> 16);
array[7] = (byte)((0xFF000000 & entity.Timestamp) >> 24);
array[8] = (byte)((0xFF & entity.Length));
array[9] = (byte)((0xFF00 & entity.Length) >> 8);
// Padding1: array[10], array[11]
array[12] = (byte)((0xFF & entity.Site));
array[13] = (byte)((0xFF00 & entity.Site) >> 8);
array[14] = (byte)((0xFF & entity.Application));
array[15] = (byte)((0xFF00 & entity.Application) >> 8);
array[16] = (byte)((0xFF & entity.Entity));
array[17] = (byte)((0xFF00 & entity.Entity) >> 8);
//padding2 array[18]
array[19] = entity.NumArticulationParams;
byte[] bytes = BitConverter.GetBytes(entity.VelocityX);
array[20] = bytes[0];
array[21] = bytes[1];
array[22] = bytes[2];
array[23] = bytes[3];
bytes = BitConverter.GetBytes(entity.VelocityY);
array[24] = bytes[0];
array[25] = bytes[1];
array[26] = bytes[2];
array[27] = bytes[3];
bytes = BitConverter.GetBytes(entity.VelocityZ);
array[28] = bytes[0];
array[29] = bytes[1];
array[30] = bytes[2];
array[31] = bytes[3];
bytes = BitConverter.GetBytes(entity.LocationX);
array[32] = bytes[0];
array[33] = bytes[1];
array[34] = bytes[2];
array[35] = bytes[3];
array[36] = bytes[4];
array[37] = bytes[5];
array[38] = bytes[6];
array[39] = bytes[7];
bytes = BitConverter.GetBytes(entity.LocationY);
array[40] = bytes[0];
array[41] = bytes[1];
array[42] = bytes[2];
array[43] = bytes[3];
array[44] = bytes[4];
array[45] = bytes[5];
array[46] = bytes[6];
array[47] = bytes[7];
bytes = BitConverter.GetBytes(entity.LocationZ);
array[48] = bytes[0];
array[49] = bytes[1];
array[50] = bytes[2];
array[51] = bytes[3];
array[52] = bytes[4];
array[53] = bytes[5];
array[54] = bytes[6];
array[55] = bytes[7];
bytes = BitConverter.GetBytes(entity.Roll);
array[56] = bytes[0];
array[57] = bytes[1];
array[58] = bytes[2];
array[59] = bytes[3];
bytes = BitConverter.GetBytes(entity.Pitch);
array[60] = bytes[0];
array[61] = bytes[1];
array[62] = bytes[2];
array[63] = bytes[3];
bytes = BitConverter.GetBytes(entity.Heading);
array[64] = bytes[0];
array[65] = bytes[1];
array[66] = bytes[2];
array[67] = bytes[3];
array[68] = (byte)((0xFF & entity.Appearance));
array[69] = (byte)((0xFF00 & entity.Appearance) >> 8);
array[70] = (byte)((0xFF0000 & entity.Appearance) >> 16);
array[71] = (byte)((0xFF000000 & entity.Appearance) >> 24);
return array;
}
public static Byte[] ToByteArray<T>(this T obj) where T : struct
{
int size = Marshal.SizeOf(obj);
var arr = new byte[size];
IntPtr ptr = Marshal.AllocHGlobal(size);
Marshal.StructureToPtr(obj, ptr, false);
Marshal.Copy(ptr, arr, 0, size);
Marshal.FreeHGlobal(ptr);
return arr;
}
}
public struct EntityStatePDU6
{
// PDU Header 12 Bytes
public byte Version;
public byte ExerciseID;
public byte PDUType;
public byte Family;
public uint Timestamp;
public ushort Length;
public ushort Padding1;
// Entity ID 6 bytes
public ushort Site;
public ushort Application;
public ushort Entity;
public byte Padding2;
public byte NumArticulationParams;
public float VelocityX;
public float VelocityY;
public float VelocityZ;
public double LocationX;
public double LocationY;
public double LocationZ;
public float Roll;
public float Pitch;
public float Heading;
public uint Appearance;
}
}
Any of the cases below 100000 is too small to get consistent results.
The results are very inconsistent even between runs of the same code (> 2x timing differences). It makes me think there is a significant amount of garbage being generated and the outcome is dominated by when garbage collection kicks in and the performance of the garbage collector.
I added some GC.Collect calls after stopping the stopwatch and this made the results somewhat more consistent (variation between runs was +/- 10%). Marshaling was faster for 100000 and 1000000 iterations typically by 1.5 - 2 times. This was on Mono 2.10.8.1 compiled for Release|x86, so your mileage may vary.
Related
I'm trying to create a 16-bit PCM version of NAudio's MixingWaveProvider32 that operates on 16-bit PCM samples instead of 32-bit floats.
Each 16-bit stereo sample is packed in a byte array like so...
Byte 0
Byte 1
Byte 2
Byte 3
Channel 1 (Left) Lo
Channel 1 Hi
Channel 2 (Right) Lo
Channel 2 Hi
The two bytes per channel are interpreted as signed integers, so the minimum value is short.MinValue, the max is short.MaxValue. I don't think you can simply add the byte values to each other.
I've written some very long-handed code (see below) but I am convinced there is a more performant way of doing this.
I'd be really grateful for any help :-)
static void Main(string[] args)
{
// setup some input data
byte[] b1 = { 0x1, 0x0, 0x2, 0x0, 0x3, 0x0, 0x4, 0x0 };
byte[] b2 = new byte[b1.Length];
Array.Copy(b1, b2, b1.Length);
byte[] result = new byte[b1.Length];
Console.WriteLine("b1");
b1.DumpPcm();
Console.WriteLine();
Console.WriteLine("b2");
b2.DumpPcm();
for (int i = 0; i < b1.Length; i += 4)
{
short l1 = BitConverter.ToInt16(b1, i);
short r1 = BitConverter.ToInt16(b1, i + 2);
short l2 = BitConverter.ToInt16(b2, i);
short r2 = BitConverter.ToInt16(b2, i + 2);
byte[] resl = BitConverter.GetBytes(l1 + l2);
byte[] resr = BitConverter.GetBytes(r1 + r2);
result[i] = resl[0];
result[i + 1] = resl[1];
result[i + 2] = resr[0];
result[i + 3] = resr[1];
}
Console.WriteLine();
Console.WriteLine("Result...");
result.DumpPcm();
Console.ReadLine();
}
You could always use unsafe code, this should be significantly faster since you save a bunch of method calls and object allocations:
// setup some input data
byte[] b1 = {0x1, 0x0, 0x2, 0x0, 0x3, 0x0, 0x4, 0x0};
byte[] b2 = new byte[b1.Length];
Array.Copy(b1, b2, b1.Length);
byte[] result = new byte[b1.Length];
fixed (byte* b1Ptr = b1)
{
fixed (byte* b2Ptr = b2)
{
fixed (byte* rPtr = result)
{
var s1Ptr = (short*) b1Ptr;
var s2Ptr = (short*) b2Ptr;
var srPtr = (short*) rPtr;
var length = b1.Length / 2;
for (int i = 0; i < length; i++)
{
var v = s1Ptr[i] + s2Ptr[i];
srPtr[i] = (short) v;
Console.WriteLine($"{s1Ptr[i]} + {s2Ptr[i]} -> {srPtr[i]}");
}
}
}
}
Note that summing values might cause overflow. You should probably either average the two samples, or clamp the result to avoid this.
Hi im trying to repeat this wifi baby monitor project based on esp8266:
baby monitor project
But instead of receiving data on another esp8266, I want to receive data on pc.
I'm a c # programmer and I've encountered an problem of understanding c/c++ pointers how arrays works here and receive udp.
this is esp8266 receiver source code which works without any problems, but data that it receives, passes them to DAC. And i cant figure out where i can read just values one by one which was readed by esp8266 transmiter from ADC. Also readed data from ADC is 12 bit and author of original code use all 16 bit with some compression to transfer more data, and this compression part is what i have difficulty to uderstand it
#include <Wire.h>
#include <ESP8266WiFi.h>
#include <WiFiClient.h>
#include <WiFiUdp.h>
#include "ESP8266mDNS.h"
#include <ArduinoOTA.h>
//#include "wifi_params.h"
const int mySDA = D7;
const int mySCL = D6;
const int AMPLI_MUTE_PIN = D2;
const int AMPLI_SHUTDOWN_PIN = D1;
const int RIGHT_BTN = D3;
const int LEFT_BTN = D4;
const int LED1 = D8;
const int udp_recv_port = 45990;
WiFiUDP udp;
TwoWire i2c;
#define NB_DATA_BUFS 5
uint16_t data_buf[NB_DATA_BUFS][700]; // data buffer, N buffered
unsigned int current_play_data_buf; // current data buf being played
unsigned int play_data_buf_pos; // position in the ADC data buffer
unsigned int current_recv_data_buf; // current data buf being received
bool play_waiting = true;
bool amplifier_stopped = false;
long play_waiting_at;
bool left_btn_pressed;
bool right_btn_pressed;
#define ICACHE_RAM_ATTR __attribute__((section(".iram.text")))
#define twi_sda mySDA
#define twi_scl mySCL
#define twi_dcount 0
#define twi_clockStretchLimit 10
#define SDA_LOW() (GPES = (1 << twi_sda)) //Enable SDA (becomes output and since GPO is 0 for the pin, it will pull the line low)
#define SDA_HIGH() (GPEC = (1 << twi_sda)) //Disable SDA (becomes input and since it has pullup it will go high)
#define SDA_READ() ((GPI & (1 << twi_sda)) != 0)
#define SCL_LOW() (GPES = (1 << twi_scl))
#define SCL_HIGH() (GPEC = (1 << twi_scl))
#define SCL_READ() ((GPI & (1 << twi_scl)) != 0)
static void twi_delay(unsigned char v) {
unsigned int i;
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
unsigned int reg;
for (i = 0; i<v; i++) reg = GPI;
#pragma GCC diagnostic pop
}
static inline ICACHE_RAM_ATTR bool twi_write_start(void) {
SCL_HIGH();
SDA_HIGH();
if (SDA_READ() == 0) return false;
SDA_LOW();
return true;
}
static inline ICACHE_RAM_ATTR bool twi_write_stop(void) {
uint32_t i = 0;
SCL_LOW();
SDA_LOW();
SCL_HIGH();
while (SCL_READ() == 0 && (i++) < twi_clockStretchLimit); // Clock stretching
SDA_HIGH();
return true;
}
static inline ICACHE_RAM_ATTR bool twi_write_bit(bool bit) {
uint32_t i = 0;
SCL_LOW();
if (bit) SDA_HIGH();
else SDA_LOW();
twi_delay(twi_dcount + 1);
SCL_HIGH();
while (SCL_READ() == 0 && (i++) < twi_clockStretchLimit);// Clock stretching
return true;
}
static inline ICACHE_RAM_ATTR bool twi_read_bit(void) {
uint32_t i = 0;
SCL_LOW();
SDA_HIGH();
twi_delay(twi_dcount + 2);
SCL_HIGH();
while (SCL_READ() == 0 && (i++) < twi_clockStretchLimit);// Clock stretching
bool bit = SDA_READ();
return bit;
}
static inline ICACHE_RAM_ATTR bool twi_write_byte(unsigned char byte) {
unsigned char bit;
for (bit = 0; bit < 8; bit++) {
twi_write_bit(byte & 0x80);
byte <<= 1;
}
return !twi_read_bit();//NACK/ACK
}
static inline ICACHE_RAM_ATTR unsigned char twi_read_byte(bool nack) {
unsigned char byte = 0;
unsigned char bit;
for (bit = 0; bit < 8; bit++) byte = (byte << 1) | twi_read_bit();
twi_write_bit(nack);
return byte;
}
unsigned char inline ICACHE_RAM_ATTR mytwi_writeTo(unsigned char address, unsigned char * buf, unsigned int len, unsigned char sendStop) {
unsigned int i;
if (!twi_write_start()) return 4;//line busy
if (!twi_write_byte(((address << 1) | 0) & 0xFF)) {
if (sendStop) twi_write_stop();
return 2; //received NACK on transmit of address
}
for (i = 0; i<len; i++) {
if (!twi_write_byte(buf[i])) {
if (sendStop) twi_write_stop();
return 3;//received NACK on transmit of data
}
}
if (sendStop) twi_write_stop();
i = 0;
while (SDA_READ() == 0 && (i++) < 10) {
SCL_LOW();
SCL_HIGH();
}
return 0;
}
static inline ICACHE_RAM_ATTR uint8_t DAC(uint16_t value)
{
/* value is 76543210 XXXXBA98
per the datasheet for fast write:
1 1 0 0 A2 A1 A0 0 <ACK> 0 0 PD1 PD0 D11 D10 D9 D8 <ACK> D7 D6 D5 D4 D3 D2 D1 D0 <ACK>
*/
uint8_t buf[2] = { (value >> 8) & 0x0F, (value & 0xFF) };
int ret = mytwi_writeTo(0x60, buf, 2, true);
Serial.println(value);
return ret;
}
void ICACHE_RAM_ATTR playsample_isr(void)
{
if (play_waiting) {
return;
}
DAC(data_buf[current_play_data_buf][play_data_buf_pos]);
play_data_buf_pos++;
if (play_data_buf_pos >= sizeof(data_buf[0]) / sizeof(data_buf[0][0])) {
play_data_buf_pos = 0;
current_play_data_buf++;
if (current_play_data_buf == NB_DATA_BUFS) {
current_play_data_buf = 0;
}
if (current_play_data_buf == current_recv_data_buf) {
play_waiting = true;
play_waiting_at = micros();
}
}
}
void ota_onstart(void)
{
// Disable timer when an OTA happens
timer1_detachInterrupt();
timer1_disable();
}
void ota_onprogress(unsigned int sz, unsigned int total)
{
Serial.print("OTA: "); Serial.print(sz); Serial.print("/"); Serial.print(total);
Serial.print("="); Serial.print(100 * sz / total); Serial.println("%");
}
void ota_onerror(ota_error_t err)
{
Serial.print("OTA ERROR:"); Serial.println((int)err);
}
void left_btn_intr()
{
left_btn_pressed = 1;
}
void right_btn_intr()
{
right_btn_pressed = 1;
}
void setup(void)
{
Serial.begin(115200);
Serial.println("I was built on " __DATE__ " at " __TIME__ "");
i2c.begin(mySDA, mySCL);
i2c.setClock(400000);
WiFi.mode(WIFI_STA);
WiFi.begin("valik 2", "299745buhlo");
WiFi.setSleepMode(WIFI_MODEM_SLEEP);
Serial.print("Connecting to wifi");
while (WiFi.status() != WL_CONNECTED) {
delay(500);
Serial.print(".");
}
Serial.println("");
Serial.print("Cnnectd to ");
Serial.println("valik 2");
Serial.print("IP ");
Serial.println(WiFi.localIP());
ArduinoOTA.onStart(ota_onstart);
ArduinoOTA.onError(ota_onerror);
ArduinoOTA.onProgress(ota_onprogress);
ArduinoOTA.setHostname("bb-recv");
ArduinoOTA.begin();
timer1_isr_init();
timer1_attachInterrupt(playsample_isr);
timer1_enable(TIM_DIV16, TIM_EDGE, TIM_LOOP);
timer1_write(clockCyclesPerMicrosecond() / 16 * 50); //50us = 20 kHz sampling freq
udp.begin(udp_recv_port);
pinMode(AMPLI_MUTE_PIN, OUTPUT);
pinMode(AMPLI_SHUTDOWN_PIN, OUTPUT);
digitalWrite(AMPLI_SHUTDOWN_PIN, 0);
digitalWrite(AMPLI_MUTE_PIN, 0);
pinMode(LEFT_BTN, INPUT_PULLUP);
attachInterrupt(digitalPinToInterrupt(LEFT_BTN), left_btn_intr, FALLING);
pinMode(RIGHT_BTN, INPUT_PULLUP);
attachInterrupt(digitalPinToInterrupt(RIGHT_BTN), right_btn_intr, FALLING);
pinMode(LED1, OUTPUT);
digitalWrite(LED1, 0);
}
int do_undelta7(const uint8_t *val, int sz, uint16_t *out)
{
// Implement delta 7 decompression.
// First bit = 0 <=> uncompressed 15 bits following
// First bit = 1 <=> 7 bits follow representing delta
// must switch to big endian...
uint16_t last = 0;
uint8_t *ptr = (uint8_t *)&out[0];
const uint8_t *start = ptr;
for (int i = 0; i < sz; i++) {
uint16_t *ptr16 = (uint16_t *)ptr;
const int8_t firstbyte = val[i];
if (firstbyte & 0x80) {
// Delta7 compressed
// byte is CSMMMMMM
int8_t delta = firstbyte & 0x3F;
if (firstbyte & 0x40) {
delta = -delta;
}
const uint16_t value = last + delta;
*ptr16 = value;
ptr += 2;
last = value;
}
else {
// uncompressed -- switch bytes back to LE
*ptr++ = val[i + 1];
*ptr++ = val[i];
last = val[i + 1] | val[i] << 8;
i++;
}
}
return ptr - start;
}
void loop(void)
{
ArduinoOTA.handle();
int sz = udp.parsePacket();
//Serial.println(current_play_data_buf);
if (sz) {
uint8_t buf[sz];
udp.read(&buf[0], sz);
current_recv_data_buf++;
if (current_recv_data_buf == NB_DATA_BUFS) {
current_recv_data_buf = 0;
if (current_recv_data_buf == current_play_data_buf && !play_waiting) {
Serial.println("buffer overflow when receiving");
}
}
do_undelta7(buf, sz, &data_buf[current_recv_data_buf][0]);
if (play_waiting) {
Serial.print("Restarting play, was waiting (us)"); Serial.println(micros() - play_waiting_at);
// Re-enable *then* unmute in that order to avoid pops
digitalWrite(AMPLI_SHUTDOWN_PIN, 1);
digitalWrite(AMPLI_MUTE_PIN, 1);
play_waiting = false;
amplifier_stopped = false;
digitalWrite(LED1, 1);
}
Serial.println("");
}
// If not playing anything, but amplifier is still up
if (!amplifier_stopped && play_waiting) {
if ((micros() - play_waiting_at) > 2000 * 1000) {
// If nothing has been played for two seconds, shut down the amplifier
Serial.println("Shutting down amplifier!");
digitalWrite(AMPLI_SHUTDOWN_PIN, 0);
digitalWrite(AMPLI_MUTE_PIN, 0);
amplifier_stopped = true;
digitalWrite(LED1, 0);
}
}
if (left_btn_pressed) {
left_btn_pressed = 0;
digitalWrite(AMPLI_MUTE_PIN, 0);
digitalWrite(AMPLI_SHUTDOWN_PIN, 0);
}
if (right_btn_pressed) {
digitalWrite(AMPLI_SHUTDOWN_PIN, 1);
digitalWrite(AMPLI_MUTE_PIN, 1);
udp.beginPacket(udp.remoteIP(), 45990);
udp.write("sendnow");
udp.endPacket();
right_btn_pressed = 0;
}
// If the amplifier is stopped, add a delay for power saving
if (amplifier_stopped) {
delay(10);
}
}
This is my attempt to translate code to c++ for windows. But i encountered problem where programm just freeze without any errors and without closing.
#include "stdafx.h"
#include <winsock2.h>
#include <stdio.h>
#include <cstdint>
#include <ctime>
#pragma comment (lib, "ws2_32.lib")
#define NB_DATA_BUFS 5
uint16_t data_buf[NB_DATA_BUFS][700]; // data buffer, N buffered
unsigned int current_play_data_buf; // current data buf being played
unsigned int play_data_buf_pos; // position in the ADC data buffer
unsigned int current_recv_data_buf; // current data buf being received
bool play_waiting = true;
bool amplifier_stopped = false;
long play_waiting_at;
bool left_btn_pressed;
bool right_btn_pressed;
void InitWinsock()
{
WSADATA wsaData;
WSAStartup(MAKEWORD(2, 2), &wsaData);
}
int do_undelta7(const uint8_t *val, int sz, uint16_t *out)
{
// Implement delta 7 decompression.
// First bit = 0 <=> uncompressed 15 bits following
// First bit = 1 <=> 7 bits follow representing delta
// must switch to big endian...
uint16_t last = 0;
uint8_t *ptr = (uint8_t *)&out[0];
const uint8_t *start = ptr;
for (int i = 0; i < sz; i++) {
uint16_t *ptr16 = (uint16_t *)ptr;
const int8_t firstbyte = val[i];
if (firstbyte & 0x80) {
// Delta7 compressed
// byte is CSMMMMMM
int8_t delta = firstbyte & 0x3F;
if (firstbyte & 0x40) {
delta = -delta;
}
const uint16_t value = last + delta;
*ptr16 = value;
ptr += 2;
last = value;
}
else {
// uncompressed -- switch bytes back to LE
*ptr++ = val[i + 1];
*ptr++ = val[i];
last = val[i + 1] | val[i] << 8;
i++;
}
}
return ptr - start;
}
void DAC(uint16_t value)
{
/* value is 76543210 XXXXBA98
per the datasheet for fast write:
1 1 0 0 A2 A1 A0 0 <ACK> 0 0 PD1 PD0 D11 D10 D9 D8 <ACK> D7 D6 D5 D4 D3 D2 D1 D0 <ACK>
*/
uint8_t buf[2] = { (value >> 8) & 0x0F, (value & 0xFF) };
printf("%u\n", value & 0xFFF);
}
int _tmain(int argc, _TCHAR* argv[])
{
SOCKET socketC;
InitWinsock();
struct sockaddr_in serverInfo;
int len = 2000;
serverInfo.sin_family = AF_INET;
serverInfo.sin_port = htons(45990);
serverInfo.sin_addr.s_addr = inet_addr("192.168.1.105");
socketC = socket(AF_INET, SOCK_DGRAM, 0);
char buffers[16];
ZeroMemory(buffers, sizeof(buffers));
sendto(socketC, buffers, sizeof(IReleaseMarshalBuffers), 0, (sockaddr*)&serverInfo, len);
while (1)
{
sockaddr_in from;
const int paketSize = sizeof(from);
int r = paketSize;
char buffer[paketSize];
sprintf(buffer, "%.7s", "sendnow");
if (strcmp(buffer, "exit") == 0)
break;
recvfrom(socketC, buffer, sizeof(buffer), 0, (sockaddr*)&serverInfo, &len);
uint8_t buf[sizeof(buffer)];
uint8_t * bufeerPntr = (uint8_t*)buffer;
uint8_t * bufPntr = (uint8_t*)buffer;
for(int i=0;i<sizeof(buffer);i++)
{
buf[i] = buffer[i];
}
//udp.read(&buf[0], sizeof(buffer));
current_recv_data_buf++;
if (current_recv_data_buf == NB_DATA_BUFS) {
current_recv_data_buf = 0;
if (current_recv_data_buf == current_play_data_buf && !play_waiting) {
printf("buffer overflow when receiving\n");
}
}
do_undelta7(buf, sizeof(buffer), &data_buf[current_recv_data_buf][0]);
}
closesocket(socketC);
return 0;
}
This is my attempt to translate decoding part to c# (c# is much easier for me to understand) but i forced to use pointers and strange * and & things which i have difficulty to understand:
using System;
using System.Net;
using System.Net.Sockets;
using System.Text;
public class UDPListener
{
private const int listenPort = 45990;
public static int Main()
{
bool done = false;
UdpClient listener = new UdpClient(listenPort);
IPEndPoint groupEP = new IPEndPoint(IPAddress.Parse("192.168.1.3"), listenPort);
string received_data;
int BUFSIZE = 700;
byte[] receive_byte_array;
uint current_recv_data_buf = 1;
while (!done)
{
Console.WriteLine("Waiting for broadcast");
receive_byte_array = listener.Receive(ref groupEP);
Console.WriteLine("Received a broadcast from {0}", groupEP.ToString());
received_data = Encoding.ASCII.GetString(receive_byte_array, 0, receive_byte_array.Length);
unsafe
{
UInt16*[,] data_buf = new UInt16*[5, 700];
int sz = receive_byte_array.Length;
if (sz > 0)
{
byte[] buf = new byte[receive_byte_array.Length];
UInt16* f = stackalloc UInt16[2000];
//udp.read(&buf[0], sz);
buf = receive_byte_array;
current_recv_data_buf++;
UInt16 last = 0;
UInt16* #out1 = stackalloc UInt16[800];
for (int i = 0; i < 800; i++)
{
#out1[i] = (char)i;
}
//UIntPtr* ptr = (UIntPtr*)&#out[0];
UIntPtr* ptr = (UIntPtr*)&#out1[0];
UIntPtr* start = ptr;
for (int i = 0; i < sz; i++)
{
UIntPtr* ptr16 = ptr;
byte firstbyte = buf[i];
if ((firstbyte & 0x80) != 0)
{
// Delta7 compressed
// byte is CSMMMMMM
byte delta = (byte)(firstbyte & 0x3F);
if ((firstbyte & 0x40) != 0)
{
delta = (byte)(0 - delta);
}
UInt16 value = (UInt16)(last + delta);
*ptr16 = (UIntPtr)value;
ptr += 2;
last = value;
}
else
{
*ptr++ = (UIntPtr)buf[i + 1];
*ptr++ = (UIntPtr)buf[i];
last = (UInt16)(buf[i + 1] | buf[i] << 8);
i++;
}
}
for (int i = 0; i < 91; i++)
{
System.Console.WriteLine(#out1[i]);
}
string b = "";
}
}
}
listener.Close();
return 0;
}
} // end of class UDPListener
udp.read(&buf[0], sz); copies the received UDP packet into buffer buf. The function do_undelta7 then makes a decompression of the data in the input buffer to output buffer data_buf[current_recv_data_buf]. data_buf is array of buffers. The interrupt playsample_isr plays the content of the buffers in data_buf.
i missed an unsigned byte in do_undelta7
so now it's decoding well
c#
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Media;
using System.Net;
using System.Net.Sockets;
using System.Text;
using System.Threading;
using NAudio.Wave;
public class UDPListener
{
uint current_recv_data_buf;
static int NB_DATA_BUFS = 5;
static UInt16[] data_buf = new UInt16[700];
uint current_play_data_buf; // current data buf being played
uint play_data_buf_pos; // position in the ADC data buffer
private const int listenPort = 45990;
public static unsafe int Main()
{
bool done = false;
UdpClient listener = new UdpClient(listenPort);
IPEndPoint groupEP = new IPEndPoint(IPAddress.Parse("192.168.1.3"), listenPort);
int BUFSIZE = 700;
byte[] receive_byte_array;
uint current_recv_data_buf = 1;
List<byte> tenBuffsToPlay = new List<byte>();
int iterBuffsToPLay = 0;
byte[] byteArrToPlay = new byte[data_buf.Length * 2];
byte[] byte10ArrToPlay;
int pktcount = 0;
var sampleRate = 20000;
var frequency = 500;
var amplitude = 0.2;
var seconds = 5;
while (!done)
{
receive_byte_array = listener.Receive(ref groupEP);
if (receive_byte_array.Length > 0)
{
Console.WriteLine("received !"+pktcount++);
int sz = receive_byte_array.Length;
unsafe
{
byte[] buf = new byte[sz];
buf = receive_byte_array;
fixed (UInt16* data_bufPtr = &data_buf[0])
fixed (byte* ptrbuf = buf)
do_undelta7(ptrbuf, sz, data_bufPtr);
//string firstPart = "";
//string secondPart = "";
for (int i =0;i<data_buf.Length;i++)
{
//Console.WriteLine("Hex: {0:X}", data_buf[i]);
byteArrToPlay[i*2] = (byte)((data_buf[i] >> 8)&0x0f);
byteArrToPlay[(i*2)+1] = (byte)(data_buf[i] & 0xff);
//firstPart = Convert.ToString(byteArrToPlay[i], 2).PadLeft(4, '0');
//Console.Write(firstPart);
//secondPart = Convert.ToString(byteArrToPlay[i+1], 2).PadLeft(4, '0');
//Console.Write(secondPart+"\n");
}
//byteArrToPlay = data_buf.SelectMany(BitConverter.GetBytes).ToArray();
//foreach (var Arr in byteArrToPlay)
// {
// Console.WriteLine("Hex: {0:X}", Arr);
// }
tenBuffsToPlay.AddRange(byteArrToPlay);
iterBuffsToPLay++;
if (iterBuffsToPLay == 3)
{
byte10ArrToPlay = tenBuffsToPlay.ToArray();
/*var raw = new byte[sampleRate * seconds * 2];
var multiple = 2.0 * frequency / sampleRate;
for (int n = 0; n < sampleRate * seconds; n++)
{
var sampleSaw = ((n * multiple) % 2) - 1;
var sampleValue = sampleSaw > 0 ? amplitude : -amplitude;
var sample = (short)(sampleValue * Int16.MaxValue);
var bytes = BitConverter.GetBytes(sample);
raw[n * 2] = bytes[0];
raw[n * 2 + 1] = bytes[1];
}*/
var ms = new MemoryStream(byte10ArrToPlay);
var rs = new RawSourceWaveStream(ms, new WaveFormat(sampleRate, 16, 1));
var wo = new WaveOutEvent();
wo.Init(rs);
wo.Play();
/*while (wo.PlaybackState == PlaybackState.Playing)
{
Thread.Sleep(1);
}*/
//wo.Dispose();
/*using (MemoryStream ms = new MemoryStream())
{
WriteWavHeader(ms, false, 1, 16, 20000, (byte10ArrToPlay.Length / 2 - 45));
// Construct the sound player
ms.Write(byte10ArrToPlay, 0, byte10ArrToPlay.Length);
ms.Position = 0;
SoundPlayer player = new SoundPlayer(ms);
player.Play();
}*/
tenBuffsToPlay.Clear();
iterBuffsToPLay = 0;
}
}
}
}
listener.Close();
return 0;
}
static unsafe long do_undelta7(byte* val, int sz, UInt16* outArray)
{
// Implement delta 7 decompression.
// First bit = 0 <=> uncompressed 15 bits following
// First bit = 1 <=> 7 bits follow representing delta
// must switch to big endian...
UInt16 last = 0;
byte* ptr = (byte*)&outArray[0];
byte* start = ptr;
for (int i = 0; i < sz; i++)
{
UInt16* ptr16 = (UInt16*)ptr;
byte firstbyte = val[i];
var bit = (firstbyte & (1 << 8 - 1)) != 0;
if (bit == true)
{
// Delta7 compressed
// byte is CSMMMMMM
sbyte delta = (sbyte)(firstbyte & 0x3f);
bit = (firstbyte & (1 << 7 - 1)) != 0;
if (bit == true)
{
delta = (sbyte)(0x0 - delta);
}
UInt16 value = (UInt16)(last + delta);
*ptr16 = value;
ptr += 2;
last = value;
}
else
{
// uncompressed -- switch bytes back to LE
*ptr++ = val[i + 1];
*ptr++ = val[i];
last = (UInt16)(val[i + 1] | val[i] << 8);
i++;
}
}
return ptr - start;
}
private static void WriteWavHeader(MemoryStream stream, bool isFloatingPoint, ushort channelCount, ushort bitDepth, int sampleRate, int totalSampleCount)
{
stream.Position = 0;
stream.Write(Encoding.ASCII.GetBytes("RIFF"), 0, 4);
stream.Write(BitConverter.GetBytes((2* totalSampleCount) + 36), 0, 4);
stream.Write(Encoding.ASCII.GetBytes("WAVE"), 0, 4);
stream.Write(Encoding.ASCII.GetBytes("fmt "), 0, 4);
stream.Write(BitConverter.GetBytes(16), 0, 4);
stream.Write(BitConverter.GetBytes((ushort)(isFloatingPoint ? 3 : 1)), 0, 2);
stream.Write(BitConverter.GetBytes(channelCount), 0, 2);
stream.Write(BitConverter.GetBytes(sampleRate), 0, 4);
stream.Write(BitConverter.GetBytes(sampleRate * 2), 0, 4);
stream.Write(BitConverter.GetBytes((ushort)2), 0, 2);
stream.Write(BitConverter.GetBytes(16), 0, 2);
stream.Write(Encoding.ASCII.GetBytes("data"), 0, 4);
stream.Write(BitConverter.GetBytes(2 * totalSampleCount), 0, 4);
}
} // end of class UDPListener
I have an array of audio data, which is a lot of Int32 numbers represented by array of bytes (each 4 byte element represents an Int32) and i want to do some manipulation on the data (for example, add 10 to each Int32).
I converted the bytes to Int32, do the manipulation and convert it back to bytes as in this example:
//byte[] buffer;
for (int i=0; i<buffer.Length; i+=4)
{
Int32 temp0 = BitConverter.ToInt32(buffer, i);
temp0 += 10;
byte[] temp1 = BitConverter.GetBytes(temp0);
for (int j=0;j<4;j++)
{
buffer[i + j] = temp1[j];
}
}
But I would like to know if there is a better way to do such manipulation.
You can check the .NET Reference Source for pointers (grin) on how to convert from/to big endian.
class intFromBigEndianByteArray {
public byte[] b;
public int this[int i] {
get {
i <<= 2; // i *= 4; // optional
return (int)b[i] << 24 | (int)b[i + 1] << 16 | (int)b[i + 2] << 8 | b[i + 3];
}
set {
i <<= 2; // i *= 4; // optional
b[i ] = (byte)(value >> 24);
b[i + 1] = (byte)(value >> 16);
b[i + 2] = (byte)(value >> 8);
b[i + 3] = (byte)value;
}
}
}
and sample use:
byte[] buffer = { 127, 255, 255, 255, 255, 255, 255, 255 };//big endian { int.MaxValue, -1 }
//bool check = BitConverter.IsLittleEndian; // true
//int test = BitConverter.ToInt32(buffer, 0); // -129 (incorrect because little endian)
var fakeIntBuffer = new intFromBigEndianByteArray() { b = buffer };
fakeIntBuffer[0] += 2; // { 128, 0, 0, 1 } = big endian int.MinValue - 1
fakeIntBuffer[1] += 2; // { 0, 0, 0, 1 } = big endian 1
Debug.Print(string.Join(", ", buffer)); // "128, 0, 0, 0, 1, 0, 0, 1"
For better performance you can look into parallel processing and SIMD instructions - Using SSE in C#
For even better performance, you can look into Utilizing the GPU with c#
How about the following approach:
struct My
{
public int Int;
}
var bytes = Enumerable.Range(0, 20).Select(n => (byte)(n + 240)).ToArray();
foreach (var b in bytes) Console.Write("{0,-4}", b);
// Pin the managed memory
GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned);
for (int i = 0; i < bytes.Length; i += 4)
{
// Copy the data
My my = (My)Marshal.PtrToStructure<My>(handle.AddrOfPinnedObject() + i);
my.Int += 10;
// Copy back
Marshal.StructureToPtr(my, handle.AddrOfPinnedObject() + i, true);
}
// Unpin
handle.Free();
foreach (var b in bytes) Console.Write("{0,-4}", b);
I made it just for fun.
Not sure that's less ugly.
I don't know, will it be faster? Test it.
I am currently using BitConverter to package two unsigned shorts inside a signed int. This code executes millions of times for different values and I am thinking the code could be optimized further. Here is what I am currently doing -- you can assume the code is C#/NET.
// to two unsigned shorts from one signed int:
int xy = 343423;
byte[] bytes = BitConverter.GetBytes(xy);
ushort m_X = BitConverter.ToUInt16(bytes, 0);
ushort m_Y = BitConverter.ToUInt16(bytes, 2);
// convet two unsigned shorts to one signed int
byte[] xBytes = BitConverter.GetBytes(m_X);
byte[] yBytes = BitConverter.GetBytes(m_Y);
byte[] bytes = new byte[] {
xBytes[0],
xBytes[1],
yBytes[0],
yBytes[1],
};
return BitConverter.ToInt32(bytes, 0);
So it occurs to me that I can avoid the overhead of constructing arrays if I bitshift. But for the life of me I can't figure out what the correct shift operation is. My first pathetic attempt involved the following code:
int xy = 343423;
const int mask = 0x00000000;
byte b1, b2, b3, b4;
b1 = (byte)((xy >> 24));
b2 = (byte)((xy >> 16));
b3 = (byte)((xy >> 8) & mask);
b4 = (byte)(xy & mask);
ushort m_X = (ushort)((xy << b4) | (xy << b3));
ushort m_Y = (ushort)((xy << b2) | (xy << b1));
Could someone help me? I am thinking I need to mask the upper and lower bytes before shifting. Some of the examples I see include subtraction with type.MaxValue or an arbitrary number, like negative twelve, which is pretty confusing.
** Update **
Thank you for the great answers. Here are the results of a benchmark test:
// 34ms for bit shift with 10M operations
// 959ms for BitConverter with 10M operations
static void Main(string[] args)
{
Stopwatch stopWatch = new Stopwatch();
stopWatch.Start();
for (int i = 0; i < 10000000; i++)
{
ushort x = (ushort)i;
ushort y = (ushort)(i >> 16);
int result = (y << 16) | x;
}
stopWatch.Stop();
Console.WriteLine((int)stopWatch.Elapsed.TotalMilliseconds + "ms");
stopWatch.Start();
for (int i = 0; i < 10000000; i++)
{
byte[] bytes = BitConverter.GetBytes(i);
ushort x = BitConverter.ToUInt16(bytes, 0);
ushort y = BitConverter.ToUInt16(bytes, 2);
byte[] xBytes = BitConverter.GetBytes(x);
byte[] yBytes = BitConverter.GetBytes(y);
bytes = new byte[] {
xBytes[0],
xBytes[1],
yBytes[0],
yBytes[1],
};
int result = BitConverter.ToInt32(bytes, 0);
}
stopWatch.Stop();
Console.WriteLine((int)stopWatch.Elapsed.TotalMilliseconds + "ms");
Console.ReadKey();
}
The simplest way is to do it using two shifts:
int xy = -123456;
// Split...
ushort m_X = (ushort) xy;
ushort m_Y = (ushort)(xy>>16);
// Convert back...
int back = (m_Y << 16) | m_X;
Demo on ideone: link.
int xy = 343423;
ushort low = (ushort)(xy & 0x0000ffff);
ushort high = (ushort)((xy & 0xffff0000) >> 16);
int xxyy = low + (((int)high) << 16);
I have the following C# implementation of triple DES
byte[] bKey = HexToBytes("C67DDB0CE47D27FAF6F32ECA5C99E8AF");
byte[] bMsg = HexToBytes("ff00");
byte[] iv = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
DESCryptoServiceProvider des = new DESCryptoServiceProvider();
des.Padding = PaddingMode.Zeros;
des.Mode = CipherMode.CBC;
byte[] bK1 = new byte[8];
for (int i = 0; i < 8; i++) bK1[i] = bKey[i];
byte[] bK2 = new byte[8];
for (int i = 0; i < 8; i++) bK2[i] = bKey[i + 8];
ICryptoTransform ict1 = des.CreateEncryptor(bK1, iv);
byte[] bFt = ict1.TransformFinalBlock(bMsg, 0, bMsg.Length);
byte[] bLCb = new byte[8];
for (int i = 0; i < 8; i++) bLCb[i] = bFt[i + bFt.Length - 8];
des.Mode = CipherMode.ECB;
ICryptoTransform ict1_5 = des.CreateDecryptor(bK2, iv);
bLCb = ict1_5.TransformFinalBlock(bLCb, 0, bLCb.Length);
ICryptoTransform ict2 = des.CreateEncryptor(bK1, iv);
byte[] bMac = ict2.TransformFinalBlock(bLCb, 0, bLCb.Length);
ToHex(bMac); // outputs: 4BC0479D7889CF8E
I need to produce same result in Java/Groovy, in which I'm apparently stuck.
The code I have for now is as follows:
byte[] bKey = Hex.decode("C67DDB0CE47D27FAF6F32ECA5C99E8AF")
byte[] bMsg = Hex.decode("ff00")
byte[] keyBytes = Arrays.copyOf(sKey.bytes, 24)
int j = 0, k = 16
while (j < 8) {
keyBytes[k++] = keyBytes[j++]
}
SecretKey key3 = new SecretKeySpec(keyBytes, "DESede")
IvParameterSpec iv3 = new IvParameterSpec(new byte[8])
Cipher cipher3 = Cipher.getInstance("DESede/CBC/PKCS5Padding")
cipher3.init(Cipher.ENCRYPT_MODE, key3, iv3)
byte[] bMac = cipher3.doFinal(bMsg)
println new String(Hex.encode(bMac))
This one outpus: ef2c57c3fa18d0a5
Hex.decode() here is of bouncy castle
I have also tried to reproduce same C# code in java by using DES/CBC twice and EBC in final round, which gave me even different result: 48f63c809c38e1eb
It'd be great if someone could give me a hint of what I may be doing wrong
Update:
Thanks everyone for your help! Final code that works as needed without much tweaking:
Security.addProvider(new BouncyCastleProvider())
byte[] bKey = Hex.decode("C67DDB0CE47D27FAF6F32ECA5C99E8AF")
byte[] bMsg = Hex.decode("ff00")
byte[] keyBytes = Arrays.copyOf(sKey.bytes, 24)
int j = 0, k = 16
while (j < 8) {
keyBytes[k++] = keyBytes[j++]
}
SecretKey key3 = new SecretKeySpec(keyBytes, "DESede")
IvParameterSpec iv3 = new IvParameterSpec(new byte[8])
Cipher cipher3 = Cipher.getInstance("DESede/CBC/ZeroBytePadding")
cipher3.init(Cipher.ENCRYPT_MODE, key3, iv3)
byte[] bMac = cipher3.doFinal(bMsg)
println new String(Hex.encode(bMac))
You're using some non-standard padding and block chaining. You won't be able to use DESede. Try DES instead:
import javax.crypto.*
import javax.crypto.spec.*
def key1 = new SecretKeySpec("C67DDB0CE47D27FA".decodeHex(), "DES")
def key2 = new SecretKeySpec("F6F32ECA5C99E8AF".decodeHex(), "DES")
def plaintext = ("ff00" + "000000000000").decodeHex() // manually zero pad
def c1 = Cipher.getInstance("DES/CBC/NoPadding")
c1.init(Cipher.ENCRYPT_MODE, key1, new IvParameterSpec(new byte[8]))
def cipherText1 = c1.doFinal(plaintext)
def c2 = Cipher.getInstance("DES/CBC/NoPadding")
c2.init(Cipher.DECRYPT_MODE, key2, new IvParameterSpec(new byte[8]))
def cipherText2 = c2.doFinal(cipherText1)
def c3 = Cipher.getInstance("DES/ECB/NoPadding")
c3.init(Cipher.ENCRYPT_MODE, key1)
def cipherText3 = c3.doFinal(cipherText2)
assert cipherText3.encodeHex().toString() == "4bc0479d7889cf8e"