Skip to content

Commit

Permalink
Merge branch 'c98' into next
Browse files Browse the repository at this point in the history
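VOLK_GNSSSDR is now built with the compiler's default language standard instead of forcing C11/C++11: the -std=c11 and -std=c++11 flags are removed, and loop counters are declared outside the for statements so the kernels no longer need C99-style declarations (the "c98" branch).
This allows more compilers to be used.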
  • Loading branch information
carlesfernandez committed May 31, 2016
2 parents 0aa980a + 2f339d2 commit d490191
Show file tree
Hide file tree
Showing 32 changed files with 844 additions and 674 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ enable_language(CXX)
enable_language(C)
enable_testing()

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11 -Wall")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall")

option(ENABLE_STRIP "Create a stripped volk_gnsssdr_profile binary (without shared libraries)" OFF)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@

static inline void volk_gnsssdr_16ic_convert_32fc_generic(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
{
for(unsigned int i = 0; i < num_points; i++)
unsigned int i;
for(i = 0; i < num_points; i++)
{
outputVector[i] = lv_cmake((float)lv_creal(inputVector[i]), (float)lv_cimag(inputVector[i]));
}
Expand All @@ -76,22 +77,19 @@ static inline void volk_gnsssdr_16ic_convert_32fc_generic(lv_32fc_t* outputVecto
static inline void volk_gnsssdr_16ic_convert_32fc_a_sse2(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 2;

unsigned int i;
const lv_16sc_t* _in = inputVector;
lv_32fc_t* _out = outputVector;
__m128 a;
for(unsigned int number = 0; number < sse_iters; number++)

for(i = 0; i < sse_iters; i++)
{
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // load (2 byte imag, 2 byte real) x 2 into 128 bits reg
_mm_store_ps((float*)_out, a);
_in += 2;
_out += 2;
//*_out++ = lv_cmake((float)lv_creal(*_in),(float)lv_cimag(*_in));
//_in++;
//*_out++ = lv_cmake((float)lv_creal(*_in),(float)lv_cimag(*_in));
//_in++;
}
for (unsigned int i = 0; i < (num_points % 2); ++i)
for (i = 0; i < (num_points % 2); ++i)
{
*_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
_in++;
Expand All @@ -106,18 +104,19 @@ static inline void volk_gnsssdr_16ic_convert_32fc_a_sse2(lv_32fc_t* outputVector
static inline void volk_gnsssdr_16ic_convert_32fc_u_sse2(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 2;

unsigned int i;
const lv_16sc_t* _in = inputVector;
lv_32fc_t* _out = outputVector;
__m128 a;
for(unsigned int number = 0; number < sse_iters; number++)

for(i = 0; i < sse_iters; i++)
{
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
_mm_storeu_ps((float*)_out, a);
_in += 2;
_out += 2;
}
for (unsigned int i = 0; i < (num_points % 2); ++i)
for (i = 0; i < (num_points % 2); ++i)
{
*_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
_in++;
Expand All @@ -132,19 +131,20 @@ static inline void volk_gnsssdr_16ic_convert_32fc_u_sse2(lv_32fc_t* outputVector
static inline void volk_gnsssdr_16ic_convert_32fc_u_axv(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 4;

unsigned int i;
const lv_16sc_t* _in = inputVector;
lv_32fc_t* _out = outputVector;
__m256 a;
for(unsigned int number = 0; number < sse_iters; number++)

for(i = 0; i < sse_iters; i++)
{
a = _mm256_set_ps((float)(lv_cimag(_in[3])), (float)(lv_creal(_in[3])), (float)(lv_cimag(_in[2])), (float)(lv_creal(_in[2])), (float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
_mm256_storeu_ps((float*)_out, a);
_in += 4;
_out += 4;
}
_mm256_zeroupper();
for (unsigned int i = 0; i < (num_points % 4); ++i)
for(i = 0; i < (num_points % 4); ++i)
{
*_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
_in++;
Expand All @@ -158,19 +158,20 @@ static inline void volk_gnsssdr_16ic_convert_32fc_u_axv(lv_32fc_t* outputVector,
static inline void volk_gnsssdr_16ic_convert_32fc_a_axv(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 4;

unsigned int i;
const lv_16sc_t* _in = inputVector;
lv_32fc_t* _out = outputVector;
__m256 a;
for(unsigned int number = 0; number < sse_iters; number++)

for(i = 0; i < sse_iters; i++)
{
a = _mm256_set_ps((float)(lv_cimag(_in[3])), (float)(lv_creal(_in[3])), (float)(lv_cimag(_in[2])), (float)(lv_creal(_in[2])), (float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
_mm256_store_ps((float*)_out, a);
_in += 4;
_out += 4;
}
_mm256_zeroupper();
for (unsigned int i = 0; i < (num_points % 4); ++i)
for(i = 0; i < (num_points % 4); ++i)
{
*_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
_in++;
Expand All @@ -185,15 +186,15 @@ static inline void volk_gnsssdr_16ic_convert_32fc_a_axv(lv_32fc_t* outputVector,
static inline void volk_gnsssdr_16ic_convert_32fc_neon(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 2;

unsigned int i;
const lv_16sc_t* _in = inputVector;
lv_32fc_t* _out = outputVector;

int16x4_t a16x4;
int32x4_t a32x4;
float32x4_t f32x4;

for(unsigned int number = 0; number < sse_iters; number++)
for(i = 0; i < sse_iters; i++)
{
a16x4 = vld1_s16((const int16_t*)_in);
__builtin_prefetch(_in + 4);
Expand All @@ -203,7 +204,7 @@ static inline void volk_gnsssdr_16ic_convert_32fc_neon(lv_32fc_t* outputVector,
_in += 2;
_out += 2;
}
for (unsigned int i = 0; i < (num_points % 2); ++i)
for (i = 0; i < (num_points % 2); ++i)
{
*_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
_in++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,9 @@
static inline void volk_gnsssdr_16ic_resampler_fast_16ic_generic(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)
{
int local_code_chip_index;
unsigned int n;
//fesetround(FE_TONEAREST);
for (unsigned int n = 0; n < num_output_samples; n++)
for (n = 0; n < num_output_samples; n++)
{
// resample code for current tap
local_code_chip_index = round(code_phase_step_chips * (float)n + rem_code_phase_chips - 0.5f);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,11 @@ static inline void volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic_generic(lv_16sc_
float code_phase_step_chips = 0.1;
int code_length_chips = 2046;
int num_out_vectors = 3;
unsigned int n;
float* rem_code_phase_chips = (float*)volk_gnsssdr_malloc(sizeof(float) * num_out_vectors, volk_gnsssdr_get_alignment());

lv_16sc_t** result_aux = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_out_vectors, volk_gnsssdr_get_alignment());
for(unsigned int n = 0; n < num_out_vectors; n++)

for(n = 0; n < num_out_vectors; n++)
{
rem_code_phase_chips[n] = -0.234;
result_aux[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment());
Expand All @@ -59,7 +60,7 @@ static inline void volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic_generic(lv_16sc_

memcpy((lv_16sc_t*)result, (lv_16sc_t*)result_aux[0], sizeof(lv_16sc_t) * num_points);
volk_gnsssdr_free(rem_code_phase_chips);
for(unsigned int n = 0; n < num_out_vectors; n++)
for(n = 0; n < num_out_vectors; n++)
{
volk_gnsssdr_free(result_aux[n]);
}
Expand All @@ -75,9 +76,11 @@ static inline void volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic_a_sse2(lv_16sc_t
float code_phase_step_chips = 0.1;
int code_length_chips = 2046;
int num_out_vectors = 3;
unsigned int n;
float * rem_code_phase_chips = (float*)volk_gnsssdr_malloc(sizeof(float) * num_out_vectors, volk_gnsssdr_get_alignment());
lv_16sc_t** result_aux = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_out_vectors, volk_gnsssdr_get_alignment());
for(unsigned int n = 0; n < num_out_vectors; n++)

for(n = 0; n < num_out_vectors; n++)
{
rem_code_phase_chips[n] = -0.234;
result_aux[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment());
Expand All @@ -86,7 +89,7 @@ static inline void volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic_a_sse2(lv_16sc_t

memcpy(result, result_aux[0], sizeof(lv_16sc_t) * num_points);
volk_gnsssdr_free(rem_code_phase_chips);
for(unsigned int n = 0; n < num_out_vectors; n++)
for(n = 0; n < num_out_vectors; n++)
{
volk_gnsssdr_free(result_aux[n]);
}
Expand All @@ -102,9 +105,11 @@ static inline void volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic_u_sse2(lv_16sc_t
float code_phase_step_chips = 0.1;
int code_length_chips = 2046;
int num_out_vectors = 3;
unsigned int n;
float * rem_code_phase_chips = (float*)volk_gnsssdr_malloc(sizeof(float) * num_out_vectors, volk_gnsssdr_get_alignment());
lv_16sc_t** result_aux = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_out_vectors, volk_gnsssdr_get_alignment());
for(unsigned int n = 0; n < num_out_vectors; n++)

for(n = 0; n < num_out_vectors; n++)
{
rem_code_phase_chips[n] = -0.234;
result_aux[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment());
Expand All @@ -113,7 +118,7 @@ static inline void volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic_u_sse2(lv_16sc_t

memcpy(result, result_aux[0], sizeof(lv_16sc_t) * num_points);
volk_gnsssdr_free(rem_code_phase_chips);
for(unsigned int n = 0; n < num_out_vectors; n++)
for(n = 0; n < num_out_vectors; n++)
{
volk_gnsssdr_free(result_aux[n]);
}
Expand All @@ -129,9 +134,11 @@ static inline void volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic_neon(lv_16sc_t*
float code_phase_step_chips = 0.1;
int code_length_chips = 2046;
int num_out_vectors = 3;
unsigned int n;
float * rem_code_phase_chips = (float*)volk_gnsssdr_malloc(sizeof(float) * num_out_vectors, volk_gnsssdr_get_alignment());
lv_16sc_t** result_aux = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_out_vectors, volk_gnsssdr_get_alignment());
for(unsigned int n = 0; n < num_out_vectors; n++)

for(n = 0; n < num_out_vectors; n++)
{
rem_code_phase_chips[n] = -0.234;
result_aux[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment());
Expand All @@ -140,7 +147,7 @@ static inline void volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic_neon(lv_16sc_t*

memcpy(result, result_aux[0], sizeof(lv_16sc_t) * num_points);
volk_gnsssdr_free(rem_code_phase_chips);
for(unsigned int n = 0; n < num_out_vectors; n++)
for(n = 0; n < num_out_vectors; n++)
{
volk_gnsssdr_free(result_aux[n]);
}
Expand Down
Loading

0 comments on commit d490191

Please sign in to comment.