mirror of
https://gitlab.freedesktop.org/pipewire/pipewire.git
synced 2026-01-26 06:07:53 +00:00
spa: use the right AVX2 flags
Our AVX optimizations are really AVX2 so rename the files and functions and use the right HAVE_AVX2 and cpu flags to compile and select the right functions. Fixes #5072
This commit is contained in:
parent
c1f7963c2a
commit
13131706aa
@ -156,17 +156,17 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined (HAVE_AVX) && defined(HAVE_FMA)
|
||||
if (SPA_FLAG_IS_SET(cpu_flags, SPA_CPU_FLAG_AVX | SPA_CPU_FLAG_FMA3)) {
|
||||
#if defined (HAVE_AVX2) && defined(HAVE_FMA)
|
||||
if (SPA_FLAG_IS_SET(cpu_flags, SPA_CPU_FLAG_AVX2 | SPA_CPU_FLAG_FMA3)) {
|
||||
for (i = 0; i < SPA_N_ELEMENTS(in_rates); i++) {
|
||||
spa_zero(r);
|
||||
r.channels = 2;
|
||||
r.cpu_flags = SPA_CPU_FLAG_AVX | SPA_CPU_FLAG_FMA3;
|
||||
r.cpu_flags = SPA_CPU_FLAG_AVX2 | SPA_CPU_FLAG_FMA3;
|
||||
r.i_rate = in_rates[i];
|
||||
r.o_rate = out_rates[i];
|
||||
r.quality = RESAMPLE_DEFAULT_QUALITY;
|
||||
resample_native_init(&r);
|
||||
run_test("native", "avx", &r);
|
||||
run_test("native", "avx2", &r);
|
||||
resample_free(&r);
|
||||
}
|
||||
}
|
||||
|
||||
@ -72,15 +72,15 @@ if have_sse41
|
||||
simd_cargs += ['-DHAVE_SSE41']
|
||||
simd_dependencies += audioconvert_sse41
|
||||
endif
|
||||
if have_avx and have_fma
|
||||
audioconvert_avx = static_library('audioconvert_avx',
|
||||
['resample-native-avx.c'],
|
||||
c_args : [avx_args, fma_args, '-O3', '-DHAVE_AVX', '-DHAVE_FMA'],
|
||||
if have_avx2 and have_fma
|
||||
audioconvert_avx2_fma = static_library('audioconvert_avx2_fma',
|
||||
['resample-native-avx2.c'],
|
||||
c_args : [avx2_args, fma_args, '-O3', '-DHAVE_AVX2', '-DHAVE_FMA'],
|
||||
dependencies : [ spa_dep ],
|
||||
install : false
|
||||
)
|
||||
simd_cargs += ['-DHAVE_AVX', '-DHAVE_FMA']
|
||||
simd_dependencies += audioconvert_avx
|
||||
simd_cargs += ['-DHAVE_AVX2', '-DHAVE_FMA']
|
||||
simd_dependencies += audioconvert_avx2_fma
|
||||
endif
|
||||
if have_avx2
|
||||
audioconvert_avx2 = static_library('audioconvert_avx2',
|
||||
|
||||
@ -7,7 +7,7 @@
|
||||
#include <assert.h>
|
||||
#include <immintrin.h>
|
||||
|
||||
static inline void inner_product_avx(float *d, const float * SPA_RESTRICT s,
|
||||
static inline void inner_product_avx2(float *d, const float * SPA_RESTRICT s,
|
||||
const float * SPA_RESTRICT taps, uint32_t n_taps)
|
||||
{
|
||||
__m256 sy[2] = { _mm256_setzero_ps(), _mm256_setzero_ps() }, ty;
|
||||
@ -36,7 +36,7 @@ static inline void inner_product_avx(float *d, const float * SPA_RESTRICT s,
|
||||
_mm_store_ss(d, sx[0]);
|
||||
}
|
||||
|
||||
static inline void inner_product_ip_avx(float *d, const float * SPA_RESTRICT s,
|
||||
static inline void inner_product_ip_avx2(float *d, const float * SPA_RESTRICT s,
|
||||
const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
|
||||
uint32_t n_taps)
|
||||
{
|
||||
@ -70,5 +70,5 @@ static inline void inner_product_ip_avx(float *d, const float * SPA_RESTRICT s,
|
||||
_mm_store_ss(d, sx[0]);
|
||||
}
|
||||
|
||||
MAKE_RESAMPLER_FULL(avx);
|
||||
MAKE_RESAMPLER_INTER(avx);
|
||||
MAKE_RESAMPLER_FULL(avx2);
|
||||
MAKE_RESAMPLER_INTER(avx2);
|
||||
@ -175,7 +175,7 @@ DEFINE_RESAMPLER(inter,sse);
|
||||
DEFINE_RESAMPLER(full,ssse3);
|
||||
DEFINE_RESAMPLER(inter,ssse3);
|
||||
#endif
|
||||
#if defined (HAVE_AVX) && defined(HAVE_FMA)
|
||||
DEFINE_RESAMPLER(full,avx);
|
||||
DEFINE_RESAMPLER(inter,avx);
|
||||
#if defined (HAVE_AVX2) && defined(HAVE_FMA)
|
||||
DEFINE_RESAMPLER(full,avx2);
|
||||
DEFINE_RESAMPLER(inter,avx2);
|
||||
#endif
|
||||
|
||||
@ -209,8 +209,8 @@ static struct resample_info resample_table[] =
|
||||
#if defined (HAVE_NEON)
|
||||
MAKE(F32, copy_c, full_neon, inter_neon, SPA_CPU_FLAG_NEON),
|
||||
#endif
|
||||
#if defined(HAVE_AVX) && defined(HAVE_FMA)
|
||||
MAKE(F32, copy_c, full_avx, inter_avx, SPA_CPU_FLAG_AVX | SPA_CPU_FLAG_FMA3),
|
||||
#if defined(HAVE_AVX2) && defined(HAVE_FMA)
|
||||
MAKE(F32, copy_c, full_avx2, inter_avx2, SPA_CPU_FLAG_AVX2 | SPA_CPU_FLAG_FMA3),
|
||||
#endif
|
||||
#if defined (HAVE_SSSE3)
|
||||
MAKE(F32, copy_c, full_ssse3, inter_ssse3, SPA_CPU_FLAG_SSSE3 | SPA_CPU_FLAG_SLOW_UNALIGNED),
|
||||
|
||||
@ -143,9 +143,9 @@ static void test_f32(void)
|
||||
run_test("test_f32", "sse", mix_f32_sse);
|
||||
}
|
||||
#endif
|
||||
#if defined (HAVE_AVX)
|
||||
if (cpu_flags & SPA_CPU_FLAG_AVX) {
|
||||
run_test("test_f32", "avx", mix_f32_avx);
|
||||
#if defined (HAVE_AVX2)
|
||||
if (cpu_flags & SPA_CPU_FLAG_AVX2) {
|
||||
run_test("test_f32", "avx2", mix_f32_avx2);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -35,15 +35,15 @@ if have_sse2
|
||||
simd_cargs += ['-DHAVE_SSE2']
|
||||
simd_dependencies += audiomixer_sse2
|
||||
endif
|
||||
if have_avx and have_fma
|
||||
audiomixer_avx = static_library('audiomixer_avx',
|
||||
['mix-ops-avx.c'],
|
||||
c_args : [avx_args, fma_args, '-O3', '-DHAVE_AVX', '-DHAVE_FMA'],
|
||||
if have_avx2 and have_fma
|
||||
audiomixer_avx2 = static_library('audiomixer_avx2',
|
||||
['mix-ops-avx2.c'],
|
||||
c_args : [avx2_args, fma_args, '-O3', '-DHAVE_AVX2', '-DHAVE_FMA'],
|
||||
dependencies : [ spa_dep ],
|
||||
install : false
|
||||
)
|
||||
simd_cargs += ['-DHAVE_AVX', '-DHAVE_FMA']
|
||||
simd_dependencies += audiomixer_avx
|
||||
simd_cargs += ['-DHAVE_AVX2', '-DHAVE_FMA']
|
||||
simd_dependencies += audiomixer_avx2
|
||||
endif
|
||||
|
||||
audiomixer_lib = static_library('audiomixer',
|
||||
|
||||
@ -13,7 +13,7 @@
|
||||
#include <immintrin.h>
|
||||
|
||||
void
|
||||
mix_f32_avx(struct mix_ops *ops, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[],
|
||||
mix_f32_avx2(struct mix_ops *ops, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[],
|
||||
uint32_t n_src, uint32_t n_samples)
|
||||
{
|
||||
n_samples *= ops->n_channels;
|
||||
@ -26,9 +26,9 @@ struct mix_info {
|
||||
static struct mix_info mix_table[] =
|
||||
{
|
||||
/* f32 */
|
||||
#if defined(HAVE_AVX)
|
||||
{ SPA_AUDIO_FORMAT_F32, 0, SPA_CPU_FLAG_AVX, 4, mix_f32_avx },
|
||||
{ SPA_AUDIO_FORMAT_F32P, 0, SPA_CPU_FLAG_AVX, 4, mix_f32_avx },
|
||||
#if defined(HAVE_AVX2)
|
||||
{ SPA_AUDIO_FORMAT_F32, 0, SPA_CPU_FLAG_AVX2, 4, mix_f32_avx2 },
|
||||
{ SPA_AUDIO_FORMAT_F32P, 0, SPA_CPU_FLAG_AVX2, 4, mix_f32_avx2 },
|
||||
#endif
|
||||
#if defined (HAVE_SSE)
|
||||
{ SPA_AUDIO_FORMAT_F32, 0, SPA_CPU_FLAG_SSE, 4, mix_f32_sse },
|
||||
|
||||
@ -144,6 +144,6 @@ DEFINE_FUNCTION(f32, sse);
|
||||
#if defined(HAVE_SSE2)
|
||||
DEFINE_FUNCTION(f64, sse2);
|
||||
#endif
|
||||
#if defined(HAVE_AVX)
|
||||
DEFINE_FUNCTION(f32, avx);
|
||||
#if defined(HAVE_AVX2)
|
||||
DEFINE_FUNCTION(f32, avx2);
|
||||
#endif
|
||||
|
||||
@ -220,11 +220,11 @@ static void test_f32(void)
|
||||
run_test("test_f32_4_sse", src, 4, out_4, sizeof(out_4), SPA_N_ELEMENTS(out_4), mix_f32_sse);
|
||||
}
|
||||
#endif
|
||||
#if defined(HAVE_AVX)
|
||||
if (cpu_flags & SPA_CPU_FLAG_AVX) {
|
||||
run_test("test_f32_0_avx", NULL, 0, out, sizeof(out), SPA_N_ELEMENTS(out), mix_f32_avx);
|
||||
run_test("test_f32_1_avx", src, 1, in_1, sizeof(in_1), SPA_N_ELEMENTS(in_1), mix_f32_avx);
|
||||
run_test("test_f32_4_avx", src, 4, out_4, sizeof(out_4), SPA_N_ELEMENTS(out_4), mix_f32_avx);
|
||||
#if defined(HAVE_AVX2)
|
||||
if (cpu_flags & SPA_CPU_FLAG_AVX2) {
|
||||
run_test("test_f32_0_avx", NULL, 0, out, sizeof(out), SPA_N_ELEMENTS(out), mix_f32_avx2);
|
||||
run_test("test_f32_1_avx", src, 1, in_1, sizeof(in_1), SPA_N_ELEMENTS(in_1), mix_f32_avx2);
|
||||
run_test("test_f32_4_avx", src, 4, out_4, sizeof(out_4), SPA_N_ELEMENTS(out_4), mix_f32_avx2);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -17,7 +17,7 @@
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
static void dsp_add_avx(void *obj, float *dst, const float * SPA_RESTRICT src[],
|
||||
static void dsp_add_avx2(void *obj, float *dst, const float * SPA_RESTRICT src[],
|
||||
uint32_t n_src, uint32_t n_samples)
|
||||
{
|
||||
uint32_t n, i, unrolled;
|
||||
@ -62,7 +62,7 @@ static void dsp_add_avx(void *obj, float *dst, const float * SPA_RESTRICT src[],
|
||||
}
|
||||
}
|
||||
|
||||
static void dsp_add_1_gain_avx(void *obj, float *dst, const float * SPA_RESTRICT src[],
|
||||
static void dsp_add_1_gain_avx2(void *obj, float *dst, const float * SPA_RESTRICT src[],
|
||||
uint32_t n_src, float gain, uint32_t n_samples)
|
||||
{
|
||||
uint32_t n, i, unrolled;
|
||||
@ -111,7 +111,7 @@ static void dsp_add_1_gain_avx(void *obj, float *dst, const float * SPA_RESTRICT
|
||||
}
|
||||
}
|
||||
|
||||
static void dsp_add_n_gain_avx(void *obj, float *dst,
|
||||
static void dsp_add_n_gain_avx2(void *obj, float *dst,
|
||||
const float * SPA_RESTRICT src[], uint32_t n_src,
|
||||
float gain[], uint32_t n_gain, uint32_t n_samples)
|
||||
{
|
||||
@ -163,7 +163,7 @@ static void dsp_add_n_gain_avx(void *obj, float *dst,
|
||||
}
|
||||
|
||||
|
||||
void dsp_mix_gain_avx(void *obj,
|
||||
void dsp_mix_gain_avx2(void *obj,
|
||||
float * SPA_RESTRICT dst,
|
||||
const float * SPA_RESTRICT src[], uint32_t n_src,
|
||||
float gain[], uint32_t n_gain, uint32_t n_samples)
|
||||
@ -175,15 +175,15 @@ void dsp_mix_gain_avx(void *obj,
|
||||
spa_memcpy(dst, src[0], n_samples * sizeof(float));
|
||||
} else {
|
||||
if (n_gain == 0)
|
||||
dsp_add_avx(obj, dst, src, n_src, n_samples);
|
||||
dsp_add_avx2(obj, dst, src, n_src, n_samples);
|
||||
else if (n_gain < n_src)
|
||||
dsp_add_1_gain_avx(obj, dst, src, n_src, gain[0], n_samples);
|
||||
dsp_add_1_gain_avx2(obj, dst, src, n_src, gain[0], n_samples);
|
||||
else
|
||||
dsp_add_n_gain_avx(obj, dst, src, n_src, gain, n_gain, n_samples);
|
||||
dsp_add_n_gain_avx2(obj, dst, src, n_src, gain, n_gain, n_samples);
|
||||
}
|
||||
}
|
||||
|
||||
void dsp_sum_avx(void *obj, float *r, const float *a, const float *b, uint32_t n_samples)
|
||||
void dsp_sum_avx2(void *obj, float *r, const float *a, const float *b, uint32_t n_samples)
|
||||
{
|
||||
uint32_t n, unrolled;
|
||||
__m256 in[4];
|
||||
@ -246,7 +246,7 @@ inline static __m256 _mm256_mul_pz(__m256 ab, __m256 cd)
|
||||
return _mm256_addsub_ps(x0, x1);
|
||||
}
|
||||
|
||||
void dsp_fft_cmul_avx(void *obj, void *fft,
|
||||
void dsp_fft_cmul_avx2(void *obj, void *fft,
|
||||
float * SPA_RESTRICT dst, const float * SPA_RESTRICT a,
|
||||
const float * SPA_RESTRICT b, uint32_t len, const float scale)
|
||||
{
|
||||
@ -283,7 +283,7 @@ void dsp_fft_cmul_avx(void *obj, void *fft,
|
||||
#endif
|
||||
}
|
||||
|
||||
void dsp_fft_cmuladd_avx(void *obj, void *fft,
|
||||
void dsp_fft_cmuladd_avx2(void *obj, void *fft,
|
||||
float * SPA_RESTRICT dst, const float * SPA_RESTRICT src,
|
||||
const float * SPA_RESTRICT a, const float * SPA_RESTRICT b,
|
||||
uint32_t len, const float scale)
|
||||
@ -84,11 +84,11 @@ MAKE_DELAY_FUNC(sse);
|
||||
MAKE_FFT_CMUL_FUNC(sse);
|
||||
MAKE_FFT_CMULADD_FUNC(sse);
|
||||
#endif
|
||||
#if defined (HAVE_AVX)
|
||||
MAKE_MIX_GAIN_FUNC(avx);
|
||||
MAKE_SUM_FUNC(avx);
|
||||
MAKE_FFT_CMUL_FUNC(avx);
|
||||
MAKE_FFT_CMULADD_FUNC(avx);
|
||||
#if defined (HAVE_AVX2)
|
||||
MAKE_MIX_GAIN_FUNC(avx2);
|
||||
MAKE_SUM_FUNC(avx2);
|
||||
MAKE_FFT_CMUL_FUNC(avx2);
|
||||
MAKE_FFT_CMULADD_FUNC(avx2);
|
||||
#endif
|
||||
|
||||
#endif /* DSP_OPS_IMPL_H */
|
||||
|
||||
@ -23,13 +23,13 @@ struct dsp_info {
|
||||
|
||||
static const struct dsp_info dsp_table[] =
|
||||
{
|
||||
#if defined (HAVE_AVX)
|
||||
{ SPA_CPU_FLAG_AVX,
|
||||
#if defined (HAVE_AVX2)
|
||||
{ SPA_CPU_FLAG_AVX2,
|
||||
.funcs.clear = dsp_clear_c,
|
||||
.funcs.copy = dsp_copy_c,
|
||||
.funcs.mix_gain = dsp_mix_gain_avx,
|
||||
.funcs.mix_gain = dsp_mix_gain_avx2,
|
||||
.funcs.biquad_run = dsp_biquad_run_sse,
|
||||
.funcs.sum = dsp_sum_avx,
|
||||
.funcs.sum = dsp_sum_avx2,
|
||||
.funcs.linear = dsp_linear_c,
|
||||
.funcs.mult = dsp_mult_c,
|
||||
.funcs.fft_new = dsp_fft_new_c,
|
||||
@ -38,8 +38,8 @@ static const struct dsp_info dsp_table[] =
|
||||
.funcs.fft_memfree = dsp_fft_memfree_c,
|
||||
.funcs.fft_memclear = dsp_fft_memclear_c,
|
||||
.funcs.fft_run = dsp_fft_run_c,
|
||||
.funcs.fft_cmul = dsp_fft_cmul_avx,
|
||||
.funcs.fft_cmuladd = dsp_fft_cmuladd_avx,
|
||||
.funcs.fft_cmul = dsp_fft_cmul_avx2,
|
||||
.funcs.fft_cmuladd = dsp_fft_cmuladd_avx2,
|
||||
.funcs.delay = dsp_delay_sse,
|
||||
},
|
||||
#endif
|
||||
|
||||
@ -18,16 +18,16 @@ if have_sse
|
||||
simd_cargs += ['-DHAVE_SSE']
|
||||
simd_dependencies += filter_graph_sse
|
||||
endif
|
||||
if have_avx
|
||||
filter_graph_avx = static_library('filter_graph_avx',
|
||||
['audio-dsp-avx.c' ],
|
||||
if have_avx2
|
||||
filter_graph_avx2 = static_library('filter_graph_avx2',
|
||||
['audio-dsp-avx2.c' ],
|
||||
include_directories : [configinc],
|
||||
c_args : [avx_args, fma_args,'-O3', '-DHAVE_AVX'],
|
||||
c_args : [avx2_args, fma_args,'-O3', '-DHAVE_AVX2'],
|
||||
dependencies : [ spa_dep ],
|
||||
install : false
|
||||
)
|
||||
simd_cargs += ['-DHAVE_AVX']
|
||||
simd_dependencies += filter_graph_avx
|
||||
simd_cargs += ['-DHAVE_AVX2']
|
||||
simd_dependencies += filter_graph_avx2
|
||||
endif
|
||||
if have_neon
|
||||
filter_graph_neon = static_library('filter_graph_neon',
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user