spa: use the right AVX2 flags

Our AVX optimizations are really AVX2 so rename the files and functions
and use the right HAVE_AVX2 and cpu flags to compile and select the
right functions.

Fixes #5072
This commit is contained in:
Wim Taymans 2026-01-13 12:03:09 +01:00
parent c1f7963c2a
commit 13131706aa
15 changed files with 66 additions and 66 deletions

View File

@ -156,17 +156,17 @@ int main(int argc, char *argv[])
}
}
#endif
#if defined (HAVE_AVX) && defined(HAVE_FMA)
if (SPA_FLAG_IS_SET(cpu_flags, SPA_CPU_FLAG_AVX | SPA_CPU_FLAG_FMA3)) {
#if defined (HAVE_AVX2) && defined(HAVE_FMA)
if (SPA_FLAG_IS_SET(cpu_flags, SPA_CPU_FLAG_AVX2 | SPA_CPU_FLAG_FMA3)) {
for (i = 0; i < SPA_N_ELEMENTS(in_rates); i++) {
spa_zero(r);
r.channels = 2;
r.cpu_flags = SPA_CPU_FLAG_AVX | SPA_CPU_FLAG_FMA3;
r.cpu_flags = SPA_CPU_FLAG_AVX2 | SPA_CPU_FLAG_FMA3;
r.i_rate = in_rates[i];
r.o_rate = out_rates[i];
r.quality = RESAMPLE_DEFAULT_QUALITY;
resample_native_init(&r);
run_test("native", "avx", &r);
run_test("native", "avx2", &r);
resample_free(&r);
}
}

View File

@ -72,15 +72,15 @@ if have_sse41
simd_cargs += ['-DHAVE_SSE41']
simd_dependencies += audioconvert_sse41
endif
if have_avx and have_fma
audioconvert_avx = static_library('audioconvert_avx',
['resample-native-avx.c'],
c_args : [avx_args, fma_args, '-O3', '-DHAVE_AVX', '-DHAVE_FMA'],
if have_avx2 and have_fma
audioconvert_avx2_fma = static_library('audioconvert_avx2_fma',
['resample-native-avx2.c'],
c_args : [avx2_args, fma_args, '-O3', '-DHAVE_AVX2', '-DHAVE_FMA'],
dependencies : [ spa_dep ],
install : false
)
simd_cargs += ['-DHAVE_AVX', '-DHAVE_FMA']
simd_dependencies += audioconvert_avx
simd_cargs += ['-DHAVE_AVX2', '-DHAVE_FMA']
simd_dependencies += audioconvert_avx2_fma
endif
if have_avx2
audioconvert_avx2 = static_library('audioconvert_avx2',

View File

@ -7,7 +7,7 @@
#include <assert.h>
#include <immintrin.h>
static inline void inner_product_avx(float *d, const float * SPA_RESTRICT s,
static inline void inner_product_avx2(float *d, const float * SPA_RESTRICT s,
const float * SPA_RESTRICT taps, uint32_t n_taps)
{
__m256 sy[2] = { _mm256_setzero_ps(), _mm256_setzero_ps() }, ty;
@ -36,7 +36,7 @@ static inline void inner_product_avx(float *d, const float * SPA_RESTRICT s,
_mm_store_ss(d, sx[0]);
}
static inline void inner_product_ip_avx(float *d, const float * SPA_RESTRICT s,
static inline void inner_product_ip_avx2(float *d, const float * SPA_RESTRICT s,
const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x,
uint32_t n_taps)
{
@ -70,5 +70,5 @@ static inline void inner_product_ip_avx(float *d, const float * SPA_RESTRICT s,
_mm_store_ss(d, sx[0]);
}
MAKE_RESAMPLER_FULL(avx);
MAKE_RESAMPLER_INTER(avx);
MAKE_RESAMPLER_FULL(avx2);
MAKE_RESAMPLER_INTER(avx2);

View File

@ -175,7 +175,7 @@ DEFINE_RESAMPLER(inter,sse);
DEFINE_RESAMPLER(full,ssse3);
DEFINE_RESAMPLER(inter,ssse3);
#endif
#if defined (HAVE_AVX) && defined(HAVE_FMA)
DEFINE_RESAMPLER(full,avx);
DEFINE_RESAMPLER(inter,avx);
#if defined (HAVE_AVX2) && defined(HAVE_FMA)
DEFINE_RESAMPLER(full,avx2);
DEFINE_RESAMPLER(inter,avx2);
#endif

View File

@ -209,8 +209,8 @@ static struct resample_info resample_table[] =
#if defined (HAVE_NEON)
MAKE(F32, copy_c, full_neon, inter_neon, SPA_CPU_FLAG_NEON),
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA)
MAKE(F32, copy_c, full_avx, inter_avx, SPA_CPU_FLAG_AVX | SPA_CPU_FLAG_FMA3),
#if defined(HAVE_AVX2) && defined(HAVE_FMA)
MAKE(F32, copy_c, full_avx2, inter_avx2, SPA_CPU_FLAG_AVX2 | SPA_CPU_FLAG_FMA3),
#endif
#if defined (HAVE_SSSE3)
MAKE(F32, copy_c, full_ssse3, inter_ssse3, SPA_CPU_FLAG_SSSE3 | SPA_CPU_FLAG_SLOW_UNALIGNED),

View File

@ -143,9 +143,9 @@ static void test_f32(void)
run_test("test_f32", "sse", mix_f32_sse);
}
#endif
#if defined (HAVE_AVX)
if (cpu_flags & SPA_CPU_FLAG_AVX) {
run_test("test_f32", "avx", mix_f32_avx);
#if defined (HAVE_AVX2)
if (cpu_flags & SPA_CPU_FLAG_AVX2) {
run_test("test_f32", "avx2", mix_f32_avx2);
}
#endif
}

View File

@ -35,15 +35,15 @@ if have_sse2
simd_cargs += ['-DHAVE_SSE2']
simd_dependencies += audiomixer_sse2
endif
if have_avx and have_fma
audiomixer_avx = static_library('audiomixer_avx',
['mix-ops-avx.c'],
c_args : [avx_args, fma_args, '-O3', '-DHAVE_AVX', '-DHAVE_FMA'],
if have_avx2 and have_fma
audiomixer_avx2 = static_library('audiomixer_avx2',
['mix-ops-avx2.c'],
c_args : [avx2_args, fma_args, '-O3', '-DHAVE_AVX2', '-DHAVE_FMA'],
dependencies : [ spa_dep ],
install : false
)
simd_cargs += ['-DHAVE_AVX', '-DHAVE_FMA']
simd_dependencies += audiomixer_avx
simd_cargs += ['-DHAVE_AVX2', '-DHAVE_FMA']
simd_dependencies += audiomixer_avx2
endif
audiomixer_lib = static_library('audiomixer',

View File

@ -13,7 +13,7 @@
#include <immintrin.h>
void
mix_f32_avx(struct mix_ops *ops, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[],
mix_f32_avx2(struct mix_ops *ops, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[],
uint32_t n_src, uint32_t n_samples)
{
n_samples *= ops->n_channels;

View File

@ -26,9 +26,9 @@ struct mix_info {
static struct mix_info mix_table[] =
{
/* f32 */
#if defined(HAVE_AVX)
{ SPA_AUDIO_FORMAT_F32, 0, SPA_CPU_FLAG_AVX, 4, mix_f32_avx },
{ SPA_AUDIO_FORMAT_F32P, 0, SPA_CPU_FLAG_AVX, 4, mix_f32_avx },
#if defined(HAVE_AVX2)
{ SPA_AUDIO_FORMAT_F32, 0, SPA_CPU_FLAG_AVX2, 4, mix_f32_avx2 },
{ SPA_AUDIO_FORMAT_F32P, 0, SPA_CPU_FLAG_AVX2, 4, mix_f32_avx2 },
#endif
#if defined (HAVE_SSE)
{ SPA_AUDIO_FORMAT_F32, 0, SPA_CPU_FLAG_SSE, 4, mix_f32_sse },

View File

@ -144,6 +144,6 @@ DEFINE_FUNCTION(f32, sse);
#if defined(HAVE_SSE2)
DEFINE_FUNCTION(f64, sse2);
#endif
#if defined(HAVE_AVX)
DEFINE_FUNCTION(f32, avx);
#if defined(HAVE_AVX2)
DEFINE_FUNCTION(f32, avx2);
#endif

View File

@ -220,11 +220,11 @@ static void test_f32(void)
run_test("test_f32_4_sse", src, 4, out_4, sizeof(out_4), SPA_N_ELEMENTS(out_4), mix_f32_sse);
}
#endif
#if defined(HAVE_AVX)
if (cpu_flags & SPA_CPU_FLAG_AVX) {
run_test("test_f32_0_avx", NULL, 0, out, sizeof(out), SPA_N_ELEMENTS(out), mix_f32_avx);
run_test("test_f32_1_avx", src, 1, in_1, sizeof(in_1), SPA_N_ELEMENTS(in_1), mix_f32_avx);
run_test("test_f32_4_avx", src, 4, out_4, sizeof(out_4), SPA_N_ELEMENTS(out_4), mix_f32_avx);
#if defined(HAVE_AVX2)
if (cpu_flags & SPA_CPU_FLAG_AVX2) {
run_test("test_f32_0_avx", NULL, 0, out, sizeof(out), SPA_N_ELEMENTS(out), mix_f32_avx2);
run_test("test_f32_1_avx", src, 1, in_1, sizeof(in_1), SPA_N_ELEMENTS(in_1), mix_f32_avx2);
run_test("test_f32_4_avx", src, 4, out_4, sizeof(out_4), SPA_N_ELEMENTS(out_4), mix_f32_avx2);
}
#endif
}

View File

@ -17,7 +17,7 @@
#include <immintrin.h>
static void dsp_add_avx(void *obj, float *dst, const float * SPA_RESTRICT src[],
static void dsp_add_avx2(void *obj, float *dst, const float * SPA_RESTRICT src[],
uint32_t n_src, uint32_t n_samples)
{
uint32_t n, i, unrolled;
@ -62,7 +62,7 @@ static void dsp_add_avx(void *obj, float *dst, const float * SPA_RESTRICT src[],
}
}
static void dsp_add_1_gain_avx(void *obj, float *dst, const float * SPA_RESTRICT src[],
static void dsp_add_1_gain_avx2(void *obj, float *dst, const float * SPA_RESTRICT src[],
uint32_t n_src, float gain, uint32_t n_samples)
{
uint32_t n, i, unrolled;
@ -111,7 +111,7 @@ static void dsp_add_1_gain_avx(void *obj, float *dst, const float * SPA_RESTRICT
}
}
static void dsp_add_n_gain_avx(void *obj, float *dst,
static void dsp_add_n_gain_avx2(void *obj, float *dst,
const float * SPA_RESTRICT src[], uint32_t n_src,
float gain[], uint32_t n_gain, uint32_t n_samples)
{
@ -163,7 +163,7 @@ static void dsp_add_n_gain_avx(void *obj, float *dst,
}
void dsp_mix_gain_avx(void *obj,
void dsp_mix_gain_avx2(void *obj,
float * SPA_RESTRICT dst,
const float * SPA_RESTRICT src[], uint32_t n_src,
float gain[], uint32_t n_gain, uint32_t n_samples)
@ -175,15 +175,15 @@ void dsp_mix_gain_avx(void *obj,
spa_memcpy(dst, src[0], n_samples * sizeof(float));
} else {
if (n_gain == 0)
dsp_add_avx(obj, dst, src, n_src, n_samples);
dsp_add_avx2(obj, dst, src, n_src, n_samples);
else if (n_gain < n_src)
dsp_add_1_gain_avx(obj, dst, src, n_src, gain[0], n_samples);
dsp_add_1_gain_avx2(obj, dst, src, n_src, gain[0], n_samples);
else
dsp_add_n_gain_avx(obj, dst, src, n_src, gain, n_gain, n_samples);
dsp_add_n_gain_avx2(obj, dst, src, n_src, gain, n_gain, n_samples);
}
}
void dsp_sum_avx(void *obj, float *r, const float *a, const float *b, uint32_t n_samples)
void dsp_sum_avx2(void *obj, float *r, const float *a, const float *b, uint32_t n_samples)
{
uint32_t n, unrolled;
__m256 in[4];
@ -246,7 +246,7 @@ inline static __m256 _mm256_mul_pz(__m256 ab, __m256 cd)
return _mm256_addsub_ps(x0, x1);
}
void dsp_fft_cmul_avx(void *obj, void *fft,
void dsp_fft_cmul_avx2(void *obj, void *fft,
float * SPA_RESTRICT dst, const float * SPA_RESTRICT a,
const float * SPA_RESTRICT b, uint32_t len, const float scale)
{
@ -283,7 +283,7 @@ void dsp_fft_cmul_avx(void *obj, void *fft,
#endif
}
void dsp_fft_cmuladd_avx(void *obj, void *fft,
void dsp_fft_cmuladd_avx2(void *obj, void *fft,
float * SPA_RESTRICT dst, const float * SPA_RESTRICT src,
const float * SPA_RESTRICT a, const float * SPA_RESTRICT b,
uint32_t len, const float scale)

View File

@ -84,11 +84,11 @@ MAKE_DELAY_FUNC(sse);
MAKE_FFT_CMUL_FUNC(sse);
MAKE_FFT_CMULADD_FUNC(sse);
#endif
#if defined (HAVE_AVX)
MAKE_MIX_GAIN_FUNC(avx);
MAKE_SUM_FUNC(avx);
MAKE_FFT_CMUL_FUNC(avx);
MAKE_FFT_CMULADD_FUNC(avx);
#if defined (HAVE_AVX2)
MAKE_MIX_GAIN_FUNC(avx2);
MAKE_SUM_FUNC(avx2);
MAKE_FFT_CMUL_FUNC(avx2);
MAKE_FFT_CMULADD_FUNC(avx2);
#endif
#endif /* DSP_OPS_IMPL_H */

View File

@ -23,13 +23,13 @@ struct dsp_info {
static const struct dsp_info dsp_table[] =
{
#if defined (HAVE_AVX)
{ SPA_CPU_FLAG_AVX,
#if defined (HAVE_AVX2)
{ SPA_CPU_FLAG_AVX2,
.funcs.clear = dsp_clear_c,
.funcs.copy = dsp_copy_c,
.funcs.mix_gain = dsp_mix_gain_avx,
.funcs.mix_gain = dsp_mix_gain_avx2,
.funcs.biquad_run = dsp_biquad_run_sse,
.funcs.sum = dsp_sum_avx,
.funcs.sum = dsp_sum_avx2,
.funcs.linear = dsp_linear_c,
.funcs.mult = dsp_mult_c,
.funcs.fft_new = dsp_fft_new_c,
@ -38,8 +38,8 @@ static const struct dsp_info dsp_table[] =
.funcs.fft_memfree = dsp_fft_memfree_c,
.funcs.fft_memclear = dsp_fft_memclear_c,
.funcs.fft_run = dsp_fft_run_c,
.funcs.fft_cmul = dsp_fft_cmul_avx,
.funcs.fft_cmuladd = dsp_fft_cmuladd_avx,
.funcs.fft_cmul = dsp_fft_cmul_avx2,
.funcs.fft_cmuladd = dsp_fft_cmuladd_avx2,
.funcs.delay = dsp_delay_sse,
},
#endif

View File

@ -18,16 +18,16 @@ if have_sse
simd_cargs += ['-DHAVE_SSE']
simd_dependencies += filter_graph_sse
endif
if have_avx
filter_graph_avx = static_library('filter_graph_avx',
['audio-dsp-avx.c' ],
if have_avx2
filter_graph_avx2 = static_library('filter_graph_avx2',
['audio-dsp-avx2.c' ],
include_directories : [configinc],
c_args : [avx_args, fma_args,'-O3', '-DHAVE_AVX'],
c_args : [avx2_args, fma_args,'-O3', '-DHAVE_AVX2'],
dependencies : [ spa_dep ],
install : false
)
simd_cargs += ['-DHAVE_AVX']
simd_dependencies += filter_graph_avx
simd_cargs += ['-DHAVE_AVX2']
simd_dependencies += filter_graph_avx2
endif
if have_neon
filter_graph_neon = static_library('filter_graph_neon',