From 13131706aa52242b3951a9c661ce2e50349bdab0 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Tue, 13 Jan 2026 12:03:09 +0100 Subject: [PATCH] spa: use the right AVX2 flags Our AVX optimizations are really AVX2 so rename the files and functions and use the right HAVE_AVX2 and cpu flags to compile and select the right functions. Fixes #5072 --- spa/plugins/audioconvert/benchmark-resample.c | 8 ++++---- spa/plugins/audioconvert/meson.build | 12 +++++------ ...le-native-avx.c => resample-native-avx2.c} | 8 ++++---- .../audioconvert/resample-native-impl.h | 6 +++--- spa/plugins/audioconvert/resample-native.c | 4 ++-- spa/plugins/audiomixer/benchmark-mix-ops.c | 6 +++--- spa/plugins/audiomixer/meson.build | 12 +++++------ .../{mix-ops-avx.c => mix-ops-avx2.c} | 2 +- spa/plugins/audiomixer/mix-ops.c | 6 +++--- spa/plugins/audiomixer/mix-ops.h | 4 ++-- spa/plugins/audiomixer/test-mix-ops.c | 10 +++++----- .../{audio-dsp-avx.c => audio-dsp-avx2.c} | 20 +++++++++---------- spa/plugins/filter-graph/audio-dsp-impl.h | 10 +++++----- spa/plugins/filter-graph/audio-dsp.c | 12 +++++------ spa/plugins/filter-graph/meson.build | 12 +++++------ 15 files changed, 66 insertions(+), 66 deletions(-) rename spa/plugins/audioconvert/{resample-native-avx.c => resample-native-avx2.c} (91%) rename spa/plugins/audiomixer/{mix-ops-avx.c => mix-ops-avx2.c} (94%) rename spa/plugins/filter-graph/{audio-dsp-avx.c => audio-dsp-avx2.c} (93%) diff --git a/spa/plugins/audioconvert/benchmark-resample.c b/spa/plugins/audioconvert/benchmark-resample.c index 35814cd73..7b9864362 100644 --- a/spa/plugins/audioconvert/benchmark-resample.c +++ b/spa/plugins/audioconvert/benchmark-resample.c @@ -156,17 +156,17 @@ int main(int argc, char *argv[]) } } #endif -#if defined (HAVE_AVX) && defined(HAVE_FMA) - if (SPA_FLAG_IS_SET(cpu_flags, SPA_CPU_FLAG_AVX | SPA_CPU_FLAG_FMA3)) { +#if defined (HAVE_AVX2) && defined(HAVE_FMA) + if (SPA_FLAG_IS_SET(cpu_flags, SPA_CPU_FLAG_AVX2 | SPA_CPU_FLAG_FMA3)) { for (i = 0; i < SPA_N_ELEMENTS(in_rates); i++) { spa_zero(r); r.channels = 2; - r.cpu_flags = SPA_CPU_FLAG_AVX | SPA_CPU_FLAG_FMA3; + r.cpu_flags = SPA_CPU_FLAG_AVX2 | SPA_CPU_FLAG_FMA3; r.i_rate = in_rates[i]; r.o_rate = out_rates[i]; r.quality = RESAMPLE_DEFAULT_QUALITY; resample_native_init(&r); - run_test("native", "avx", &r); + run_test("native", "avx2", &r); resample_free(&r); } } diff --git a/spa/plugins/audioconvert/meson.build b/spa/plugins/audioconvert/meson.build index 64379c845..bd60872b6 100644 --- a/spa/plugins/audioconvert/meson.build +++ b/spa/plugins/audioconvert/meson.build @@ -72,15 +72,15 @@ if have_sse41 simd_cargs += ['-DHAVE_SSE41'] simd_dependencies += audioconvert_sse41 endif -if have_avx and have_fma - audioconvert_avx = static_library('audioconvert_avx', - ['resample-native-avx.c'], - c_args : [avx_args, fma_args, '-O3', '-DHAVE_AVX', '-DHAVE_FMA'], +if have_avx2 and have_fma + audioconvert_avx2_fma = static_library('audioconvert_avx2_fma', + ['resample-native-avx2.c'], + c_args : [avx2_args, fma_args, '-O3', '-DHAVE_AVX2', '-DHAVE_FMA'], dependencies : [ spa_dep ], install : false ) - simd_cargs += ['-DHAVE_AVX', '-DHAVE_FMA'] - simd_dependencies += audioconvert_avx + simd_cargs += ['-DHAVE_AVX2', '-DHAVE_FMA'] + simd_dependencies += audioconvert_avx2_fma endif if have_avx2 audioconvert_avx2 = static_library('audioconvert_avx2', diff --git a/spa/plugins/audioconvert/resample-native-avx.c b/spa/plugins/audioconvert/resample-native-avx2.c similarity index 91% rename from spa/plugins/audioconvert/resample-native-avx.c rename to spa/plugins/audioconvert/resample-native-avx2.c index a57d668b7..ba009a97b 100644 --- a/spa/plugins/audioconvert/resample-native-avx.c +++ b/spa/plugins/audioconvert/resample-native-avx2.c @@ -7,7 +7,7 @@ #include #include -static inline void inner_product_avx(float *d, const float * SPA_RESTRICT s, +static inline void inner_product_avx2(float *d, const float * SPA_RESTRICT s, const float * SPA_RESTRICT taps, uint32_t n_taps) { __m256 sy[2] = { _mm256_setzero_ps(), _mm256_setzero_ps() }, ty; @@ -36,7 +36,7 @@ static inline void inner_product_avx(float *d, const float * SPA_RESTRICT s, _mm_store_ss(d, sx[0]); } -static inline void inner_product_ip_avx(float *d, const float * SPA_RESTRICT s, +static inline void inner_product_ip_avx2(float *d, const float * SPA_RESTRICT s, const float * SPA_RESTRICT t0, const float * SPA_RESTRICT t1, float x, uint32_t n_taps) { @@ -70,5 +70,5 @@ static inline void inner_product_ip_avx(float *d, const float * SPA_RESTRICT s, _mm_store_ss(d, sx[0]); } -MAKE_RESAMPLER_FULL(avx); -MAKE_RESAMPLER_INTER(avx); +MAKE_RESAMPLER_FULL(avx2); +MAKE_RESAMPLER_INTER(avx2); diff --git a/spa/plugins/audioconvert/resample-native-impl.h b/spa/plugins/audioconvert/resample-native-impl.h index 11a0851b3..40879b747 100644 --- a/spa/plugins/audioconvert/resample-native-impl.h +++ b/spa/plugins/audioconvert/resample-native-impl.h @@ -175,7 +175,7 @@ DEFINE_RESAMPLER(inter,sse); DEFINE_RESAMPLER(full,ssse3); DEFINE_RESAMPLER(inter,ssse3); #endif -#if defined (HAVE_AVX) && defined(HAVE_FMA) -DEFINE_RESAMPLER(full,avx); -DEFINE_RESAMPLER(inter,avx); +#if defined (HAVE_AVX2) && defined(HAVE_FMA) +DEFINE_RESAMPLER(full,avx2); +DEFINE_RESAMPLER(inter,avx2); #endif diff --git a/spa/plugins/audioconvert/resample-native.c b/spa/plugins/audioconvert/resample-native.c index 8997f4e61..6163d426f 100644 --- a/spa/plugins/audioconvert/resample-native.c +++ b/spa/plugins/audioconvert/resample-native.c @@ -209,8 +209,8 @@ static struct resample_info resample_table[] = #if defined (HAVE_NEON) MAKE(F32, copy_c, full_neon, inter_neon, SPA_CPU_FLAG_NEON), #endif -#if defined(HAVE_AVX) && defined(HAVE_FMA) - MAKE(F32, copy_c, full_avx, inter_avx, SPA_CPU_FLAG_AVX | SPA_CPU_FLAG_FMA3), +#if defined(HAVE_AVX2) && defined(HAVE_FMA) + MAKE(F32, copy_c, full_avx2, inter_avx2, SPA_CPU_FLAG_AVX2 | SPA_CPU_FLAG_FMA3), #endif #if defined (HAVE_SSSE3) MAKE(F32, copy_c, full_ssse3, inter_ssse3, SPA_CPU_FLAG_SSSE3 | SPA_CPU_FLAG_SLOW_UNALIGNED), diff --git a/spa/plugins/audiomixer/benchmark-mix-ops.c b/spa/plugins/audiomixer/benchmark-mix-ops.c index 187035669..40c8c70b9 100644 --- a/spa/plugins/audiomixer/benchmark-mix-ops.c +++ b/spa/plugins/audiomixer/benchmark-mix-ops.c @@ -143,9 +143,9 @@ static void test_f32(void) run_test("test_f32", "sse", mix_f32_sse); } #endif -#if defined (HAVE_AVX) - if (cpu_flags & SPA_CPU_FLAG_AVX) { - run_test("test_f32", "avx", mix_f32_avx); +#if defined (HAVE_AVX2) + if (cpu_flags & SPA_CPU_FLAG_AVX2) { + run_test("test_f32", "avx2", mix_f32_avx2); } #endif } diff --git a/spa/plugins/audiomixer/meson.build b/spa/plugins/audiomixer/meson.build index 65aafee20..fa44c625c 100644 --- a/spa/plugins/audiomixer/meson.build +++ b/spa/plugins/audiomixer/meson.build @@ -35,15 +35,15 @@ if have_sse2 simd_cargs += ['-DHAVE_SSE2'] simd_dependencies += audiomixer_sse2 endif -if have_avx and have_fma - audiomixer_avx = static_library('audiomixer_avx', - ['mix-ops-avx.c'], - c_args : [avx_args, fma_args, '-O3', '-DHAVE_AVX', '-DHAVE_FMA'], +if have_avx2 and have_fma + audiomixer_avx2 = static_library('audiomixer_avx2', + ['mix-ops-avx2.c'], + c_args : [avx2_args, fma_args, '-O3', '-DHAVE_AVX2', '-DHAVE_FMA'], dependencies : [ spa_dep ], install : false ) - simd_cargs += ['-DHAVE_AVX', '-DHAVE_FMA'] - simd_dependencies += audiomixer_avx + simd_cargs += ['-DHAVE_AVX2', '-DHAVE_FMA'] + simd_dependencies += audiomixer_avx2 endif audiomixer_lib = static_library('audiomixer', diff --git a/spa/plugins/audiomixer/mix-ops-avx.c b/spa/plugins/audiomixer/mix-ops-avx2.c similarity index 94% rename from spa/plugins/audiomixer/mix-ops-avx.c rename to spa/plugins/audiomixer/mix-ops-avx2.c index af717d1b4..46f3abf55 100644 --- a/spa/plugins/audiomixer/mix-ops-avx.c +++ b/spa/plugins/audiomixer/mix-ops-avx2.c @@ -13,7 +13,7 @@ #include void -mix_f32_avx(struct mix_ops *ops, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[], +mix_f32_avx2(struct mix_ops *ops, void * SPA_RESTRICT dst, const void * SPA_RESTRICT src[], uint32_t n_src, uint32_t n_samples) { n_samples *= ops->n_channels; diff --git a/spa/plugins/audiomixer/mix-ops.c b/spa/plugins/audiomixer/mix-ops.c index bebd66f68..a172e0005 100644 --- a/spa/plugins/audiomixer/mix-ops.c +++ b/spa/plugins/audiomixer/mix-ops.c @@ -26,9 +26,9 @@ struct mix_info { static struct mix_info mix_table[] = { /* f32 */ -#if defined(HAVE_AVX) - { SPA_AUDIO_FORMAT_F32, 0, SPA_CPU_FLAG_AVX, 4, mix_f32_avx }, - { SPA_AUDIO_FORMAT_F32P, 0, SPA_CPU_FLAG_AVX, 4, mix_f32_avx }, +#if defined(HAVE_AVX2) + { SPA_AUDIO_FORMAT_F32, 0, SPA_CPU_FLAG_AVX2, 4, mix_f32_avx2 }, + { SPA_AUDIO_FORMAT_F32P, 0, SPA_CPU_FLAG_AVX2, 4, mix_f32_avx2 }, #endif #if defined (HAVE_SSE) { SPA_AUDIO_FORMAT_F32, 0, SPA_CPU_FLAG_SSE, 4, mix_f32_sse }, diff --git a/spa/plugins/audiomixer/mix-ops.h b/spa/plugins/audiomixer/mix-ops.h index 0d9559636..45078f63c 100644 --- a/spa/plugins/audiomixer/mix-ops.h +++ b/spa/plugins/audiomixer/mix-ops.h @@ -144,6 +144,6 @@ DEFINE_FUNCTION(f32, sse); #if defined(HAVE_SSE2) DEFINE_FUNCTION(f64, sse2); #endif -#if defined(HAVE_AVX) -DEFINE_FUNCTION(f32, avx); +#if defined(HAVE_AVX2) +DEFINE_FUNCTION(f32, avx2); #endif diff --git a/spa/plugins/audiomixer/test-mix-ops.c b/spa/plugins/audiomixer/test-mix-ops.c index 2092de48c..39c0c4a6b 100644 --- a/spa/plugins/audiomixer/test-mix-ops.c +++ b/spa/plugins/audiomixer/test-mix-ops.c @@ -220,11 +220,11 @@ static void test_f32(void) run_test("test_f32_4_sse", src, 4, out_4, sizeof(out_4), SPA_N_ELEMENTS(out_4), mix_f32_sse); } #endif -#if defined(HAVE_AVX) - if (cpu_flags & SPA_CPU_FLAG_AVX) { - run_test("test_f32_0_avx", NULL, 0, out, sizeof(out), SPA_N_ELEMENTS(out), mix_f32_avx); - run_test("test_f32_1_avx", src, 1, in_1, sizeof(in_1), SPA_N_ELEMENTS(in_1), mix_f32_avx); - run_test("test_f32_4_avx", src, 4, out_4, sizeof(out_4), SPA_N_ELEMENTS(out_4), mix_f32_avx); +#if defined(HAVE_AVX2) + if (cpu_flags & SPA_CPU_FLAG_AVX2) { + run_test("test_f32_0_avx", NULL, 0, out, sizeof(out), SPA_N_ELEMENTS(out), mix_f32_avx2); + run_test("test_f32_1_avx", src, 1, in_1, sizeof(in_1), SPA_N_ELEMENTS(in_1), mix_f32_avx2); + run_test("test_f32_4_avx", src, 4, out_4, sizeof(out_4), SPA_N_ELEMENTS(out_4), mix_f32_avx2); } #endif } diff --git a/spa/plugins/filter-graph/audio-dsp-avx.c b/spa/plugins/filter-graph/audio-dsp-avx2.c similarity index 93% rename from spa/plugins/filter-graph/audio-dsp-avx.c rename to spa/plugins/filter-graph/audio-dsp-avx2.c index 9b49fb556..76c7b17d5 100644 --- a/spa/plugins/filter-graph/audio-dsp-avx.c +++ b/spa/plugins/filter-graph/audio-dsp-avx2.c @@ -17,7 +17,7 @@ #include -static void dsp_add_avx(void *obj, float *dst, const float * SPA_RESTRICT src[], +static void dsp_add_avx2(void *obj, float *dst, const float * SPA_RESTRICT src[], uint32_t n_src, uint32_t n_samples) { uint32_t n, i, unrolled; @@ -62,7 +62,7 @@ static void dsp_add_avx(void *obj, float *dst, const float * SPA_RESTRICT src[], } } -static void dsp_add_1_gain_avx(void *obj, float *dst, const float * SPA_RESTRICT src[], +static void dsp_add_1_gain_avx2(void *obj, float *dst, const float * SPA_RESTRICT src[], uint32_t n_src, float gain, uint32_t n_samples) { uint32_t n, i, unrolled; @@ -111,7 +111,7 @@ static void dsp_add_1_gain_avx(void *obj, float *dst, const float * SPA_RESTRICT } } -static void dsp_add_n_gain_avx(void *obj, float *dst, +static void dsp_add_n_gain_avx2(void *obj, float *dst, const float * SPA_RESTRICT src[], uint32_t n_src, float gain[], uint32_t n_gain, uint32_t n_samples) { @@ -163,7 +163,7 @@ static void dsp_add_n_gain_avx(void *obj, float *dst, } -void dsp_mix_gain_avx(void *obj, +void dsp_mix_gain_avx2(void *obj, float * SPA_RESTRICT dst, const float * SPA_RESTRICT src[], uint32_t n_src, float gain[], uint32_t n_gain, uint32_t n_samples) @@ -175,15 +175,15 @@ void dsp_mix_gain_avx(void *obj, spa_memcpy(dst, src[0], n_samples * sizeof(float)); } else { if (n_gain == 0) - dsp_add_avx(obj, dst, src, n_src, n_samples); + dsp_add_avx2(obj, dst, src, n_src, n_samples); else if (n_gain < n_src) - dsp_add_1_gain_avx(obj, dst, src, n_src, gain[0], n_samples); + dsp_add_1_gain_avx2(obj, dst, src, n_src, gain[0], n_samples); else - dsp_add_n_gain_avx(obj, dst, src, n_src, gain, n_gain, n_samples); + dsp_add_n_gain_avx2(obj, dst, src, n_src, gain, n_gain, n_samples); } } -void dsp_sum_avx(void *obj, float *r, const float *a, const float *b, uint32_t n_samples) +void dsp_sum_avx2(void *obj, float *r, const float *a, const float *b, uint32_t n_samples) { uint32_t n, unrolled; __m256 in[4]; @@ -246,7 +246,7 @@ inline static __m256 _mm256_mul_pz(__m256 ab, __m256 cd) return _mm256_addsub_ps(x0, x1); } -void dsp_fft_cmul_avx(void *obj, void *fft, +void dsp_fft_cmul_avx2(void *obj, void *fft, float * SPA_RESTRICT dst, const float * SPA_RESTRICT a, const float * SPA_RESTRICT b, uint32_t len, const float scale) { @@ -283,7 +283,7 @@ void dsp_fft_cmul_avx(void *obj, void *fft, #endif } -void dsp_fft_cmuladd_avx(void *obj, void *fft, +void dsp_fft_cmuladd_avx2(void *obj, void *fft, float * SPA_RESTRICT dst, const float * SPA_RESTRICT src, const float * SPA_RESTRICT a, const float * SPA_RESTRICT b, uint32_t len, const float scale) diff --git a/spa/plugins/filter-graph/audio-dsp-impl.h b/spa/plugins/filter-graph/audio-dsp-impl.h index d60f346a3..a5d3c975f 100644 --- a/spa/plugins/filter-graph/audio-dsp-impl.h +++ b/spa/plugins/filter-graph/audio-dsp-impl.h @@ -84,11 +84,11 @@ MAKE_DELAY_FUNC(sse); MAKE_FFT_CMUL_FUNC(sse); MAKE_FFT_CMULADD_FUNC(sse); #endif -#if defined (HAVE_AVX) -MAKE_MIX_GAIN_FUNC(avx); -MAKE_SUM_FUNC(avx); -MAKE_FFT_CMUL_FUNC(avx); -MAKE_FFT_CMULADD_FUNC(avx); +#if defined (HAVE_AVX2) +MAKE_MIX_GAIN_FUNC(avx2); +MAKE_SUM_FUNC(avx2); +MAKE_FFT_CMUL_FUNC(avx2); +MAKE_FFT_CMULADD_FUNC(avx2); #endif #endif /* DSP_OPS_IMPL_H */ diff --git a/spa/plugins/filter-graph/audio-dsp.c b/spa/plugins/filter-graph/audio-dsp.c index fadd85349..133b53db5 100644 --- a/spa/plugins/filter-graph/audio-dsp.c +++ b/spa/plugins/filter-graph/audio-dsp.c @@ -23,13 +23,13 @@ struct dsp_info { static const struct dsp_info dsp_table[] = { -#if defined (HAVE_AVX) - { SPA_CPU_FLAG_AVX, +#if defined (HAVE_AVX2) + { SPA_CPU_FLAG_AVX2, .funcs.clear = dsp_clear_c, .funcs.copy = dsp_copy_c, - .funcs.mix_gain = dsp_mix_gain_avx, + .funcs.mix_gain = dsp_mix_gain_avx2, .funcs.biquad_run = dsp_biquad_run_sse, - .funcs.sum = dsp_sum_avx, + .funcs.sum = dsp_sum_avx2, .funcs.linear = dsp_linear_c, .funcs.mult = dsp_mult_c, .funcs.fft_new = dsp_fft_new_c, @@ -38,8 +38,8 @@ static const struct dsp_info dsp_table[] = .funcs.fft_memfree = dsp_fft_memfree_c, .funcs.fft_memclear = dsp_fft_memclear_c, .funcs.fft_run = dsp_fft_run_c, - .funcs.fft_cmul = dsp_fft_cmul_avx, - .funcs.fft_cmuladd = dsp_fft_cmuladd_avx, + .funcs.fft_cmul = dsp_fft_cmul_avx2, + .funcs.fft_cmuladd = dsp_fft_cmuladd_avx2, .funcs.delay = dsp_delay_sse, }, #endif diff --git a/spa/plugins/filter-graph/meson.build b/spa/plugins/filter-graph/meson.build index 1995ae4c1..94ee0bd25 100644 --- a/spa/plugins/filter-graph/meson.build +++ b/spa/plugins/filter-graph/meson.build @@ -18,16 +18,16 @@ if have_sse simd_cargs += ['-DHAVE_SSE'] simd_dependencies += filter_graph_sse endif -if have_avx - filter_graph_avx = static_library('filter_graph_avx', - ['audio-dsp-avx.c' ], +if have_avx2 + filter_graph_avx2 = static_library('filter_graph_avx2', + ['audio-dsp-avx2.c' ], include_directories : [configinc], - c_args : [avx_args, fma_args,'-O3', '-DHAVE_AVX'], + c_args : [avx2_args, fma_args,'-O3', '-DHAVE_AVX2'], dependencies : [ spa_dep ], install : false ) - simd_cargs += ['-DHAVE_AVX'] - simd_dependencies += filter_graph_avx + simd_cargs += ['-DHAVE_AVX2'] + simd_dependencies += filter_graph_avx2 endif if have_neon filter_graph_neon = static_library('filter_graph_neon',