[FFmpeg-devel] [PATCHv2 1/5] lavu/float_dsp: add double-precision scalar product
James Almer
jamrial at gmail.com
Thu May 30 22:10:28 EEST 2024
On 5/30/2024 4:06 PM, Rémi Denis-Courmont wrote:
> The function pointer is appended to the structure for backward binary
> compatibility. Fortunately, this is allocated by libavutil, not by the
> user, so increasing the structure size is safe.
> ---
> libavutil/float_dsp.c | 12 ++++++++++++
> libavutil/float_dsp.h | 31 ++++++++++++++++++++++++++++++-
> 2 files changed, 42 insertions(+), 1 deletion(-)
>
> diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
> index e9fb023466..08bbc85e3e 100644
> --- a/libavutil/float_dsp.c
> +++ b/libavutil/float_dsp.c
> @@ -132,6 +132,17 @@ float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
> return p;
> }
>
> +double ff_scalarproduct_double_c(const double *v1, const double *v2,
> + size_t len)
> +{
> + double p = 0.0;
> +
> + for (size_t i = 0; i < len; i++)
> + p += v1[i] * v2[i];
> +
> + return p;
> +}
> +
> av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact)
> {
> AVFloatDSPContext *fdsp = av_mallocz(sizeof(AVFloatDSPContext));
> @@ -149,6 +160,7 @@ av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact)
> fdsp->vector_fmul_reverse = vector_fmul_reverse_c;
> fdsp->butterflies_float = butterflies_float_c;
> fdsp->scalarproduct_float = avpriv_scalarproduct_float_c;
> + fdsp->scalarproduct_double = ff_scalarproduct_double_c;
>
> #if ARCH_AARCH64
> ff_float_dsp_init_aarch64(fdsp);
> diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
> index 342a8715c5..5053aa240d 100644
> --- a/libavutil/float_dsp.h
> +++ b/libavutil/float_dsp.h
> @@ -19,6 +19,8 @@
> #ifndef AVUTIL_FLOAT_DSP_H
> #define AVUTIL_FLOAT_DSP_H
>
> +#include <stddef.h>
> +
> typedef struct AVFloatDSPContext {
> /**
> * Calculate the entry wise product of two vectors of floats and store the result in
> @@ -187,19 +189,46 @@ typedef struct AVFloatDSPContext {
> */
> void (*vector_dmul)(double *dst, const double *src0, const double *src1,
> int len);
> +
> + /**
> + * Calculate the scalar product of two vectors of doubles.
> + *
> + * @param v1 first vector
> + * @param v2 second vector
> + * @param len length of vectors
> + *
> + * @return inner product of the vectors
> + */
> + double (*scalarproduct_double)(const double *v1, const double *v2,
> + size_t len);
> } AVFloatDSPContext;
>
> /**
> - * Return the scalar product of two vectors.
> + * Return the scalar product of two vectors of floats.
> *
> * @param v1 first input vector
> + * constraints: 32-byte aligned
> * @param v2 first input vector
> + * constraints: 32-byte aligned
> * @param len number of elements
> + * constraints: multiple of 16
Why are you adding this to the doxy for scalarproduct_float()? Those
constraints are not correct for it. They are for scalarproduct_double(),
which you're adding now.
> *
> * @return sum of elementwise products
> */
> float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len);
>
> +/**
> + * Return the scalar product of two vectors of doubles.
> + *
> + * @param v1 first input vector
> + * @param v2 first input vector
> + * @param len number of elements
> + *
> + * @return inner product of the vectors
> + */
> +double ff_scalarproduct_double_c(const double *v1, const double *v2,
> + size_t len);
> +
> void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp);
> void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp);
> void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict);
More information about the ffmpeg-devel
mailing list