[FFmpeg-devel] [PATCH] checkasm: add (private) kperf timing for macOS

Martin Storsjö martin at martin.st
Tue Apr 13 15:45:01 EEST 2021


On Tue, 13 Apr 2021, Josh Dekker wrote:

> Signed-off-by: Josh Dekker <josh at itanimul.li>
> ---
> configure                    |   2 +
> tests/checkasm/Makefile      |   1 +
> tests/checkasm/checkasm.c    |  19 ++++-
> tests/checkasm/checkasm.h    |  10 ++-
> tests/checkasm/macos_kperf.c | 143 +++++++++++++++++++++++++++++++++++
> tests/checkasm/macos_kperf.h |  23 ++++++
> 6 files changed, 195 insertions(+), 3 deletions(-)
> create mode 100644 tests/checkasm/macos_kperf.c
> create mode 100644 tests/checkasm/macos_kperf.h
>
> diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
> index ef6645e3a2..4127081d74 100644
> --- a/tests/checkasm/checkasm.h
> +++ b/tests/checkasm/checkasm.h
> @@ -31,6 +31,8 @@
> #include <sys/ioctl.h>
> #include <asm/unistd.h>
> #include <linux/perf_event.h>
> +#elif CONFIG_MACOS_KPERF
> +#include "macos_kperf.h"
> #endif
> 
> #include "libavutil/avstring.h"
> @@ -224,7 +226,7 @@ typedef struct CheckasmPerf {
>     int iterations;
> } CheckasmPerf;
> 
> -#if defined(AV_READ_TIME) || CONFIG_LINUX_PERF
> +#if defined(AV_READ_TIME) || CONFIG_LINUX_PERF || CONFIG_MACOS_KPERF
> 
> #if CONFIG_LINUX_PERF
> #define PERF_START(t) do {                              \
> @@ -235,6 +237,12 @@ typedef struct CheckasmPerf {
>     ioctl(sysfd, PERF_EVENT_IOC_DISABLE, 0);            \
>     read(sysfd, &t, sizeof(t));                         \
> } while (0)
> +#elif CONFIG_MACOS_KPERF
> +#define PERF_START(t) do {                              \
> +    t = 0;                                              \
> +    ff_kperf_cycles(&t);                                \
> +} while (0)
> +#define PERF_STOP(t) ff_kperf_cycles(&t)
> #else

I find this implicit subtraction in ff_kperf_cycles() highly surprising 
and hard to reason about.

E.g. if I do t=0; followed by ff_kperf_cycles(&t) three times - what value 
do I have in 't'? With the current code, the answer is "the difference 
between the absolute current timer minus the number of cycles it took 
between the first and second call", and that doesn't sound like something 
that is useful to anybody, ever.

Just make the function return the value as-is and do the subtraction here.


> #define PERF_START(t) t = AV_READ_TIME()
> #define PERF_STOP(t)  t = AV_READ_TIME() - t
> diff --git a/tests/checkasm/macos_kperf.c b/tests/checkasm/macos_kperf.c
> new file mode 100644
> index 0000000000..e6ae316608
> --- /dev/null
> +++ b/tests/checkasm/macos_kperf.c
> @@ -0,0 +1,143 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
> + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> + */
> +
> +#include "macos_kperf.h"
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <dlfcn.h>
> +
> +#define KPERF_LIST                                             \
> +    F(int, kpc_get_counting, void)                             \
> +    F(int, kpc_force_all_ctrs_set, int)                        \
> +    F(int, kpc_set_counting, uint32_t)                         \
> +    F(int, kpc_set_thread_counting, uint32_t)                  \
> +    F(int, kpc_set_config, uint32_t, void *)                   \
> +    F(int, kpc_get_config, uint32_t, void *)                   \
> +    F(int, kpc_set_period, uint32_t, void *)                   \
> +    F(int, kpc_get_period, uint32_t, void *)                   \
> +    F(uint32_t, kpc_get_counter_count, uint32_t)               \
> +    F(uint32_t, kpc_get_config_count, uint32_t)                \
> +    F(int, kperf_sample_get, int *)                            \
> +    F(int, kpc_get_thread_counters, int, unsigned int, void *)
> +
> +#define F(ret, name, ...)                                      \
> +    typedef ret name##proc(__VA_ARGS__);                       \
> +    static name##proc *name = NULL;
> +KPERF_LIST
> +#undef F
> +
> +#define CFGWORD_EL0A32EN_MASK (0x10000)
> +#define CFGWORD_EL0A64EN_MASK (0x20000)
> +#define CFGWORD_EL1EN_MASK    (0x40000)
> +#define CFGWORD_EL3EN_MASK    (0x80000)
> +#define CFGWORD_ALLMODES_MASK (0xf0000)
> +
> +#define CPMU_NONE 0
> +#define CPMU_CORE_CYCLE 0x02
> +#define CPMU_INST_A64 0x8c
> +#define CPMU_INST_BRANCH 0x8d
> +#define CPMU_SYNC_DC_LOAD_MISS 0xbf
> +#define CPMU_SYNC_DC_STORE_MISS 0xc0
> +#define CPMU_SYNC_DTLB_MISS 0xc1
> +#define CPMU_SYNC_ST_HIT_YNGR_LD 0xc4
> +#define CPMU_SYNC_BR_ANY_MISP 0xcb
> +#define CPMU_FED_IC_MISS_DEM 0xd3
> +#define CPMU_FED_ITLB_MISS 0xd4
> +
> +#define KPC_CLASS_FIXED_MASK        (1 << 0)
> +#define KPC_CLASS_CONFIGURABLE_MASK (1 << 1)
> +#define KPC_CLASS_POWER_MASK        (1 << 2)
> +#define KPC_CLASS_RAWPMU_MASK       (1 << 3)
> +
> +#define COUNTERS_COUNT 10
> +#define CONFIG_COUNT 8

Is this value actually needed for anything, other than verifying the 
return value of kpc_get_config_count? It looks to me like one could omit 
it. (Then again, I know this API is undocumented and all, so it's not 
really known how it's supposed to be used.)

> +#define KPC_MASK (KPC_CLASS_CONFIGURABLE_MASK | KPC_CLASS_FIXED_MASK)
> +
> +int ff_kperf_setup()
> +{
> +    uint64_t config[COUNTERS_COUNT] = {0};
> +    config[0] = CPMU_CORE_CYCLE | CFGWORD_EL0A64EN_MASK;
> +    // config[3] = CPMU_INST_BRANCH | CFGWORD_EL0A64EN_MASK;
> +    // config[4] = CPMU_SYNC_BR_ANY_MISP | CFGWORD_EL0A64EN_MASK;
> +    // config[5] = CPMU_INST_A64 | CFGWORD_EL0A64EN_MASK;
> +
> +    if (kpc_set_config(KPC_MASK, config)) {
> +        fprintf(stderr, "kperf: kpc_set_config failed\n");
> +        return -1;
> +    }
> +
> +    if (kpc_force_all_ctrs_set(1)) {
> +        fprintf(stderr, "kperf: kpc_force_all_ctrs_set failed\n");
> +        return -1;
> +    }
> +
> +    if (kpc_set_counting(KPC_MASK)) {
> +        fprintf(stderr, "kperf: kpc_set_counting failed\n");
> +        return -1;
> +    }
> +
> +    if (kpc_set_thread_counting(KPC_MASK)) {
> +        fprintf(stderr, "kperf: kpc_set_thread_counting failed\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +int ff_kperf_init()
> +{
> +    void *kperf = dlopen("/System/Library/PrivateFrameworks/kperf.framework/Versions/A/kperf", RTLD_LAZY);
> +    if (!kperf) {
> +        fprintf(stderr, "kperf: kperf = %p\n", kperf);

If it failed, then this will always print (null), so passing the variable 
to it right now is kinda pointless. A more useful error might be available 
via dlerror().

> +        return -1;
> +    }
> +
> +#define F(ret, name, ...)                            \
> +    name = (name##proc *)(dlsym(kperf, #name));      \
> +    if (!name) {                                     \
> +        fprintf(stderr, "kperf: %s = %p\n", #name, (void *)name);    \

The second parameter here will always be null too

> +        return -1;                                   \
> +    }
> +    KPERF_LIST
> +#undef F
> +
> +    if (kpc_get_counter_count(KPC_MASK) != COUNTERS_COUNT) {

This is also in the field of unknown API usage, but I guess in one sense, 
one could consider also allowing it to work if counter_count >= 1 or 
however many we actually use. But I guess we'd need to enlarge the stack 
buffers a bit more in that case, to allow an actual value up to whatever 
we've allocated.

> +        fprintf(stderr, "kperf: wrong fixed counters count\n");
> +        return -1;
> +    }
> +
> +    if (kpc_get_config_count(KPC_MASK) != CONFIG_COUNT) {
> +        fprintf(stderr, "kperf: wrong fixed config count\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +int ff_kperf_cycles(uint64_t *cycles)
> +{
> +    uint64_t counters[COUNTERS_COUNT];
> +    if (kpc_get_thread_counters(0, COUNTERS_COUNT, counters)) {
> +        return -1;
> +    }
> +
> +    if (cycles)
> +        *cycles = counters[0] - *cycles;

As point out in checkasm.h, move the subtraction there; having it here 
makes the function very strange to reason about.

// Martin



More information about the ffmpeg-devel mailing list