From 184d0274eefa5f2d36b62e829d130ba448f586e8 Mon Sep 17 00:00:00 2001 From: Krzysztof Filipek Date: Wed, 29 Jan 2025 12:06:24 +0100 Subject: [PATCH 001/143] [CTL] Add CTL functionality (by handle access) This commit introduces the control and introspection mechanism that can be accessed using a pointer to a supported pool or provider. --- include/umf/base.h | 37 +++++ include/umf/memory_pool_ops.h | 16 +++ include/umf/memory_provider_ops.h | 18 ++- src/ctl/ctl.c | 172 ++++++++++++++++++------ src/ctl/ctl.h | 63 +++++---- src/libumf.c | 7 + src/libumf.def | 3 + src/libumf.map | 3 + src/memory_pool.c | 30 +++++ src/memory_provider.c | 29 ++++ src/memory_provider_internal.h | 5 + src/pool/pool_scalable.c | 41 +++++- src/provider/provider_os_memory.c | 44 +++++- test/CMakeLists.txt | 10 +- test/ctl/config.txt | 4 +- test/ctl/ctl_api.cpp | 142 +++++++++++++++++++ test/ctl/ctl_debug.c | 63 +++++---- test/ctl/{test.cpp => ctl_unittest.cpp} | 0 18 files changed, 591 insertions(+), 96 deletions(-) create mode 100644 test/ctl/ctl_api.cpp rename test/ctl/{test.cpp => ctl_unittest.cpp} (100%) diff --git a/include/umf/base.h b/include/umf/base.h index 8dad184f2..cc6b0ccbd 100644 --- a/include/umf/base.h +++ b/include/umf/base.h @@ -50,6 +50,43 @@ typedef enum umf_result_t { UMF_RESULT_ERROR_UNKNOWN = 0x7ffffffe ///< Unknown or internal error } umf_result_t; +/// @brief Type of the CTL query +typedef enum umf_ctl_query_type { + CTL_QUERY_READ, + CTL_QUERY_WRITE, + CTL_QUERY_RUNNABLE, + CTL_QUERY_SUBTREE, + + MAX_CTL_QUERY_TYPE +} umf_ctl_query_type_t; + +/// +/// @brief Get the value of the attribute with the given name. +/// @param name name of an attribute to be retrieved +/// @param ctx pointer to the pool or the provider +/// @param arg [out] pointer to the variable where the value will be stored +/// @return UMF_RESULT_SUCCESS on success, UMF_RESULT_ERROR_INVALID_ARGUMENT if any argument is NULL, or UMF_RESULT_ERROR_UNKNOWN on any other failure. 
+/// +umf_result_t umfCtlGet(const char *name, void *ctx, void *arg); + +/// +/// @brief Set the value of the attribute with the given name. +/// @param name name of an attribute to be set +/// @param ctx pointer to the pool or the provider +/// @param arg [in] pointer to the value that will be set +/// @return UMF_RESULT_SUCCESS on success, UMF_RESULT_ERROR_INVALID_ARGUMENT if any argument is NULL, or UMF_RESULT_ERROR_UNKNOWN on any other failure. +/// +umf_result_t umfCtlSet(const char *name, void *ctx, void *arg); + +/// +/// @brief Execute the callback associated with the specified attribute. +/// @param name name of an attribute to be executed +/// @param ctx pointer to the pool or the provider +/// @param arg [in/out] pointer to the value, can be used as an input or output +/// @return UMF_RESULT_SUCCESS on success, UMF_RESULT_ERROR_INVALID_ARGUMENT if any argument is NULL, or UMF_RESULT_ERROR_UNKNOWN on any other failure. +/// +umf_result_t umfCtlExec(const char *name, void *ctx, void *arg); + #ifdef __cplusplus } #endif diff --git a/include/umf/memory_pool_ops.h b/include/umf/memory_pool_ops.h index 657f40aea..bf44383b4 100644 --- a/include/umf/memory_pool_ops.h +++ b/include/umf/memory_pool_ops.h @@ -125,6 +125,22 @@ typedef struct umf_memory_pool_ops_t { /// The value is undefined if the previous allocation was successful. /// umf_result_t (*get_last_allocation_error)(void *pool); + + /// + /// @brief Control operation for the memory pool. + /// The function is used to perform various control operations + /// on the memory pool. + /// + /// @param hPool handle to the memory pool. + /// @param operationType type of the operation to be performed. + /// @param name name associated with the operation. + /// @param arg argument for the operation. + /// @param queryType type of the query to be performed. + /// + /// @return umf_result_t result of the control operation. 
+ /// + umf_result_t (*ctl)(void *hPool, int operationType, const char *name, + void *arg, umf_ctl_query_type_t queryType); } umf_memory_pool_ops_t; #ifdef __cplusplus diff --git a/include/umf/memory_provider_ops.h b/include/umf/memory_provider_ops.h index aaddd503b..638f2975b 100644 --- a/include/umf/memory_provider_ops.h +++ b/include/umf/memory_provider_ops.h @@ -82,7 +82,6 @@ typedef struct umf_memory_provider_ext_ops_t { /// umf_result_t (*allocation_split)(void *hProvider, void *ptr, size_t totalSize, size_t firstSize); - } umf_memory_provider_ext_ops_t; /// @@ -250,6 +249,23 @@ typedef struct umf_memory_provider_ops_t { /// @brief Optional IPC ops. The API allows sharing of memory objects across different processes. /// umf_memory_provider_ipc_ops_t ipc; + + /// + /// @brief Control operation for the memory provider. + /// The function is used to perform various control operations + /// on the memory provider. + /// + /// @param hProvider handle to the memory provider. + /// @param operationType type of the operation to be performed. + /// @param name name associated with the operation. + /// @param arg argument for the operation. + /// @param queryType type of the query to be performed. + /// + /// @return umf_result_t result of the control operation. + /// + umf_result_t (*ctl)(void *hProvider, int operationType, const char *name, + void *arg, umf_ctl_query_type_t queryType); + } umf_memory_provider_ops_t; #ifdef __cplusplus diff --git a/src/ctl/ctl.c b/src/ctl/ctl.c index 4db11ac21..99ab2d96e 100644 --- a/src/ctl/ctl.c +++ b/src/ctl/ctl.c @@ -24,6 +24,8 @@ #include #include +#include + #include "base_alloc/base_alloc_global.h" #include "utils/utils_common.h" #include "utlist.h" @@ -43,8 +45,9 @@ #define CTL_QUERY_NODE_SEPARATOR "." 
#define CTL_VALUE_ARG_SEPARATOR "," +/* GLOBAL TREE */ static int ctl_global_first_free = 0; -static struct ctl_node CTL_NODE(global)[CTL_MAX_ENTRIES]; +static umf_ctl_node_t CTL_NODE(global)[CTL_MAX_ENTRIES]; /* * This is the top level node of the ctl tree structure. Each node can contain @@ -57,7 +60,7 @@ static struct ctl_node CTL_NODE(global)[CTL_MAX_ENTRIES]; * convenience. */ struct ctl { - struct ctl_node root[CTL_MAX_ENTRIES]; + umf_ctl_node_t root[CTL_MAX_ENTRIES]; int first_free; }; @@ -78,17 +81,52 @@ char *Strdup(const char *s) { return p; } +umf_result_t umfCtlGet(const char *name, void *ctx, void *arg) { + if (name == NULL || arg == NULL || ctx == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + return ctl_query(NULL, ctx, CTL_QUERY_PROGRAMMATIC, name, CTL_QUERY_READ, + arg) + ? UMF_RESULT_ERROR_UNKNOWN + : UMF_RESULT_SUCCESS; +} + +umf_result_t umfCtlSet(const char *name, void *ctx, void *arg) { + if (name == NULL || arg == NULL || ctx == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + return ctl_query(NULL, ctx, CTL_QUERY_PROGRAMMATIC, name, CTL_QUERY_WRITE, + arg) + ? UMF_RESULT_ERROR_UNKNOWN + : UMF_RESULT_SUCCESS; +} + +umf_result_t umfCtlExec(const char *name, void *ctx, void *arg) { + if (name == NULL || arg == NULL || ctx == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + return ctl_query(NULL, ctx, CTL_QUERY_PROGRAMMATIC, name, + CTL_QUERY_RUNNABLE, arg) + ? UMF_RESULT_ERROR_UNKNOWN + : UMF_RESULT_SUCCESS; +} + /* * ctl_find_node -- (internal) searches for a matching entry point in the * provided nodes * + * Name offset is used to return the offset of the name in the query string. * The caller is responsible for freeing all of the allocated indexes, * regardless of the return value. 
*/ -static const struct ctl_node *ctl_find_node(const struct ctl_node *nodes, - const char *name, - struct ctl_index_utlist *indexes) { - const struct ctl_node *n = NULL; +static const umf_ctl_node_t *ctl_find_node(const umf_ctl_node_t *nodes, + const char *name, + umf_ctl_index_utlist_t *indexes, + size_t *name_offset) { + assert(nodes != NULL); + assert(name != NULL); + assert(name_offset != NULL); + const umf_ctl_node_t *n = NULL; char *sptr = NULL; char *parse_str = Strdup(name); if (parse_str == NULL) { @@ -102,6 +140,11 @@ static const struct ctl_node *ctl_find_node(const struct ctl_node *nodes, * in the main ctl tree. */ while (node_name != NULL) { + *name_offset = node_name - parse_str; + if (n != NULL && n->type == CTL_NODE_SUBTREE) { + // if a subtree occurs, the subtree handler should be called + break; + } char *endptr; /* * Ignore errno from strtol: FreeBSD returns EINVAL if no @@ -111,7 +154,7 @@ static const struct ctl_node *ctl_find_node(const struct ctl_node *nodes, int tmp_errno = errno; long index_value = strtol(node_name, &endptr, 0); errno = tmp_errno; - struct ctl_index_utlist *index_entry = NULL; + umf_ctl_index_utlist_t *index_entry = NULL; if (endptr != node_name) { /* a valid index */ index_entry = umf_ba_global_alloc(sizeof(*index_entry)); if (index_entry == NULL) { @@ -128,6 +171,7 @@ static const struct ctl_node *ctl_find_node(const struct ctl_node *nodes, break; } } + if (n->name == NULL) { goto error; } @@ -152,11 +196,11 @@ static const struct ctl_node *ctl_find_node(const struct ctl_node *nodes, * ctl_delete_indexes -- * (internal) removes and frees all entries on the index list */ -static void ctl_delete_indexes(struct ctl_index_utlist *indexes) { +static void ctl_delete_indexes(umf_ctl_index_utlist_t *indexes) { if (!indexes) { return; } - struct ctl_index_utlist *elem, *tmp; + umf_ctl_index_utlist_t *elem, *tmp; LL_FOREACH_SAFE(indexes, elem, tmp) { LL_DELETE(indexes, elem); if (elem) { @@ -201,8 +245,8 @@ static void 
*ctl_parse_args(const struct ctl_argument *arg_proto, char *arg) { * ctl_query_get_real_args -- (internal) returns a pointer with actual argument * structure as required by the node callback */ -static void *ctl_query_get_real_args(const struct ctl_node *n, void *write_arg, - enum ctl_query_source source) { +static void *ctl_query_get_real_args(const umf_ctl_node_t *n, void *write_arg, + umf_ctl_query_source_t source) { void *real_arg = NULL; switch (source) { case CTL_QUERY_CONFIG_INPUT: @@ -222,9 +266,8 @@ static void *ctl_query_get_real_args(const struct ctl_node *n, void *write_arg, * ctl_query_cleanup_real_args -- (internal) cleanups relevant argument * structures allocated as a result of the get_real_args call */ -static void ctl_query_cleanup_real_args(const struct ctl_node *n, - void *real_arg, - enum ctl_query_source source) { +static void ctl_query_cleanup_real_args(const umf_ctl_node_t *n, void *real_arg, + umf_ctl_query_source_t source) { /* suppress unused-parameter errors */ (void)n; @@ -242,23 +285,38 @@ static void ctl_query_cleanup_real_args(const struct ctl_node *n, /* * ctl_exec_query_read -- (internal) calls the read callback of a node */ -static int ctl_exec_query_read(void *ctx, const struct ctl_node *n, - enum ctl_query_source source, void *arg, - struct ctl_index_utlist *indexes) { +static int ctl_exec_query_read(void *ctx, const umf_ctl_node_t *n, + umf_ctl_query_source_t source, void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { + (void)extra_name, (void)query_type; + assert(n != NULL); + assert(n->cb[CTL_QUERY_READ] != NULL); + assert(MAX_CTL_QUERY_TYPE != query_type); + if (arg == NULL) { errno = EINVAL; return -1; } - return n->cb[CTL_QUERY_READ](ctx, source, arg, indexes); + return n->cb[CTL_QUERY_READ](ctx, source, arg, indexes, NULL, + MAX_CTL_QUERY_TYPE); } /* * ctl_exec_query_write -- (internal) calls the write callback of a node */ -static int ctl_exec_query_write(void 
*ctx, const struct ctl_node *n, - enum ctl_query_source source, void *arg, - struct ctl_index_utlist *indexes) { +static int ctl_exec_query_write(void *ctx, const umf_ctl_node_t *n, + umf_ctl_query_source_t source, void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { + (void)extra_name, (void)query_type; + assert(n != NULL); + assert(n->cb[CTL_QUERY_WRITE] != NULL); + assert(MAX_CTL_QUERY_TYPE != query_type); + if (arg == NULL) { errno = EINVAL; return -1; @@ -269,7 +327,8 @@ static int ctl_exec_query_write(void *ctx, const struct ctl_node *n, return -1; } - int ret = n->cb[CTL_QUERY_WRITE](ctx, source, real_arg, indexes); + int ret = n->cb[CTL_QUERY_WRITE](ctx, source, real_arg, indexes, NULL, + MAX_CTL_QUERY_TYPE); ctl_query_cleanup_real_args(n, real_arg, source); return ret; @@ -278,26 +337,50 @@ static int ctl_exec_query_write(void *ctx, const struct ctl_node *n, /* * ctl_exec_query_runnable -- (internal) calls the run callback of a node */ -static int ctl_exec_query_runnable(void *ctx, const struct ctl_node *n, - enum ctl_query_source source, void *arg, - struct ctl_index_utlist *indexes) { - return n->cb[CTL_QUERY_RUNNABLE](ctx, source, arg, indexes); +static int ctl_exec_query_runnable(void *ctx, const umf_ctl_node_t *n, + umf_ctl_query_source_t source, void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { + (void)extra_name, (void)query_type; + assert(n != NULL); + assert(n->cb[CTL_QUERY_RUNNABLE] != NULL); + assert(MAX_CTL_QUERY_TYPE != query_type); + return n->cb[CTL_QUERY_RUNNABLE](ctx, source, arg, indexes, NULL, + MAX_CTL_QUERY_TYPE); } -static int (*ctl_exec_query[MAX_CTL_QUERY_TYPE])( - void *ctx, const struct ctl_node *n, enum ctl_query_source source, - void *arg, struct ctl_index_utlist *indexes) = { +static int ctl_exec_query_subtree(void *ctx, const umf_ctl_node_t *n, + umf_ctl_query_source_t source, void *arg, + umf_ctl_index_utlist_t 
*indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { + assert(n != NULL); + assert(n->cb[CTL_QUERY_SUBTREE] != NULL); + assert(MAX_CTL_QUERY_TYPE != query_type); + return n->cb[CTL_QUERY_SUBTREE](ctx, source, arg, indexes, extra_name, + query_type); +} + +typedef int (*umf_ctl_exec_query_t)(void *ctx, const umf_ctl_node_t *n, + umf_ctl_query_source_t source, void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type); + +static umf_ctl_exec_query_t ctl_exec_query[MAX_CTL_QUERY_TYPE] = { ctl_exec_query_read, ctl_exec_query_write, ctl_exec_query_runnable, + ctl_exec_query_subtree, }; /* * ctl_query -- (internal) parses the name and calls the appropriate methods * from the ctl tree */ -int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, - const char *name, enum ctl_query_type type, void *arg) { +int ctl_query(struct ctl *ctl, void *ctx, umf_ctl_query_source_t source, + const char *name, umf_ctl_query_type_t type, void *arg) { if (name == NULL) { errno = EINVAL; return -1; @@ -308,29 +391,36 @@ int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, * easily retrieve the index values. The list is cleared once the ctl * query has been handled. 
*/ - struct ctl_index_utlist *indexes = NULL; + umf_ctl_index_utlist_t *indexes = NULL; indexes = Zalloc(sizeof(*indexes)); if (!indexes) { return -1; } int ret = -1; + size_t name_offset = 0; - const struct ctl_node *n = ctl_find_node(CTL_NODE(global), name, indexes); + const umf_ctl_node_t *n = + ctl_find_node(CTL_NODE(global), name, indexes, &name_offset); if (n == NULL && ctl) { ctl_delete_indexes(indexes); indexes = NULL; - n = ctl_find_node(ctl->root, name, indexes); + n = ctl_find_node(ctl->root, name, indexes, &name_offset); } - if (n == NULL || n->type != CTL_NODE_LEAF || n->cb[type] == NULL) { + // if the appropriate node (leaf or subtree) is not found, then return error + if (n == NULL || + (n->type != CTL_NODE_LEAF && n->type != CTL_NODE_SUBTREE) || + n->cb[n->type == CTL_NODE_SUBTREE ? CTL_QUERY_SUBTREE : type] == NULL) { errno = EINVAL; goto out; } - ret = ctl_exec_query[type](ctx, n, source, arg, indexes); - + const char *extra_name = &name[0] + name_offset; + ret = + ctl_exec_query[n->type == CTL_NODE_SUBTREE ? CTL_QUERY_SUBTREE : type]( + ctx, n, source, arg, indexes, extra_name, type); out: ctl_delete_indexes(indexes); @@ -341,10 +431,10 @@ int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, * ctl_register_module_node -- adds a new node to the CTL tree root. */ void ctl_register_module_node(struct ctl *c, const char *name, - struct ctl_node *n) { - struct ctl_node *nnode = c == NULL - ? &CTL_NODE(global)[ctl_global_first_free++] - : &c->root[c->first_free++]; + umf_ctl_node_t *n) { + umf_ctl_node_t *nnode = c == NULL + ? &CTL_NODE(global)[ctl_global_first_free++] + : &c->root[c->first_free++]; nnode->children = n; nnode->type = CTL_NODE_NAMED; diff --git a/src/ctl/ctl.h b/src/ctl/ctl.h index 9327b01af..968998fc2 100644 --- a/src/ctl/ctl.h +++ b/src/ctl/ctl.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2016-2024 Intel Corporation + * Copyright (C) 2016-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. 
See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -21,19 +21,21 @@ #include #include +#include + #ifdef __cplusplus extern "C" { #endif struct ctl; -struct ctl_index_utlist { +typedef struct ctl_index_utlist { const char *name; long value; struct ctl_index_utlist *next; -}; +} umf_ctl_index_utlist_t; -enum ctl_query_source { +typedef enum ctl_query_source { CTL_UNKNOWN_QUERY_SOURCE, /* query executed directly from the program */ CTL_QUERY_PROGRAMMATIC, @@ -41,24 +43,19 @@ enum ctl_query_source { CTL_QUERY_CONFIG_INPUT, MAX_CTL_QUERY_SOURCE -}; - -enum ctl_query_type { - CTL_QUERY_READ, - CTL_QUERY_WRITE, - CTL_QUERY_RUNNABLE, +} umf_ctl_query_source_t; - MAX_CTL_QUERY_TYPE -}; - -typedef int (*node_callback)(void *ctx, enum ctl_query_source type, void *arg, - struct ctl_index_utlist *indexes); +typedef int (*node_callback)(void *ctx, umf_ctl_query_source_t type, void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type); enum ctl_node_type { CTL_NODE_UNKNOWN, CTL_NODE_NAMED, CTL_NODE_LEAF, CTL_NODE_INDEXED, + CTL_NODE_SUBTREE, MAX_CTL_NODE }; @@ -91,7 +88,7 @@ struct ctl_argument { * CTL Tree node structure, do not use directly. All the necessary functionality * is provided by the included macros. */ -struct ctl_node { +typedef struct ctl_node { const char *name; enum ctl_node_type type; @@ -99,11 +96,13 @@ struct ctl_node { const struct ctl_argument *arg; const struct ctl_node *children; -}; +} umf_ctl_node_t; struct ctl *ctl_new(void); void ctl_delete(struct ctl *stats); +void initialize_global_ctl(void); + int ctl_load_config_from_string(struct ctl *ctl, void *ctx, const char *cfg_string); int ctl_load_config_from_file(struct ctl *ctl, void *ctx, const char *cfg_file); @@ -138,8 +137,8 @@ int ctl_arg_string(const void *arg, void *dest, size_t dest_size); #define CTL_NODE(name, ...) 
ctl_node_##__VA_ARGS__##_##name -int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, - const char *name, enum ctl_query_type type, void *arg); +int ctl_query(struct ctl *ctl, void *ctx, umf_ctl_query_source_t source, + const char *name, umf_ctl_query_type_t type, void *arg); /* Declaration of a new child node */ #define CTL_CHILD(name, ...) \ @@ -161,6 +160,8 @@ int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, #define CTL_RUNNABLE_HANDLER(name, ...) ctl_##__VA_ARGS__##_##name##_runnable +#define CTL_SUBTREE_HANDLER(name, ...) ctl_##__VA_ARGS__##_##name##_subtree + #define CTL_ARG(name) ctl_arg_##name /* @@ -170,7 +171,8 @@ int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, #define CTL_LEAF_RO(name, ...) \ { \ CTL_STR(name), CTL_NODE_LEAF, \ - {CTL_READ_HANDLER(name, __VA_ARGS__), NULL, NULL}, NULL, NULL \ + {CTL_READ_HANDLER(name, __VA_ARGS__), NULL, NULL, NULL}, NULL, \ + NULL \ } /* @@ -180,7 +182,7 @@ int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, #define CTL_LEAF_WO(name, ...) \ { \ CTL_STR(name), CTL_NODE_LEAF, \ - {NULL, CTL_WRITE_HANDLER(name, __VA_ARGS__), NULL}, \ + {NULL, CTL_WRITE_HANDLER(name, __VA_ARGS__), NULL, NULL}, \ &CTL_ARG(name), NULL \ } @@ -191,7 +193,22 @@ int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, #define CTL_LEAF_RUNNABLE(name, ...) \ { \ CTL_STR(name), CTL_NODE_LEAF, \ - {NULL, NULL, CTL_RUNNABLE_HANDLER(name, __VA_ARGS__)}, NULL, NULL \ + {NULL, NULL, CTL_RUNNABLE_HANDLER(name, __VA_ARGS__), NULL}, NULL, \ + NULL \ + } + +#define CTL_LEAF_SUBTREE(name, ...) \ + { \ + CTL_STR(name), CTL_NODE_SUBTREE, \ + {NULL, NULL, NULL, CTL_SUBTREE_HANDLER(name, __VA_ARGS__)}, NULL, \ + NULL \ + } + +#define CTL_LEAF_SUBTREE2(name, fun, ...) 
\ + { \ + CTL_STR(name), CTL_NODE_SUBTREE, \ + {NULL, NULL, NULL, CTL_SUBTREE_HANDLER(fun, __VA_ARGS__)}, NULL, \ + NULL \ } /* @@ -201,7 +218,7 @@ int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, #define CTL_LEAF_RW(name) \ { \ CTL_STR(name), CTL_NODE_LEAF, \ - {CTL_READ_HANDLER(name), CTL_WRITE_HANDLER(name), NULL}, \ + {CTL_READ_HANDLER(name), CTL_WRITE_HANDLER(name), NULL, NULL}, \ &CTL_ARG(name), NULL \ } diff --git a/src/libumf.c b/src/libumf.c index f8f6cc61f..aad0140bb 100644 --- a/src/libumf.c +++ b/src/libumf.c @@ -11,6 +11,7 @@ #include "base_alloc_global.h" #include "ipc_cache.h" +#include "memory_provider_internal.h" #include "memspace_internal.h" #include "pool/pool_scalable_internal.h" #include "provider_cuda_internal.h" @@ -26,6 +27,11 @@ umf_memory_tracker_handle_t TRACKER = NULL; static unsigned long long umfRefCount = 0; +static umf_ctl_node_t CTL_NODE(umf)[] = {CTL_CHILD(provider), CTL_CHILD(pool), + CTL_NODE_END}; + +void initialize_global_ctl(void) { CTL_REGISTER_MODULE(NULL, umf); } + int umfInit(void) { if (utils_fetch_and_add64(&umfRefCount, 1) == 0) { utils_log_init(); @@ -44,6 +50,7 @@ int umfInit(void) { } LOG_DEBUG("UMF IPC cache initialized"); + initialize_global_ctl(); } if (TRACKER) { diff --git a/src/libumf.def b/src/libumf.def index ce8820a8f..dd0ddfbfc 100644 --- a/src/libumf.def +++ b/src/libumf.def @@ -119,6 +119,9 @@ EXPORTS umfScalablePoolParamsSetKeepAllMemory ; Added in UMF_0.11 umfCUDAMemoryProviderParamsSetAllocFlags + umfCtlExec + umfCtlGet + umfCtlSet umfDisjointPoolOps umfDisjointPoolParamsCreate umfDisjointPoolParamsDestroy diff --git a/src/libumf.map b/src/libumf.map index 6582fd0f8..5e97acc09 100644 --- a/src/libumf.map +++ b/src/libumf.map @@ -117,6 +117,9 @@ UMF_0.10 { UMF_0.11 { umfCUDAMemoryProviderParamsSetAllocFlags; + umfCtlExec; + umfCtlGet; + umfCtlSet; umfDisjointPoolOps; umfDisjointPoolParamsCreate; umfDisjointPoolParamsDestroy; diff --git a/src/memory_pool.c b/src/memory_pool.c 
index ef2c0fa66..1b61555de 100644 --- a/src/memory_pool.c +++ b/src/memory_pool.c @@ -22,6 +22,32 @@ #include "memory_provider_internal.h" #include "provider_tracking.h" +static int CTL_SUBTREE_HANDLER(by_handle_pool)(void *ctx, + umf_ctl_query_source_t source, + void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t queryType) { + (void)indexes, (void)source; + umf_memory_pool_handle_t hPool = (umf_memory_pool_handle_t)ctx; + hPool->ops.ctl(hPool, /*unused*/ 0, extra_name, arg, queryType); + return 0; +} + +umf_ctl_node_t CTL_NODE(pool)[] = {CTL_LEAF_SUBTREE2(by_handle, by_handle_pool), + CTL_NODE_END}; + +static umf_result_t umfDefaultCtlPoolHandle(void *hPool, int operationType, + const char *name, void *arg, + umf_ctl_query_type_t queryType) { + (void)hPool; + (void)operationType; + (void)name; + (void)arg; + (void)queryType; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + static umf_result_t umfPoolCreateInternal(const umf_memory_pool_ops_t *ops, umf_memory_provider_handle_t provider, void *params, @@ -58,6 +84,10 @@ static umf_result_t umfPoolCreateInternal(const umf_memory_pool_ops_t *ops, pool->ops = *ops; pool->tag = NULL; + if (NULL == pool->ops.ctl) { + pool->ops.ctl = umfDefaultCtlPoolHandle; + } + if (NULL == utils_mutex_init(&pool->lock)) { LOG_ERR("Failed to initialize mutex for pool"); ret = UMF_RESULT_ERROR_UNKNOWN; diff --git a/src/memory_provider.c b/src/memory_provider.c index ce6a10a20..fdc8725e0 100644 --- a/src/memory_provider.c +++ b/src/memory_provider.c @@ -18,8 +18,23 @@ #include "base_alloc_global.h" #include "libumf.h" #include "memory_provider_internal.h" +#include "umf/base.h" #include "utils_assert.h" +static int CTL_SUBTREE_HANDLER(by_handle_provider)( + void *ctx, umf_ctl_query_source_t source, void *arg, + umf_ctl_index_utlist_t *indexes, const char *extra_name, + umf_ctl_query_type_t queryType) { + (void)indexes, (void)source; + umf_memory_provider_handle_t hProvider = 
(umf_memory_provider_handle_t)ctx; + hProvider->ops.ctl(hProvider->provider_priv, /*unused*/ 0, extra_name, arg, + queryType); + return 0; +} + +umf_ctl_node_t CTL_NODE(provider)[] = { + CTL_LEAF_SUBTREE2(by_handle, by_handle_provider), CTL_NODE_END}; + static umf_result_t umfDefaultPurgeLazy(void *provider, void *ptr, size_t size) { (void)provider; @@ -93,6 +108,17 @@ static umf_result_t umfDefaultCloseIPCHandle(void *provider, void *ptr, return UMF_RESULT_ERROR_NOT_SUPPORTED; } +static umf_result_t umfDefaultCtlHandle(void *provider, int operationType, + const char *name, void *arg, + umf_ctl_query_type_t queryType) { + (void)provider; + (void)operationType; + (void)name; + (void)arg; + (void)queryType; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + void assignOpsExtDefaults(umf_memory_provider_ops_t *ops) { if (!ops->ext.purge_lazy) { ops->ext.purge_lazy = umfDefaultPurgeLazy; @@ -124,6 +150,9 @@ void assignOpsIpcDefaults(umf_memory_provider_ops_t *ops) { if (!ops->ipc.close_ipc_handle) { ops->ipc.close_ipc_handle = umfDefaultCloseIPCHandle; } + if (!ops->ctl) { + ops->ctl = umfDefaultCtlHandle; + } } static bool validateOpsMandatory(const umf_memory_provider_ops_t *ops) { diff --git a/src/memory_provider_internal.h b/src/memory_provider_internal.h index dd1111a23..5abc88d3b 100644 --- a/src/memory_provider_internal.h +++ b/src/memory_provider_internal.h @@ -14,6 +14,8 @@ #include +#include "ctl/ctl.h" + #ifdef __cplusplus extern "C" { #endif @@ -26,6 +28,9 @@ typedef struct umf_memory_provider_t { void *umfMemoryProviderGetPriv(umf_memory_provider_handle_t hProvider); umf_memory_provider_handle_t *umfGetLastFailedMemoryProviderPtr(void); +extern umf_ctl_node_t CTL_NODE(provider)[]; +extern umf_ctl_node_t CTL_NODE(pool)[]; + #ifdef __cplusplus } #endif diff --git a/src/pool/pool_scalable.c b/src/pool/pool_scalable.c index 8a9fd88c1..f68887529 100644 --- a/src/pool/pool_scalable.c +++ b/src/pool/pool_scalable.c @@ -13,6 +13,8 @@ #include #include +#include 
+#include #include #include #include @@ -114,6 +116,10 @@ static const char *tbb_symbol[TBB_POOL_SYMBOLS_MAX] = { #endif }; +struct ctl *pool_scallable_ctl_root; + +static UTIL_ONCE_FLAG ctl_initialized = UTIL_ONCE_FLAG_INIT; + static void init_tbb_callbacks_once(void) { const char *lib_name = tbb_symbol[TBB_LIB_NAME]; tbb_callbacks.lib_handle = utils_open_library(lib_name, 0); @@ -405,6 +411,38 @@ static umf_result_t tbb_get_last_allocation_error(void *pool) { return TLS_last_allocation_error; } +static int CTL_READ_HANDLER(tracking_enabled)(void *ctx, + umf_ctl_query_source_t source, + void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)ctx, (void)extra_name, (void)query_type; + + int *arg_out = arg; + umf_memory_pool_handle_t pool = (umf_memory_pool_handle_t)ctx; + *arg_out = pool->flags & UMF_POOL_CREATE_FLAG_DISABLE_TRACKING ? 0 : 1; + return 0; +} + +static const umf_ctl_node_t CTL_NODE(params)[] = {CTL_LEAF_RO(tracking_enabled), + CTL_NODE_END}; + +static void initialize_pool_ctl(void) { + pool_scallable_ctl_root = ctl_new(); + CTL_REGISTER_MODULE(pool_scallable_ctl_root, params); +} + +static umf_result_t pool_ctl(void *hPool, int operationType, const char *name, + void *arg, umf_ctl_query_type_t query_type) { + (void)operationType; // unused + umf_memory_pool_handle_t pool_provider = (umf_memory_pool_handle_t)hPool; + utils_init_once(&ctl_initialized, initialize_pool_ctl); + return ctl_query(pool_scallable_ctl_root, pool_provider, + CTL_QUERY_PROGRAMMATIC, name, query_type, arg); +} + static umf_memory_pool_ops_t UMF_SCALABLE_POOL_OPS = { .version = UMF_POOL_OPS_VERSION_CURRENT, .initialize = tbb_pool_initialize, @@ -415,7 +453,8 @@ static umf_memory_pool_ops_t UMF_SCALABLE_POOL_OPS = { .aligned_malloc = tbb_aligned_malloc, .malloc_usable_size = tbb_malloc_usable_size, .free = tbb_free, - .get_last_allocation_error = 
tbb_get_last_allocation_error}; + .get_last_allocation_error = tbb_get_last_allocation_error, + .ctl = pool_ctl}; umf_memory_pool_ops_t *umfScalablePoolOps(void) { return &UMF_SCALABLE_POOL_OPS; diff --git a/src/provider/provider_os_memory.c b/src/provider/provider_os_memory.c index bd5ea9c69..9a487a5af 100644 --- a/src/provider/provider_os_memory.c +++ b/src/provider/provider_os_memory.c @@ -13,10 +13,12 @@ #include #include +#include #include +#include +#include #include #include - // OS Memory Provider requires HWLOC #if defined(UMF_NO_HWLOC) @@ -166,6 +168,33 @@ static const char *Native_error_str[] = { "HWLOC topology discovery failed", }; +struct ctl *os_memory_ctl_root; + +static UTIL_ONCE_FLAG ctl_initialized = UTIL_ONCE_FLAG_INIT; + +static int CTL_READ_HANDLER(ipc_enabled)(void *ctx, + umf_ctl_query_source_t source, + void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)ctx, (void)extra_name, (void)query_type; + + int *arg_out = arg; + os_memory_provider_t *os_provider = (os_memory_provider_t *)ctx; + *arg_out = os_provider->IPC_enabled; + return 0; +} + +static const umf_ctl_node_t CTL_NODE(params)[] = {CTL_LEAF_RO(ipc_enabled), + CTL_NODE_END}; + +static void initialize_os_ctl(void) { + os_memory_ctl_root = ctl_new(); + CTL_REGISTER_MODULE(os_memory_ctl_root, params); +} + static void os_store_last_native_error(int32_t native_error, int errno_value) { TLS_last_native_error.native_error = native_error; TLS_last_native_error.errno_value = errno_value; @@ -1401,6 +1430,15 @@ static umf_result_t os_close_ipc_handle(void *provider, void *ptr, return UMF_RESULT_SUCCESS; } +static umf_result_t os_ctl(void *hProvider, int operationType, const char *name, + void *arg, umf_ctl_query_type_t query_type) { + (void)operationType; // unused + os_memory_provider_t *os_provider = (os_memory_provider_t *)hProvider; + 
utils_init_once(&ctl_initialized, initialize_os_ctl); + return ctl_query(os_memory_ctl_root, os_provider, CTL_QUERY_PROGRAMMATIC, + name, query_type, arg); +} + static umf_memory_provider_ops_t UMF_OS_MEMORY_PROVIDER_OPS = { .version = UMF_PROVIDER_OPS_VERSION_CURRENT, .initialize = os_initialize, @@ -1419,7 +1457,9 @@ static umf_memory_provider_ops_t UMF_OS_MEMORY_PROVIDER_OPS = { .ipc.get_ipc_handle = os_get_ipc_handle, .ipc.put_ipc_handle = os_put_ipc_handle, .ipc.open_ipc_handle = os_open_ipc_handle, - .ipc.close_ipc_handle = os_close_ipc_handle}; + .ipc.close_ipc_handle = os_close_ipc_handle, + .ctl = os_ctl, +}; umf_memory_provider_ops_t *umfOsMemoryProviderOps(void) { return &UMF_OS_MEMORY_PROVIDER_OPS; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ecdde95e1..32bdd4c14 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -208,8 +208,14 @@ add_umf_test( LIBS ${UMF_LOGGER_LIBS}) add_umf_test( - NAME ctl - SRCS ctl/test.cpp ctl/ctl_debug.c ../src/ctl/ctl.c ${BA_SOURCES_FOR_TEST} + NAME ctl_unittest + SRCS ctl/ctl_unittest.cpp ctl/ctl_debug.c ../src/ctl/ctl.c + ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) + +add_umf_test( + NAME ctl_api + SRCS ctl/ctl_api.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST}) add_umf_test( diff --git a/test/ctl/config.txt b/test/ctl/config.txt index 5d4f9c62b..52c8febad 100644 --- a/test/ctl/config.txt +++ b/test/ctl/config.txt @@ -1 +1,3 @@ -debug.heap.alloc_pattern=321 \ No newline at end of file +debug.heap.alloc_pattern=321; +debug.heap.enable_logging=1; +debug.heap.log_level=5; diff --git a/test/ctl/ctl_api.cpp b/test/ctl/ctl_api.cpp new file mode 100644 index 000000000..ff6491c16 --- /dev/null +++ b/test/ctl/ctl_api.cpp @@ -0,0 +1,142 @@ +/* + * + * Copyright (C) 2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include + +#include +#include +#include +#include +#include + +#include "../common/base.hpp" +#include "gtest/gtest.h" + +using namespace umf_test; + +TEST_F(test, ctl_by_handle_os_provider) { + umf_memory_provider_handle_t hProvider = NULL; + umf_os_memory_provider_params_handle_t os_memory_provider_params = NULL; + umf_memory_provider_ops_t *os_provider_ops = umfOsMemoryProviderOps(); + if (os_provider_ops == NULL) { + GTEST_SKIP() << "OS memory provider is not supported!"; + } + + int ret = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); + ret = umfMemoryProviderCreate(os_provider_ops, os_memory_provider_params, + &hProvider); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + int ipc_enabled = 0xBAD; + ret = umfCtlGet("umf.provider.by_handle.params.ipc_enabled", hProvider, + &ipc_enabled); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_EQ(ipc_enabled, 0); + + umfOsMemoryProviderParamsDestroy(os_memory_provider_params); + umfMemoryProviderDestroy(hProvider); +} + +// Create an OS memory provider (returns 0 on failure) +umf_memory_provider_handle_t create_memory_provider() { + umf_memory_provider_ops_t *provider_ops = umfOsMemoryProviderOps(); + umf_os_memory_provider_params_handle_t params = NULL; + umf_memory_provider_handle_t provider; + + int ret = umfOsMemoryProviderParamsCreate(&params); + if (ret != UMF_RESULT_SUCCESS) { + return 0; + } + + ret = umfMemoryProviderCreate(provider_ops, params, &provider); + umfOsMemoryProviderParamsDestroy(params); + if (ret != UMF_RESULT_SUCCESS) { + return 0; + } + + return provider; +} + +class CtlTest : public ::testing::Test { + public: + class CtlException : public std::exception { + public: + CtlException(const char *msg) : msg(msg) {} + const char *what() const noexcept override { return msg; } + + private: + const char *msg; + }; + + void SetUp() override { + provider = NULL; + pool = NULL; + } + + void instantiatePool(umf_memory_pool_ops_t *pool_ops, 
void *pool_params, + umf_pool_create_flags_t flags = 0) { + freeResources(); + provider = create_memory_provider(); + if (provider == NULL) { + throw CtlException("Failed to create a memory provider!"); + } + int ret = umfPoolCreate(pool_ops, provider, pool_params, flags, &pool); + if (ret != UMF_RESULT_SUCCESS) { + throw CtlException("Failed to create a memory provider!"); + } + } + + template + void validateQuery( + std::function + ctlApiFunction, + const char *name, T expectedValue, umf_result_t expected) { + T value = 0xBAD; + umf_result_t ret = ctlApiFunction(name, pool, &value); + ASSERT_EQ(ret, expected); + if (ret == UMF_RESULT_SUCCESS) { + ASSERT_EQ(value, expectedValue); + } + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + } + + void TearDown() override { freeResources(); } + + private: + void freeResources() { + if (pool) { + umfPoolDestroy(pool); + } + if (provider) { + umfMemoryProviderDestroy(provider); + } + } + + umf_memory_provider_handle_t provider; + umf_memory_pool_handle_t pool; +}; + +TEST_F(CtlTest, ctl_by_handle_scalablePool) { + try { + instantiatePool(umfScalablePoolOps(), NULL); + validateQuery(umfCtlGet, + "umf.pool.by_handle.params.tracking_enabled", 1, + UMF_RESULT_SUCCESS); + + instantiatePool(umfScalablePoolOps(), NULL, + UMF_POOL_CREATE_FLAG_DISABLE_TRACKING); + validateQuery(umfCtlGet, + "umf.pool.by_handle.params.tracking_enabled", 0, + UMF_RESULT_SUCCESS); + } catch (CtlTest::CtlException &e) { + GTEST_SKIP() << e.what(); + } catch (...) { + GTEST_FAIL() << "Unknown exception!"; + } +} diff --git a/test/ctl/ctl_debug.c b/test/ctl/ctl_debug.c index 711cb5e17..5bc2920ea 100644 --- a/test/ctl/ctl_debug.c +++ b/test/ctl/ctl_debug.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -25,11 +25,13 @@ struct ctl *get_debug_ctl(void) { return ctl_debug; } * CTL_WRITE_HANDLER(alloc_pattern) -- sets the alloc_pattern field in heap */ static int CTL_WRITE_HANDLER(alloc_pattern)(void *ctx, - enum ctl_query_source source, + umf_ctl_query_source_t source, void *arg, - struct ctl_index_utlist *indexes) { + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { /* suppress unused-parameter errors */ - (void)source, (void)indexes, (void)ctx; + (void)source, (void)indexes, (void)ctx, (void)extra_name, (void)query_type; int arg_in = *(int *)arg; alloc_pattern = arg_in; @@ -40,11 +42,13 @@ static int CTL_WRITE_HANDLER(alloc_pattern)(void *ctx, * CTL_READ_HANDLER(alloc_pattern) -- returns alloc_pattern heap field */ static int CTL_READ_HANDLER(alloc_pattern)(void *ctx, - enum ctl_query_source source, + umf_ctl_query_source_t source, void *arg, - struct ctl_index_utlist *indexes) { + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { /* suppress unused-parameter errors */ - (void)source, (void)indexes, (void)ctx; + (void)source, (void)indexes, (void)ctx, (void)extra_name, (void)query_type; int *arg_out = arg; *arg_out = alloc_pattern; @@ -52,11 +56,13 @@ static int CTL_READ_HANDLER(alloc_pattern)(void *ctx, } static int CTL_WRITE_HANDLER(enable_logging)(void *ctx, - enum ctl_query_source source, + umf_ctl_query_source_t source, void *arg, - struct ctl_index_utlist *indexes) { + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { /* suppress unused-parameter errors */ - (void)source, (void)indexes, (void)ctx; + (void)source, (void)indexes, (void)ctx, (void)extra_name, (void)query_type; int arg_in = *(int *)arg; enable_logging = arg_in; @@ -64,33 +70,40 @@ static int CTL_WRITE_HANDLER(enable_logging)(void *ctx, } static int CTL_READ_HANDLER(enable_logging)(void *ctx, - enum 
ctl_query_source source, + umf_ctl_query_source_t source, void *arg, - struct ctl_index_utlist *indexes) { + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { /* suppress unused-parameter errors */ - (void)source, (void)indexes, (void)ctx; + (void)source, (void)indexes, (void)ctx, (void)extra_name, (void)query_type; int *arg_out = arg; *arg_out = enable_logging; return 0; } -static int CTL_WRITE_HANDLER(log_level)(void *ctx, enum ctl_query_source source, +static int CTL_WRITE_HANDLER(log_level)(void *ctx, + umf_ctl_query_source_t source, void *arg, - struct ctl_index_utlist *indexes) { + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { /* suppress unused-parameter errors */ - (void)source, (void)indexes, (void)ctx; + (void)source, (void)indexes, (void)ctx, (void)extra_name, (void)query_type; int arg_in = *(int *)arg; log_level = arg_in; return 0; } -static int CTL_READ_HANDLER(log_level)(void *ctx, enum ctl_query_source source, +static int CTL_READ_HANDLER(log_level)(void *ctx, umf_ctl_query_source_t source, void *arg, - struct ctl_index_utlist *indexes) { + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { /* suppress unused-parameter errors */ - (void)source, (void)indexes, (void)ctx; + (void)source, (void)indexes, (void)ctx, (void)extra_name, (void)query_type; int *arg_out = arg; *arg_out = log_level; @@ -103,15 +116,15 @@ static const struct ctl_argument CTL_ARG(enable_logging) = CTL_ARG_BOOLEAN; static const struct ctl_argument CTL_ARG(log_level) = CTL_ARG_INT; -static const struct ctl_node CTL_NODE(heap)[] = {CTL_LEAF_RW(alloc_pattern), - CTL_LEAF_RW(enable_logging), - CTL_LEAF_RW(log_level), +static const umf_ctl_node_t CTL_NODE(heap)[] = {CTL_LEAF_RW(alloc_pattern), + CTL_LEAF_RW(enable_logging), + CTL_LEAF_RW(log_level), - CTL_NODE_END}; + CTL_NODE_END}; -static const struct ctl_node CTL_NODE(debug)[] = 
{CTL_CHILD(heap), +static const umf_ctl_node_t CTL_NODE(debug)[] = {CTL_CHILD(heap), - CTL_NODE_END}; + CTL_NODE_END}; /* * debug_ctl_register -- registers ctl nodes for "debug" module diff --git a/test/ctl/test.cpp b/test/ctl/ctl_unittest.cpp similarity index 100% rename from test/ctl/test.cpp rename to test/ctl/ctl_unittest.cpp From 94d11508432a2b93da671f4ee80d013d199680ae Mon Sep 17 00:00:00 2001 From: Krzysztof Filipek Date: Tue, 25 Feb 2025 14:06:06 +0100 Subject: [PATCH 002/143] Move *priv members to the end of internal structures This commit moves provider_priv and pool_priv members in internal structures due to compatibility issues. From now, adding new ops will not break accessing these members. --- src/memory_pool_internal.h | 4 +++- src/memory_provider_internal.h | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/memory_pool_internal.h b/src/memory_pool_internal.h index ab3378163..4e3c31696 100644 --- a/src/memory_pool_internal.h +++ b/src/memory_pool_internal.h @@ -26,7 +26,6 @@ extern "C" { typedef struct umf_memory_pool_t { void *pool_priv; - umf_memory_pool_ops_t ops; umf_pool_create_flags_t flags; // Memory provider used by the pool. 
@@ -34,6 +33,9 @@ typedef struct umf_memory_pool_t { utils_mutex_t lock; void *tag; + + // ops should be the last due to possible change size in the future + umf_memory_pool_ops_t ops; } umf_memory_pool_t; #ifdef __cplusplus diff --git a/src/memory_provider_internal.h b/src/memory_provider_internal.h index 5abc88d3b..4b4ec8b2d 100644 --- a/src/memory_provider_internal.h +++ b/src/memory_provider_internal.h @@ -21,8 +21,9 @@ extern "C" { #endif typedef struct umf_memory_provider_t { - umf_memory_provider_ops_t ops; void *provider_priv; + // ops should be the last due to possible change size in the future + umf_memory_provider_ops_t ops; } umf_memory_provider_t; void *umfMemoryProviderGetPriv(umf_memory_provider_handle_t hProvider); From d319917f71f6040032ba7b7a945cc9fe638dc3fc Mon Sep 17 00:00:00 2001 From: Krzysztof Filipek Date: Tue, 25 Feb 2025 14:11:36 +0100 Subject: [PATCH 003/143] [CI] Disable mempolicy test in compatibility tests --- .github/workflows/reusable_compatibility.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/reusable_compatibility.yml b/.github/workflows/reusable_compatibility.yml index fbd17a2f4..c7f84d6e3 100644 --- a/.github/workflows/reusable_compatibility.yml +++ b/.github/workflows/reusable_compatibility.yml @@ -110,7 +110,7 @@ jobs: run: > UMF_LOG="level:warning;flush:debug;output:stderr;pid:no" LD_LIBRARY_PATH=${{github.workspace}}/latest_version/build/lib/ - ctest --output-on-failure + ctest --output-on-failure -E "umf-mempolicy" # disable tests that rely on internal structures windows-build: name: Windows From 0d62314320249ce18bec3dcf0d8a40385a4f0ade Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Wed, 26 Feb 2025 16:30:54 +0100 Subject: [PATCH 004/143] add memusage stat to os_provider and use it in benchmarks --- benchmark/benchmark.hpp | 58 ++++++++++-- benchmark/benchmark_umf.hpp | 69 ++++++++++---- src/provider/provider_os_memory.c | 102 ++++++++++++++++++++- 
src/provider/provider_os_memory_internal.h | 5 +- src/utils/utils_concurrency.h | 12 ++- 5 files changed, 220 insertions(+), 26 deletions(-) diff --git a/benchmark/benchmark.hpp b/benchmark/benchmark.hpp index a960d89bc..b096716b3 100644 --- a/benchmark/benchmark.hpp +++ b/benchmark/benchmark.hpp @@ -173,6 +173,14 @@ class provider_allocator : public allocator_interface { return argPos; } + void preBench(::benchmark::State &state) override { + provider.preBench(state); + } + + void postBench(::benchmark::State &state) override { + provider.postBench(state); + } + void TearDown(::benchmark::State &state) override { provider.TearDown(state); } @@ -204,13 +212,18 @@ template class pool_allocator : public allocator_interface { return argPos; } + void preBench(::benchmark::State &state) override { pool.preBench(state); } + void postBench(::benchmark::State &state) override { + pool.postBench(state); + } + void TearDown(::benchmark::State &state) override { pool.TearDown(state); } - virtual void *benchAlloc(size_t size) override { + void *benchAlloc(size_t size) override { return umfPoolMalloc(pool.pool, size); } - virtual void benchFree(void *ptr, [[maybe_unused]] size_t size) override { + void benchFree(void *ptr, [[maybe_unused]] size_t size) override { umfPoolFree(pool.pool, ptr); } @@ -241,7 +254,7 @@ struct benchmark_interface : public benchmark::Fixture { allocator.TearDown(state); } - virtual void bench(::benchmark::State &state) = 0; + void bench([[maybe_unused]] ::benchmark::State &state){}; virtual std::vector argsName() { auto s = Size::argsName(); @@ -260,6 +273,9 @@ struct benchmark_interface : public benchmark::Fixture { benchmark->ArgNames(bench->argsName())->Name(bench->name()); } + void custom_counters(::benchmark::State &state) { + allocator.custom_counters(state); + } std::vector alloc_sizes; Allocator allocator; }; @@ -282,7 +298,7 @@ class multiple_malloc_free_benchmark : public benchmark_interface { vector2d allocations; std::vector iters; - + 
std::vector memused; vector2d next; std::vector::const_iterator> next_iter; int64_t iterations; @@ -302,6 +318,7 @@ class multiple_malloc_free_benchmark : public benchmark_interface { allocations.resize(state.threads()); next.resize(state.threads()); next_iter.resize(state.threads()); + memused.assign(state.threads(), 0); #ifndef WIN32 // Ensure that system malloc does not have memory pooled on the heap @@ -323,13 +340,36 @@ class multiple_malloc_free_benchmark : public benchmark_interface { waitForAllThreads(state); // prepare workload for actual benchmark. freeAllocs(state); + prealloc(state); prepareWorkload(state); + waitForAllThreads(state); + base::allocator.preBench(state); } void TearDown(::benchmark::State &state) override { + base::allocator.postBench(state); auto tid = state.thread_index(); + if (tid == 0) { + size_t current_memory_allocated = 0; + for (const auto &used : memused) { + current_memory_allocated += used; + } + + auto memory_used = state.counters["provider_memory_allocated"]; + + if (memory_used != 0) { + state.counters["benchmark_memory_allocated"] = + static_cast(current_memory_allocated); + state.counters["memory_overhead"] = + 100.0 * (memory_used - current_memory_allocated) / + memory_used; + } else { + state.counters.erase("provider_memory_allocated"); + } + } + waitForAllThreads(state); freeAllocs(state); waitForAllThreads(state); if (tid == 0) { @@ -342,20 +382,22 @@ class multiple_malloc_free_benchmark : public benchmark_interface { base::TearDown(state); } - void bench(benchmark::State &state) override { + void bench(benchmark::State &state) { auto tid = state.thread_index(); auto &allocation = allocations[tid]; + auto &memuse = memused[tid]; for (int i = 0; i < allocsPerIterations; i++) { auto &n = *next_iter[tid]++; auto &alloc = allocation[n.offset]; base::allocator.benchFree(alloc.ptr, alloc.size); - + memuse -= alloc.size; alloc.size = n.size; alloc.ptr = base::allocator.benchAlloc(alloc.size); if (alloc.ptr == NULL) { 
state.SkipWithError("allocation failed"); } + memuse += alloc.size; } } @@ -376,7 +418,9 @@ class multiple_malloc_free_benchmark : public benchmark_interface { auto tid = state.thread_index(); auto &i = allocations[tid]; i.resize(max_allocs); + auto &memuse = memused[tid]; auto sizeGenerator = base::alloc_sizes[tid]; + for (size_t j = 0; j < max_allocs; j++) { auto size = sizeGenerator.nextSize(); i[j].ptr = base::allocator.benchAlloc(size); @@ -385,6 +429,7 @@ class multiple_malloc_free_benchmark : public benchmark_interface { return; } i[j].size = size; + memuse += size; } } @@ -394,6 +439,7 @@ class multiple_malloc_free_benchmark : public benchmark_interface { for (auto &j : i) { if (j.ptr != NULL) { base::allocator.benchFree(j.ptr, j.size); + memused[tid] -= j.size; j.ptr = NULL; j.size = 0; } diff --git a/benchmark/benchmark_umf.hpp b/benchmark/benchmark_umf.hpp index cfc9982d2..9553d6fdb 100644 --- a/benchmark/benchmark_umf.hpp +++ b/benchmark/benchmark_umf.hpp @@ -11,8 +11,6 @@ #include #include #include - -#include #include #include @@ -30,7 +28,7 @@ struct provider_interface { using params_ptr = std::unique_ptr; umf_memory_provider_handle_t provider = NULL; - virtual void SetUp(::benchmark::State &state) { + void SetUp(::benchmark::State &state) { if (state.thread_index() != 0) { return; } @@ -42,7 +40,27 @@ struct provider_interface { } } - virtual void TearDown([[maybe_unused]] ::benchmark::State &state) { + void preBench([[maybe_unused]] ::benchmark::State &state) { + if (state.thread_index() != 0) { + return; + } + umfCtlExec("umf.provider.by_handle.stats.reset", provider, NULL); + } + + void postBench([[maybe_unused]] ::benchmark::State &state) { + if (state.thread_index() != 0) { + return; + } + size_t arg; + umf_result_t ret = umfCtlGet( + "umf.provider.by_handle.stats.allocated_memory", provider, &arg); + if (ret == UMF_RESULT_SUCCESS) { + state.counters["provider_memory_allocated"] = + static_cast(arg); + } + } + + void TearDown([[maybe_unused]] 
::benchmark::State &state) { if (state.thread_index() != 0) { return; } @@ -53,9 +71,7 @@ struct provider_interface { } virtual umf_memory_provider_ops_t * - getOps([[maybe_unused]] ::benchmark::State &state) { - return nullptr; - } + getOps([[maybe_unused]] ::benchmark::State &state) = 0; virtual params_ptr getParams([[maybe_unused]] ::benchmark::State &state) { return {nullptr, [](void *) {}}; @@ -68,7 +84,7 @@ template ; - virtual void SetUp(::benchmark::State &state) { + void SetUp(::benchmark::State &state) { provider.SetUp(state); if (state.thread_index() != 0) { return; @@ -80,7 +96,22 @@ struct pool_interface { state.SkipWithError("umfPoolCreate() failed"); } } - virtual void TearDown([[maybe_unused]] ::benchmark::State &state) { + + void preBench([[maybe_unused]] ::benchmark::State &state) { + provider.preBench(state); + if (state.thread_index() != 0) { + return; + } + } + + void postBench([[maybe_unused]] ::benchmark::State &state) { + provider.postBench(state); + if (state.thread_index() != 0) { + return; + } + } + + void TearDown([[maybe_unused]] ::benchmark::State &state) { if (state.thread_index() != 0) { return; } @@ -93,15 +124,17 @@ struct pool_interface { if (pool) { umfPoolDestroy(pool); } + + provider.TearDown(state); }; virtual umf_memory_pool_ops_t * - getOps([[maybe_unused]] ::benchmark::State &state) { - return nullptr; - } + getOps([[maybe_unused]] ::benchmark::State &state) = 0; + virtual params_ptr getParams([[maybe_unused]] ::benchmark::State &state) { return {nullptr, [](void *) {}}; } + T provider; umf_memory_pool_handle_t pool; }; @@ -110,6 +143,8 @@ class allocator_interface { public: virtual unsigned SetUp([[maybe_unused]] ::benchmark::State &state, [[maybe_unused]] unsigned argPos) = 0; + virtual void preBench([[maybe_unused]] ::benchmark::State &state) = 0; + virtual void postBench([[maybe_unused]] ::benchmark::State &state) = 0; virtual void TearDown([[maybe_unused]] ::benchmark::State &state) = 0; virtual void *benchAlloc(size_t 
size) = 0; virtual void benchFree(void *ptr, [[maybe_unused]] size_t size) = 0; @@ -121,7 +156,9 @@ struct glibc_malloc : public allocator_interface { unsigned argPos) override { return argPos; } - void TearDown([[maybe_unused]] ::benchmark::State &state) override{}; + void preBench([[maybe_unused]] ::benchmark::State &state) override {} + void postBench([[maybe_unused]] ::benchmark::State &state) override {} + void TearDown([[maybe_unused]] ::benchmark::State &state) override {} void *benchAlloc(size_t size) override { return malloc(size); } void benchFree(void *ptr, [[maybe_unused]] size_t size) override { free(ptr); @@ -163,7 +200,7 @@ struct fixed_provider : public provider_interface { char *mem = NULL; const size_t size = 1024 * 1024 * 1024; // 1GB public: - virtual void SetUp(::benchmark::State &state) override { + void SetUp(::benchmark::State &state) { if (state.thread_index() != 0) { return; } @@ -175,7 +212,7 @@ struct fixed_provider : public provider_interface { provider_interface::SetUp(state); } - virtual void TearDown(::benchmark::State &state) override { + void TearDown(::benchmark::State &state) { if (state.thread_index() != 0) { return; } @@ -295,7 +332,7 @@ struct jemalloc_pool : public pool_interface { #ifdef UMF_POOL_SCALABLE_ENABLED template struct scalable_pool : public pool_interface { - virtual umf_memory_pool_ops_t * + umf_memory_pool_ops_t * getOps([[maybe_unused]] ::benchmark::State &state) override { return umfScalablePoolOps(); } diff --git a/src/provider/provider_os_memory.c b/src/provider/provider_os_memory.c index f3e5c7fa0..1ecb397fe 100644 --- a/src/provider/provider_os_memory.c +++ b/src/provider/provider_os_memory.c @@ -6,19 +6,21 @@ */ #include +#include #include #include + #include #include #include #include - -#include #include #include #include #include #include + +#include "utils_assert.h" // OS Memory Provider requires HWLOC #if defined(UMF_NO_HWLOC) @@ -187,12 +189,77 @@ static int CTL_READ_HANDLER(ipc_enabled)(void *ctx, 
return 0; } +static int CTL_READ_HANDLER(peak_memory)(void *ctx, + umf_ctl_query_source_t source, + void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)ctx, (void)extra_name, (void)query_type; + + size_t *arg_out = arg; + os_memory_provider_t *os_provider = (os_memory_provider_t *)ctx; + COMPILE_ERROR_ON(sizeof(os_provider->stats.peak_memory) != + sizeof(uint64_t)); + utils_atomic_load_acquire_u64((uint64_t *)&os_provider->stats.peak_memory, + (uint64_t *)arg_out); + return 0; +} + +static int CTL_READ_HANDLER(allocated_memory)(void *ctx, + umf_ctl_query_source_t source, + void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)ctx, (void)extra_name, (void)query_type; + + size_t *arg_out = arg; + os_memory_provider_t *os_provider = (os_memory_provider_t *)ctx; + COMPILE_ERROR_ON(sizeof(os_provider->stats.allocated_memory) != + sizeof(uint64_t)); + COMPILE_ERROR_ON(sizeof(*arg_out) != sizeof(uint64_t)); + utils_atomic_load_acquire_u64( + (uint64_t *)&os_provider->stats.allocated_memory, (uint64_t *)arg_out); + return 0; +} + +static int CTL_RUNNABLE_HANDLER(reset)(void *ctx, umf_ctl_query_source_t source, + void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)arg, (void)extra_name, (void)query_type; + + os_memory_provider_t *os_provider = (os_memory_provider_t *)ctx; + size_t allocated; + + COMPILE_ERROR_ON(sizeof(os_provider->stats.allocated_memory) != + sizeof(uint64_t)); + COMPILE_ERROR_ON(sizeof(allocated) != sizeof(uint64_t)); + + utils_atomic_load_acquire_u64( + (uint64_t *)&os_provider->stats.allocated_memory, + (uint64_t *)&allocated); + 
utils_atomic_store_release_u64((uint64_t *)&os_provider->stats.peak_memory, + (uint64_t)allocated); + + return 0; +} +static const umf_ctl_node_t CTL_NODE(stats)[] = { + CTL_LEAF_RO(allocated_memory), CTL_LEAF_RO(peak_memory), + CTL_LEAF_RUNNABLE(reset), CTL_NODE_END}; + static const umf_ctl_node_t CTL_NODE(params)[] = {CTL_LEAF_RO(ipc_enabled), CTL_NODE_END}; static void initialize_os_ctl(void) { os_memory_ctl_root = ctl_new(); CTL_REGISTER_MODULE(os_memory_ctl_root, params); + CTL_REGISTER_MODULE(os_memory_ctl_root, stats); } static void os_store_last_native_error(int32_t native_error, int errno_value) { @@ -1109,6 +1176,29 @@ static umf_result_t os_alloc(void *provider, size_t size, size_t alignment, *resultPtr = addr; + COMPILE_ERROR_ON(sizeof(os_provider->stats.allocated_memory) != + sizeof(uint64_t)); + COMPILE_ERROR_ON(sizeof(os_provider->stats.peak_memory) != + sizeof(uint64_t)); + COMPILE_ERROR_ON(sizeof(size) != sizeof(uint64_t)); + // TODO: Change to memory_order_relaxed when we will have a proper wrapper + size_t allocated = + utils_fetch_and_add_u64( + (uint64_t *)&os_provider->stats.allocated_memory, (uint64_t)size) + + size; + + uint64_t peak; + utils_atomic_load_acquire_u64((uint64_t *)&os_provider->stats.peak_memory, + &peak); + + while (allocated > peak && !utils_compare_exchange_u64( + (uint64_t *)&os_provider->stats.peak_memory, + &peak, (uint64_t *)&allocated)) { + /* If the compare-exchange fails, 'peak' is updated to the current value of peak_memory. + We then re-check whether allocated is still greater than the updated peak value. 
*/ + ; + } + return UMF_RESULT_SUCCESS; err_unmap: @@ -1136,6 +1226,14 @@ static umf_result_t os_free(void *provider, void *ptr, size_t size) { return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; } + COMPILE_ERROR_ON(sizeof(size) != sizeof(uint64_t)); + COMPILE_ERROR_ON(sizeof(os_provider->stats.allocated_memory) != + sizeof(uint64_t)); + + // TODO: Change it to memory_order_relaxed when we will have a proper wrapper + utils_fetch_and_sub_u64((uint64_t *)&os_provider->stats.allocated_memory, + size); + return UMF_RESULT_SUCCESS; } diff --git a/src/provider/provider_os_memory_internal.h b/src/provider/provider_os_memory_internal.h index 4a603b1da..a3f35cbd3 100644 --- a/src/provider/provider_os_memory_internal.h +++ b/src/provider/provider_os_memory_internal.h @@ -10,7 +10,6 @@ #include #include - #if defined(_WIN32) && !defined(NAME_MAX) #include #define NAME_MAX _MAX_FNAME @@ -68,6 +67,10 @@ typedef struct os_memory_provider_t { size_t partitions_weight_sum; hwloc_topology_t topo; + struct { + size_t allocated_memory; + size_t peak_memory; + } stats; } os_memory_provider_t; #ifdef __cplusplus diff --git a/src/utils/utils_concurrency.h b/src/utils/utils_concurrency.h index 0104b8646..638c1c426 100644 --- a/src/utils/utils_concurrency.h +++ b/src/utils/utils_concurrency.h @@ -120,11 +120,15 @@ static inline void utils_atomic_load_acquire_ptr(void **ptr, void **out) { *(uintptr_t *)out = ret; } +static inline void utils_atomic_store_release_u64(uint64_t *ptr, uint64_t val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + InterlockedExchange64((LONG64 volatile *)ptr, val); +} + static inline void utils_atomic_store_release_ptr(void **ptr, void *val) { ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); InterlockedExchangePointer(ptr, val); } - static inline uint64_t utils_atomic_increment_u64(uint64_t *ptr) { ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); // return incremented value @@ -183,6 +187,12 @@ static inline void utils_atomic_load_acquire_ptr(void **ptr, void **out) { 
utils_annotate_acquire(ptr); } +static inline void utils_atomic_store_release_u64(uint64_t *ptr, uint64_t val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + utils_annotate_release(ptr); + __atomic_store_n(ptr, val, memory_order_release); +} + static inline void utils_atomic_store_release_ptr(void **ptr, void *val) { ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); utils_annotate_release(ptr); From 387a2a94402f8c6930d35c8c48a8b7111a5ac9c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Wed, 19 Mar 2025 13:13:35 +0100 Subject: [PATCH 005/143] Move CTL functions into 0.12 sections (in .map/.def files) --- src/libumf.def | 7 ++++--- src/libumf.map | 9 ++++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/libumf.def b/src/libumf.def index dd0ddfbfc..34ecee889 100644 --- a/src/libumf.def +++ b/src/libumf.def @@ -119,9 +119,6 @@ EXPORTS umfScalablePoolParamsSetKeepAllMemory ; Added in UMF_0.11 umfCUDAMemoryProviderParamsSetAllocFlags - umfCtlExec - umfCtlGet - umfCtlSet umfDisjointPoolOps umfDisjointPoolParamsCreate umfDisjointPoolParamsDestroy @@ -139,3 +136,7 @@ EXPORTS umfFixedMemoryProviderParamsDestroy umfLevelZeroMemoryProviderParamsSetFreePolicy umfLevelZeroMemoryProviderParamsSetDeviceOrdinal +; Added in UMF_0.12 + umfCtlExec + umfCtlGet + umfCtlSet diff --git a/src/libumf.map b/src/libumf.map index 5e97acc09..f9ec9b6bf 100644 --- a/src/libumf.map +++ b/src/libumf.map @@ -117,9 +117,6 @@ UMF_0.10 { UMF_0.11 { umfCUDAMemoryProviderParamsSetAllocFlags; - umfCtlExec; - umfCtlGet; - umfCtlSet; umfDisjointPoolOps; umfDisjointPoolParamsCreate; umfDisjointPoolParamsDestroy; @@ -138,3 +135,9 @@ UMF_0.11 { umfLevelZeroMemoryProviderParamsSetFreePolicy; umfLevelZeroMemoryProviderParamsSetDeviceOrdinal; } UMF_0.10; + +UMF_0.12 { + umfCtlExec; + umfCtlGet; + umfCtlSet; +} UMF_0.11; From ffa4eb6179ebe3d4e3e2ecd5207306609069876c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Wed, 19 Mar 2025 13:32:26 +0100 Subject: 
[PATCH 006/143] remove pool benchmarks with fixed provider. Simplify benchmark tests by removing redundant pool benchmarks for fixed provider, as results are nearly identical to os provider. Also reduce iteration count for 'fix' provider benchmarks to match with 'os' provider. --- benchmark/benchmark.cpp | 67 ++++------------------------------------- 1 file changed, 6 insertions(+), 61 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index f57c0d5ae..4ab5a62e2 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -143,73 +143,18 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, proxy_pool_fixedprovider) ->Apply(&default_multiple_alloc_fix_size) - ->Apply(&singlethreaded); + ->Apply(&singlethreaded) + // reduce iterations, to match os_provider benchmark + ->Iterations(50000); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, fixed_provider, fixed_alloc_size, provider_allocator); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, fixed_provider) ->Apply(&default_multiple_alloc_fix_size) - ->Apply(&singlethreaded); - -UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, - disjoint_pool_fix_fixedprovider, fixed_alloc_size, - pool_allocator>); -UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, - disjoint_pool_fix_fixedprovider) - ->Apply(&default_multiple_alloc_fix_size) - ->Apply(&multithreaded); - -UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, - disjoint_pool_uniform_fixedprovider, - uniform_alloc_size, - pool_allocator>); -UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, - disjoint_pool_uniform_fixedprovider) - ->Apply(&default_multiple_alloc_uniform_size) - ->Apply(&multithreaded); - -#ifdef UMF_POOL_JEMALLOC_ENABLED -UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, - jemalloc_pool_fixedprovider, fixed_alloc_size, - pool_allocator>); 
-UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, - jemalloc_pool_fixedprovider) - ->Apply(&default_multiple_alloc_fix_size) - ->Apply(&multithreaded); - -UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, - jemalloc_pool_uniform_fixedprovider, - uniform_alloc_size, - pool_allocator>); -UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, - jemalloc_pool_uniform_fixedprovider) - ->Apply(&default_multiple_alloc_uniform_size) - ->Apply(&multithreaded); - -#endif - -#ifdef UMF_POOL_SCALABLE_ENABLED -UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, - scalable_pool_fix_fixedprovider, fixed_alloc_size, - pool_allocator>); - -UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, - scalable_pool_fix_fixedprovider) - ->Apply(&default_multiple_alloc_fix_size) - ->Apply(&multithreaded); - -UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, - scalable_pool_uniform_fixedprovider, - uniform_alloc_size, - pool_allocator>); - -UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, - scalable_pool_uniform_fixedprovider) - ->Apply(&default_multiple_alloc_uniform_size) - ->Apply(&multithreaded); - -#endif + ->Apply(&singlethreaded) + // reduce iterations, to match os_provider benchmark + ->Iterations(50000); //BENCHMARK_MAIN(); int main(int argc, char **argv) { From fa7a6acdaa85379742c8906cb2f722c8369d284d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Wed, 19 Mar 2025 13:46:43 +0100 Subject: [PATCH 007/143] reduce number of threads with disjoint pool --- benchmark/benchmark.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index f57c0d5ae..9073e5b93 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -30,11 +30,10 @@ // The exact meaning of each argument depends on the benchmark, allocator, and size components used. 
// Refer to the 'argsName()' function in each component to find detailed descriptions of these arguments. +template <int max_threads> static void multithreaded(benchmark::internal::Benchmark *benchmark) { - benchmark->Threads(12); - benchmark->Threads(8); - benchmark->Threads(4); benchmark->Threads(1); + benchmark->DenseThreadRange(4, max_threads, 4); } static void singlethreaded(benchmark::internal::Benchmark *benchmark) { @@ -91,14 +90,16 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_fix, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_fix) ->Apply(&default_multiple_alloc_fix_size) - ->Apply(&multithreaded); + // Limit benchmarks to 4 threads, as the disjoint pool scales poorly with higher thread counts. + ->Apply(&multithreaded<4>); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_uniform, uniform_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_uniform) ->Apply(&default_multiple_alloc_uniform_size) - ->Apply(&multithreaded); + // Limit benchmarks to 4 threads, as the disjoint pool scales poorly with higher thread counts. 
+ ->Apply(&multithreaded<4>); #ifdef UMF_POOL_JEMALLOC_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, jemalloc_pool_fix, From 89443bfa2ead8999c6c27a88ded1eeb6ac30e014 Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Wed, 19 Mar 2025 16:05:14 +0000 Subject: [PATCH 008/143] Fix -Wformat warning in provider_tracking.c The warnings are visible when building UR and SYCL --- src/provider/provider_tracking.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/provider/provider_tracking.c b/src/provider/provider_tracking.c index da9d9ab77..1bef85854 100644 --- a/src/provider/provider_tracking.c +++ b/src/provider/provider_tracking.c @@ -260,7 +260,7 @@ static umf_result_t umfMemoryTrackerRemove(umf_memory_tracker_handle_t hTracker, LOG_DEBUG("memory region removed: tracker=%p, level=%i, pool=%p, ptr=%p, " "size=%zu", - (void *)hTracker, level, value->pool, ptr, value->size); + (void *)hTracker, level, (void *)value->pool, ptr, value->size); if (parent_value) { LOG_DEBUG( @@ -302,13 +302,14 @@ umfMemoryTrackerAddIpcSegment(umf_memory_tracker_handle_t hTracker, if (ret == 0) { LOG_DEBUG("IPC memory region is added, tracker=%p, ptr=%p, size=%zu, " "provider=%p, cache_entry=%p", - (void *)hTracker, ptr, size, provider, cache_entry); + (void *)hTracker, ptr, size, (void *)provider, + (void *)cache_entry); return UMF_RESULT_SUCCESS; } LOG_ERR("failed to insert tracker_ipc_info_t, ret=%d, ptr=%p, size=%zu, " "provider=%p, cache_entry=%p", - ret, ptr, size, provider, cache_entry); + ret, ptr, size, (void *)provider, (void *)cache_entry); umf_ba_free(hTracker->ipc_info_allocator, value); @@ -335,7 +336,8 @@ umfMemoryTrackerRemoveIpcSegment(umf_memory_tracker_handle_t hTracker, LOG_DEBUG("IPC memory region removed: tracker=%p, ptr=%p, size=%zu, " "provider=%p, cache_entry=%p", - (void *)hTracker, ptr, v->size, v->provider, v->ipc_cache_value); + (void *)hTracker, ptr, v->size, (void *)v->provider, + (void 
*)v->ipc_cache_value); umf_ba_free(hTracker->ipc_info_allocator, value); From 2eeb9a61057ab938b9feacff19ee85a397072a56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Thu, 20 Mar 2025 10:22:35 +0100 Subject: [PATCH 009/143] [CI][Bench] Use new version of bench dashboard incl. using new format of data (stored on 'benchmark-results' branch). --- .github/workflows/benchmarks.yml | 18 ++-- .github/workflows/nightly.yml | 7 +- .github/workflows/reusable_benchmarks.yml | 113 ++++++++++++++-------- .github/workflows/reusable_docs_build.yml | 36 +++++-- 4 files changed, 111 insertions(+), 63 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 7eb3c7b06..b18a41c4b 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -7,18 +7,22 @@ on: description: PR number (if 0, it'll run on the main) type: number bench_script_params: + # If you want to save the results of the manual run in 'benchmark-results' branch, + # you have to pass '--save XXX', where XXX is the label of your results. 
description: Parameters passed to script executing benchmark type: string required: false default: '' - upload_report: - description: 'Upload HTML report' - type: boolean - required: false - default: false + runner: + description: Runner + type: choice + required: true + default: 'L0_PERF' + options: + - L0_PERF permissions: - contents: read + contents: write pull-requests: write jobs: @@ -28,4 +32,4 @@ jobs: with: pr_no: ${{ inputs.pr_no }} bench_script_params: ${{ inputs.bench_script_params }} - upload_report: ${{ inputs.upload_report }} + runner: ${{ inputs.runner }} diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 1317482fd..b11d17fa4 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -248,9 +248,9 @@ jobs: call "C:\Program Files (x86)\Intel\oneAPI\setvars-vcvarsall.bat" ctest -C ${{matrix.build_type}} --output-on-failure --test-dir test - hwloc-fallback: # Scenarios where UMF_LINK_HWLOC_STATICALLY is set to OFF and hwloc is not installed in the system # The hwloc library is fetched implicitly + hwloc-fallback: name: "Fallback to static hwloc build" strategy: matrix: @@ -317,9 +317,8 @@ jobs: Benchmarks: uses: ./.github/workflows/reusable_benchmarks.yml permissions: - contents: read + contents: write pull-requests: write with: pr_no: '0' - bench_script_params: '--save baseline' - upload_report: true + bench_script_params: '--save Baseline_PVC' diff --git a/.github/workflows/reusable_benchmarks.yml b/.github/workflows/reusable_benchmarks.yml index 3953e98de..26f9c348b 100644 --- a/.github/workflows/reusable_benchmarks.yml +++ b/.github/workflows/reusable_benchmarks.yml @@ -1,5 +1,5 @@ # Executes benchmarks implemented in this repository using scripts -# for results visualization from intel/llvm (unified-runtime dir). +# for results visualization from intel/llvm. 
name: Benchmarks on: @@ -14,13 +14,13 @@ on: required: false type: string default: '' - upload_report: + runner: required: false - type: boolean - default: false + type: string + default: 'L0_PERF' permissions: - contents: read + contents: write pull-requests: write env: @@ -32,17 +32,9 @@ jobs: name: Benchmarks # run only on upstream; forks will not have the HW if: github.repository == 'oneapi-src/unified-memory-framework' - runs-on: L0_PERF + runs-on: ${{ inputs.runner }} steps: - # Workspace on self-hosted runners is not cleaned automatically. - # We have to delete the files created outside of using actions. - - name: Cleanup self-hosted workspace - if: always() - run: | - ls -la ./ - rm -rf ./* || true - - name: Add comment to PR uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 if: ${{ always() && inputs.pr_no != 0 }} @@ -97,23 +89,32 @@ jobs: - name: Build UMF run: cmake --build ${{env.BUILD_DIR}} -j $(nproc) - # Get scripts for benchmark data visualization. - # Use specific tag, as the scripts or files' location may change. - - name: Checkout SYCL + - name: Checkout UMF results branch + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + ref: benchmark-results + path: results-repo + + # Get scripts for benchmark data visualization (from SYCL repo). + # Use specific ref, as the scripts or files' location may change. + - name: Checkout benchmark scripts uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: intel/llvm - # [BENCHMARK] fix default timeout parameter - # https://github.com/intel/llvm/pull/17412 - ref: 357e9e0b253b7eba105d044e38452b3c09169f8a - path: sycl-repo - fetch-depth: 1 + # Note: The same ref is used in docs build (for dashboard generation)! 
+ # + # 20.03.2025 + # branch: unify-benchmark-ci + ref: cae7049c78c697b3ac94f931716d9efb53addcd8 + path: sc + sparse-checkout: | + devops/scripts/benchmarks - name: Install benchmarking scripts deps run: | python -m venv .venv source .venv/bin/activate - pip install -r ${{github.workspace}}/sycl-repo/unified-runtime/third_party/benchmark_requirements.txt + pip install -r ${{github.workspace}}/sc/devops/scripts/benchmarks/requirements.txt - name: Set core range and GPU mask run: | @@ -135,22 +136,21 @@ jobs: - name: Run UMF benchmarks id: benchmarks - working-directory: ${{env.BUILD_DIR}} run: > - source ${{github.workspace}}/.venv/bin/activate && - taskset -c ${{ env.CORES }} ${{ github.workspace }}/sycl-repo/unified-runtime/scripts/benchmarks/main.py + source .venv/bin/activate && + taskset -c ${{ env.CORES }} ./sc/devops/scripts/benchmarks/main.py ~/bench_workdir_umf --umf ${{env.BUILD_DIR}} - --compare baseline --timeout 3000 - ${{ inputs.upload_report && '--output-html' || '' }} - ${{ inputs.pr_no != 0 && '--output-markdown' || '' }} + --output-html remote + --results-dir ${{ github.workspace }}/results-repo + --output-markdown ${{ inputs.bench_script_params }} # In case it failed to add a comment, we can still print the results. 
- name: Print benchmark results - if: ${{ always() && inputs.pr_no != 0 }} - run: cat ${{env.BUILD_DIR}}/benchmark_results.md + if: ${{ always() }} + run: cat ${{ github.workspace }}/benchmark_results.md || true - name: Add comment to PR uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 @@ -160,7 +160,7 @@ jobs: let markdown = "" try { const fs = require('fs'); - markdown = fs.readFileSync('${{env.BUILD_DIR}}/benchmark_results.md', 'utf8'); + markdown = fs.readFileSync('${{ github.workspace }}/benchmark_results.md', 'utf8'); } catch(err) { } @@ -177,15 +177,42 @@ jobs: repo: context.repo.repo, body: body }) - - - name: Upload HTML report - if: ${{ always() && inputs.upload_report }} - uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 - with: - path: umf-repo/build/benchmark_results.html - key: benchmark-results-${{ github.run_id }} - - name: Get information about platform - if: ${{ always() }} - working-directory: ${{env.UMF_DIR}} - run: .github/scripts/get_system_info.sh + - name: Commit data.json and results directory + working-directory: results-repo + run: | + git config --global user.name "GitHub Actions Bot" + git config --global user.email "actions@github.com" + + for attempt in {1..5}; do + echo "Attempt #$attempt to push changes" + + rm -f data.json + cp ${{ github.workspace }}/sc/devops/scripts/benchmarks/html/data.json . + + git add data.json results/ + git commit -m "Add benchmark results and data.json" + + results_file=$(git diff HEAD~1 --name-only -- results/ | head -n 1) + + if git push origin benchmark-results; then + echo "Push succeeded" + break + fi + + echo "Push failed, retrying..." 
+ + if [ -n "$results_file" ]; then + mv $results_file ${{ github.workspace }}/temp_$(basename $results_file) + + git reset --hard origin/benchmark-results + git pull origin benchmark-results + + new_file="results/$(basename "$results_file")" + mv ${{ github.workspace }}/temp_$(basename $results_file) $new_file + fi + + echo "Regenerating data.json" + (cd ${{ github.workspace }} && ${{ github.workspace }}/sc/devops/scripts/benchmarks/main.py ~/bench_workdir_umf --dry-run --results-dir ${{ github.workspace }}/results-repo --output-html remote) + + done diff --git a/.github/workflows/reusable_docs_build.yml b/.github/workflows/reusable_docs_build.yml index 9317478bb..e12895aeb 100644 --- a/.github/workflows/reusable_docs_build.yml +++ b/.github/workflows/reusable_docs_build.yml @@ -45,19 +45,37 @@ jobs: -DUMF_DISABLE_HWLOC=ON cmake --build build --target docs - # If we upload HTML docs, we want to include benchmark results as well - - name: Download benchmark HTML before uploading docs + # + # Documentation is built. Now we want to add benchmark dashboard. + # We only do it if inputs.upload is set, as this job is also used for testing docs build. 
+ # + - name: Checkout benchmark scripts if: ${{ inputs.upload == true }} - id: download-bench-html - uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: - path: umf-repo/build/benchmark_results.html - key: benchmark-results- + repository: intel/llvm + # 20.03.2025 + # branch: unify-benchmark-ci + ref: cae7049c78c697b3ac94f931716d9efb53addcd8 + path: sc + sparse-checkout: | + devops/scripts/benchmarks - - name: Move benchmark HTML - if: ${{ inputs.upload == true && steps.download-bench-html.outputs.cache-hit != '' }} + - name: Move benchmark HTML files + if: ${{ inputs.upload == true }} + working-directory: ${{ github.workspace }}/build/docs_build/generated/html + run: | + mkdir performance + mv ${{ github.workspace }}/sc/devops/scripts/benchmarks/html/* performance/ + + - name: Replace config.js + if: ${{ inputs.upload == true }} + working-directory: ${{ github.workspace }}/build/docs_build/generated/html run: | - mv umf-repo/build/benchmark_results.html ${{github.workspace}}/build/docs_build/generated/html + cat << 'EOF' > ./performance/config.js + remoteDataUrl = 'https://raw.githubusercontent.com/oneapi-src/unified-memory-framework/refs/heads/benchmark-results/data.json'; + defaultCompareNames = ["Baseline_PVC"]; + EOF - name: Upload artifact if: ${{ inputs.upload == true }} From bdac43d9b06b045486626b6e60d19a14c2734db2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Fri, 21 Mar 2025 12:28:32 +0100 Subject: [PATCH 010/143] [CI] Minor update in get_system_info.sh --- .github/scripts/get_system_info.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/scripts/get_system_info.sh b/.github/scripts/get_system_info.sh index 81c54ce98..573c7195d 100755 --- a/.github/scripts/get_system_info.sh +++ b/.github/scripts/get_system_info.sh @@ -7,7 +7,7 @@ function check_L0_version { if command -v dpkg &> /dev/null; 
then - dpkg -l | grep level-zero && return + dpkg -l | grep -iE "level-zero|libze|Compute Runtime|Level Zero" && return fi if command -v rpm &> /dev/null; then @@ -34,7 +34,7 @@ function system_info { numactl -H echo "**********VGA info**********" - lspci | grep -i VGA + lspci | grep -iE "vga|display|gpu" echo "**********CUDA Version**********" if command -v nvidia-smi &> /dev/null; then From 49a1a33bb4f4616b20e75df541c6b6d9cd056e6f Mon Sep 17 00:00:00 2001 From: "Dubinov, Igor" Date: Mon, 24 Mar 2025 15:28:39 +0100 Subject: [PATCH 011/143] Fix for uninitialized variable --- test/ctl/ctl_api.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/ctl/ctl_api.cpp b/test/ctl/ctl_api.cpp index ff6491c16..93c059052 100644 --- a/test/ctl/ctl_api.cpp +++ b/test/ctl/ctl_api.cpp @@ -74,6 +74,8 @@ class CtlTest : public ::testing::Test { const char *msg; }; + CtlTest() : provider(NULL), pool(NULL) {} + void SetUp() override { provider = NULL; pool = NULL; From 8065bb06e62851b0485b8de930b43362c380e784 Mon Sep 17 00:00:00 2001 From: Patryk Kaminski Date: Mon, 10 Mar 2025 13:17:39 +0100 Subject: [PATCH 012/143] Add sycl compatibility workflow --- .github/workflows/nightly.yml | 3 + .github/workflows/reusable_sycl.yml | 122 ++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+) create mode 100644 .github/workflows/reusable_sycl.yml diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index b11d17fa4..c664b7f87 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -322,3 +322,6 @@ jobs: with: pr_no: '0' bench_script_params: '--save Baseline_PVC' + + SYCL: + uses: ./.github/workflows/reusable_sycl.yml diff --git a/.github/workflows/reusable_sycl.yml b/.github/workflows/reusable_sycl.yml new file mode 100644 index 000000000..22682b2ed --- /dev/null +++ b/.github/workflows/reusable_sycl.yml @@ -0,0 +1,122 @@ +# UMF compatibility with intel/llvm workflow. 
+# The latest llvm daily release and the last working release are tested. +# Triggered in the Nightly workflow. +name: SYCL + +on: workflow_call + +permissions: + contents: read + +jobs: + sycl-compatibility: + # run only on upstream; forks will not have the HW + if: github.repository == 'oneapi-src/unified-memory-framework' + name: ${{matrix.llvm_tag}} llvm build + runs-on: ["DSS-LEVEL_ZERO", "DSS-UBUNTU"] + + strategy: + matrix: + llvm_tag: ["latest", "nightly-2025-02-08"] # "latest" or llvm with UMF v0.11.0-dev2 + + steps: + # Install sycl + - name: Clean up + if: always() + run: rm -rf llvm sycl_linux.tar.gz + + - name: Download llvm daily release + run: | + if [ "${{ matrix.llvm_tag }}" == "latest" ]; then + llvm_tag=$(curl -s https://api.github.com/repos/intel/llvm/releases | awk -F'"' '/"tag_name":/ {print $4; exit}') + else + llvm_tag="${{ matrix.llvm_tag }}" + fi + download_url="https://github.com/intel/llvm/releases/download/${llvm_tag}/sycl_linux.tar.gz" + wget --no-verbose $download_url -O sycl_linux.tar.gz + + - name: Extract llvm + run: | + mkdir llvm + tar -xzf sycl_linux.tar.gz -C llvm --strip-components=1 + + - name: Remove UMF installed with llvm + run: rm -f llvm/lib/libumf* + + - name: Add sycl to PATH + run: | + echo "${{ github.workspace }}/llvm/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=${{ github.workspace }}/llvm/lib:$LD_LIBRARY_PATH" >> $GITHUB_ENV + + # Install UMF + - name: Checkout UMF + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + path: umf_repo + fetch-depth: 0 + + - name: Configure UMF + working-directory: umf_repo + run: > + cmake + -B build + -DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/llvm + -DCMAKE_BUILD_TYPE=Release + -DCMAKE_C_COMPILER=gcc + -DCMAKE_CXX_COMPILER=g++ + -DUMF_BUILD_SHARED_LIBRARY=ON + -DUMF_BUILD_TESTS=OFF + -DUMF_BUILD_EXAMPLES=OFF + + - name: Build and install UMF + working-directory: umf_repo + run: cmake --build build --target install -j$(nproc) + + - name: Print 
installed lib files + run: ls -l llvm/lib + + # Test sycl-ls + - name: Run sycl-ls + run: | + ./llvm/bin/sycl-ls | tee sycl-ls-output.log + grep -q "level_zero:gpu" sycl-ls-output.log + + # Test several sycl e2e test + # These are arbitrarily picked tests to check the compatibility + # Note that some intel/llvm tests may be flaky, although I haven't noticed such a behavior in the following tests + - name: Checkout sycl + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: intel/llvm + path: sycl_repo + fetch-depth: 1 + ref: sycl + + - name: Create sycl tests build directory + run: | + TESTS_BUILD_DIR=${{ github.workspace }}/sycl_repo/sycl/test-e2e/build + mkdir $TESTS_BUILD_DIR + echo "TESTS_BUILD_DIR=$TESTS_BUILD_DIR" >> $GITHUB_ENV + + - name: Build sycl e2e tests + working-directory: sycl_repo + run: | + ${{github.workspace}}/llvm/bin/clang++ -fsycl sycl/test-e2e/AbiNeutral/submit-kernel.cpp -o ${{env.TESTS_BUILD_DIR}}/submit-kernel -Iinclude + ${{github.workspace}}/llvm/bin/clang++ -fsycl sycl/test-e2e/Adapters/interop-l0-direct.cpp -o ${{env.TESTS_BUILD_DIR}}/interop-l0-direct -lze_loader -Iinclude + ${{github.workspace}}/llvm/bin/clang++ -fsycl sycl/test-e2e/Adapters/level_zero_interop_memcpy.cpp -o ${{env.TESTS_BUILD_DIR}}/level_zero_interop_memcpy -Iinclude + ${{github.workspace}}/llvm/bin/clang++ -fsycl sycl/test-e2e/Basic/build_log.cpp -o ${{env.TESTS_BUILD_DIR}}/build_log -Iinclude + ${{github.workspace}}/llvm/bin/clang++ -fsycl sycl/test-e2e/PerformanceTests/ParallelFor/parallel_for_range_roundup.cpp -fsycl-range-rounding=force -o ${{env.TESTS_BUILD_DIR}}/parallel_for_range_roundup -Iinclude + ${{github.workspace}}/llvm/bin/clang++ -fsycl sycl/test-e2e/USM/fill_any_size.cpp -o ${{env.TESTS_BUILD_DIR}}/fill_any_size -Iinclude + + - name: Run sycl e2e tests + env: + ONEAPI_DEVICE_SELECTOR: level_zero:gpu + UMF_LOG: "level:debug;flush:debug;output:stdout;pid:yes" + working-directory: ${{env.TESTS_BUILD_DIR}} + 
run: | + echo "---Run submit-kernel test" && ./submit-kernel + echo "---Run interop-l0-direct test" && ./interop-l0-direct + echo "---Run level_zero_interop_memcpy test" && ./level_zero_interop_memcpy + echo "---Run build_log test" && ./build_log + echo "---Run parallel_for_range_roundup test" && ./parallel_for_range_roundup + echo "---Run fill_any_size test" && ./fill_any_size From 8492c626dd30e5a7776cca82a7fe2cd32bc1a279 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Tue, 25 Mar 2025 16:18:13 +0100 Subject: [PATCH 013/143] [CI][Bench] Add compare option to manual bench runs --- .github/workflows/benchmarks.yml | 8 ++++++++ .github/workflows/reusable_benchmarks.yml | 20 +++++++++++++++++--- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index b18a41c4b..7ee8269d2 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -13,6 +13,13 @@ on: type: string required: false default: '' + bench_script_compare: + description: Compare label, passed to script executing benchmark as '--compare