./0000775000175100017510000000000015176134566011121 5ustar jenkinsjenkins./plugin/0000775000175100017510000000000015176134562012413 5ustar jenkinsjenkins./plugin/exportmap0000664000175100017510000000027015176134562014354 0ustar jenkinsjenkins{ global: roctracer_plugin_initialize; roctracer_plugin_finalize; roctracer_plugin_write_callback_record; roctracer_plugin_write_activity_records; local: *; };./plugin/CMakeLists.txt0000664000175100017510000000243715176134562015161 0ustar jenkinsjenkins################################################################################ ## Copyright (c) 2022 Advanced Micro Devices, Inc. ## ## Permission is hereby granted, free of charge, to any person obtaining a copy ## of this software and associated documentation files (the "Software"), to ## deal in the Software without restriction, including without limitation the ## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ## sell copies of the Software, and to permit persons to whom the Software is ## furnished to do so, subject to the following conditions: ## ## The above copyright notice and this permission notice shall be included in ## all copies or substantial portions of the Software. ## ## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ## IN THE SOFTWARE. 
################################################################################ add_subdirectory(file)./plugin/file/0000775000175100017510000000000015176134562013332 5ustar jenkinsjenkins./plugin/file/CMakeLists.txt0000664000175100017510000000472715176134562016104 0ustar jenkinsjenkins################################################################################ ## Copyright (c) 2022 Advanced Micro Devices, Inc. ## ## Permission is hereby granted, free of charge, to any person obtaining a copy ## of this software and associated documentation files (the "Software"), to ## deal in the Software without restriction, including without limitation the ## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ## sell copies of the Software, and to permit persons to whom the Software is ## furnished to do so, subject to the following conditions: ## ## The above copyright notice and this permission notice shall be included in ## all copies or substantial portions of the Software. ## ## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ## IN THE SOFTWARE. 
################################################################################ find_package(amd_comgr REQUIRED CONFIG PATHS /opt/rocm/ PATH_SUFFIXES lib/cmake/amd_comgr ) MESSAGE(STATUS "Code Object Manager found at ${amd_comgr_DIR}.") file(GLOB FILE_SOURCES "*.cpp") add_library(file_plugin ${LIBRARY_TYPE} ${FILE_SOURCES}) set_target_properties(file_plugin PROPERTIES CXX_VISIBILITY_PRESET hidden LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/../exportmap LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR} INSTALL_RPATH "${ROCM_APPEND_PRIVLIB_RPATH}") target_compile_definitions(file_plugin PRIVATE HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1) target_include_directories(file_plugin PRIVATE ${PROJECT_SOURCE_DIR}/inc) target_link_options(file_plugin PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exportmap -Wl,--no-undefined) target_link_libraries(file_plugin PRIVATE util roctracer amd_comgr hsa-runtime64::hsa-runtime64 stdc++fs amd_comgr) install(TARGETS file_plugin LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME} COMPONENT runtime) install(TARGETS file_plugin LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME} COMPONENT asan) ./plugin/file/file.cpp0000664000175100017510000003423615176134562014765 0ustar jenkinsjenkins/* Copyright (c) 2022 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "debug.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // Macro to check ROCtracer calls status #define CHECK_ROCTRACER(call) \ do { \ if ((call) != 0) fatal("%s", roctracer_error_string()); \ } while (false) namespace fs = std::experimental::filesystem; namespace { uint32_t GetPid() { static uint32_t pid = syscall(__NR_getpid); return pid; } /* The function extracts the kernel name from input string. By using the iterators it finds the window in the string which contains only the kernel name. 
For example 'Foo::foo(a[], int (int))' -> 'foo'*/ std::string truncate_name(const std::string& name) { auto rit = name.rbegin(); auto rend = name.rend(); uint32_t counter = 0; char open_token = 0; char close_token = 0; while (rit != rend) { if (counter == 0) { switch (*rit) { case ')': counter = 1; open_token = ')'; close_token = '('; break; case '>': counter = 1; open_token = '>'; close_token = '<'; break; case ']': counter = 1; open_token = ']'; close_token = '['; break; case ' ': ++rit; continue; } if (counter == 0) break; } else { if (*rit == open_token) counter++; if (*rit == close_token) counter--; } ++rit; } auto rbeg = rit; while ((rit != rend) && (*rit != ' ') && (*rit != ':')) rit++; return name.substr(rend - rit, rit - rbeg); } #define amd_comgr_(call) \ do { \ if (amd_comgr_status_t status = amd_comgr_##call; status != AMD_COMGR_STATUS_SUCCESS) { \ const char* reason = ""; \ amd_comgr_status_string(status, &reason); \ fatal(#call " failed: %s", reason); \ } \ } while (false) // C++ symbol demangle std::string cxx_demangle(const std::string& symbol) { amd_comgr_data_t mangled_data; amd_comgr_(create_data(AMD_COMGR_DATA_KIND_BYTES, &mangled_data)); amd_comgr_(set_data(mangled_data, symbol.size(), symbol.data())); amd_comgr_data_t demangled_data; amd_comgr_(demangle_symbol_name(mangled_data, &demangled_data)); size_t demangled_size = 0; amd_comgr_(get_data(demangled_data, &demangled_size, nullptr)); std::string demangled_str; demangled_str.resize(demangled_size); amd_comgr_(get_data(demangled_data, &demangled_size, demangled_str.data())); amd_comgr_(release_data(mangled_data)); amd_comgr_(release_data(demangled_data)); return demangled_str; } class file_plugin_t { private: class output_file_t { public: output_file_t(std::string name) : name_(std::move(name)) {} std::string name() const { return name_; } template std::ostream& operator<<(T&& value) { if (!is_open()) open(); return stream_ << std::forward(value); } std::ostream& operator<<(std::ostream& 
(*func)(std::ostream&)) { if (!is_open()) open(); return stream_ << func; } void open() { // If the stream is already in the failed state, there's no need to try to open the file. if (fail()) return; const char* output_dir = getenv("ROCP_OUTPUT_DIR"); if (output_dir == nullptr) { stream_.copyfmt(std::cout); stream_.clear(std::cout.rdstate()); stream_.basic_ios::rdbuf(std::cout.rdbuf()); return; } fs::path output_prefix(output_dir); if (!fs::is_directory(fs::status(output_prefix))) { if (!stream_.fail()) warning("Cannot open output directory '%s'", output_dir); stream_.setstate(std::ios_base::failbit); return; } std::stringstream ss; ss << GetPid() << "_" << name_; stream_.open(output_prefix / ss.str()); } bool is_open() const { return stream_.is_open(); } bool fail() const { return stream_.fail(); } private: const std::string name_; std::ofstream stream_; }; output_file_t* get_output_file(uint32_t domain, uint32_t op = 0) { switch (domain) { case ACTIVITY_DOMAIN_ROCTX: return &roctx_file_; case ACTIVITY_DOMAIN_HSA_API: return &hsa_api_file_; case ACTIVITY_DOMAIN_HIP_API: return &hip_api_file_; case ACTIVITY_DOMAIN_HIP_OPS: return &hip_activity_file_; case ACTIVITY_DOMAIN_HSA_OPS: if (op == HSA_OP_ID_COPY) { return &hsa_async_copy_file_; } else if (op == HSA_OP_ID_RESERVED1) { return &pc_sample_file_; } default: assert(!"domain/op not supported!"); break; } return nullptr; } public: file_plugin_t() { // Dumping HSA handles for agents output_file_t hsa_handles("hsa_handles.txt"); [[maybe_unused]] hsa_status_t status = hsa_iterate_agents( [](hsa_agent_t agent, void* user_data) { auto* file = static_cast(user_data); hsa_device_type_t type; if (hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type) != HSA_STATUS_SUCCESS) return HSA_STATUS_ERROR; *file << std::hex << std::showbase << agent.handle << " agent " << ((type == HSA_DEVICE_TYPE_CPU) ? 
"cpu" : "gpu") << "\n"; return HSA_STATUS_SUCCESS; }, &hsa_handles); assert(status == HSA_STATUS_SUCCESS && "failed to iterate HSA agents"); if (hsa_handles.fail()) { warning("Cannot write to '%s'", hsa_handles.name().c_str()); return; } // App begin timestamp begin_ts_file.txt output_file_t begin_ts("begin_ts_file.txt"); roctracer_timestamp_t app_begin_timestamp; CHECK_ROCTRACER(roctracer_get_timestamp(&app_begin_timestamp)); begin_ts << std::dec << app_begin_timestamp << "\n"; if (begin_ts.fail()) { warning("Cannot write to '%s'", begin_ts.name().c_str()); return; } valid_ = true; } int write_callback_record(const roctracer_record_t* record, const void* callback_data) { std::stringstream ss; output_file_t* output_file{nullptr}; switch (record->domain) { case ACTIVITY_DOMAIN_ROCTX: { const roctx_api_data_t* data = reinterpret_cast(callback_data); output_file = get_output_file(ACTIVITY_DOMAIN_ROCTX); ss << std::dec << record->begin_ns << " " << record->process_id << ":" << record->thread_id << " " << record->op << ":" << data->args.id << ":\"" << (data->args.message ? data->args.message : "") << "\"" << "\n"; *output_file << ss.str(); break; } case ACTIVITY_DOMAIN_HSA_API: { const hsa_api_data_t* data = reinterpret_cast(callback_data); output_file = get_output_file(ACTIVITY_DOMAIN_HSA_API); ss << std::dec << record->begin_ns << ":" << ((record->op == HSA_API_ID_hsa_shut_down) ? 
record->begin_ns : record->end_ns) << " " << record->process_id << ":" << record->thread_id << " " << hsa_api_data_pair_t(record->op, *data) << " :" << std::dec << data->correlation_id << "\n"; *output_file << ss.str(); break; } case ACTIVITY_DOMAIN_HIP_API: { const hip_api_data_t* data = reinterpret_cast(callback_data); std::string kernel_name; if (record->kernel_name) { static bool truncate = []() { const char* env_var = getenv("ROCP_TRUNCATE_NAMES"); return env_var && std::atoi(env_var) != 0; }(); kernel_name = cxx_demangle(record->kernel_name); if (truncate) kernel_name = truncate_name(kernel_name); kernel_name = " kernel=" + kernel_name; } output_file = get_output_file(ACTIVITY_DOMAIN_HIP_API); ss << std::dec << record->begin_ns << ":" << record->end_ns << " " << record->process_id << ":" << record->thread_id << " " << hipApiString((hip_api_id_t)record->op, data) << kernel_name << " :" << std::dec << data->correlation_id << "\n"; *output_file << ss.str(); break; } default: warning("write_callback_record: ignored record for domain %d", record->domain); break; } return (output_file && output_file->fail()) ? -1 : 0; } int write_activity_records(const roctracer_record_t* begin, const roctracer_record_t* end) { while (begin != end) { std::stringstream ss; output_file_t* output_file{nullptr}; const char* name = roctracer_op_string(begin->domain, begin->op, begin->kind); switch (begin->domain) { case ACTIVITY_DOMAIN_HIP_OPS: { // The post-processing script cannot handle HIP ops without a correlation ID. The // correlation ID is needed to connect the record to a HIP stream and originating thread. // The script could be modified to handle ops without correlation IDs, but for backward // compatibilty, we are simply dropping the records here. 
if (begin->correlation_id == 0) break; output_file = get_output_file(ACTIVITY_DOMAIN_HIP_OPS); ss << std::dec << begin->begin_ns << ":" << begin->end_ns << " " << begin->device_id << ":" << begin->queue_id << " " << ((begin->op == HIP_OP_ID_DISPATCH && begin->kernel_name != nullptr) ? truncate_name(cxx_demangle(begin->kernel_name)) : name) << ":" << begin->correlation_id << ":" << GetPid() << "\n"; *output_file << ss.str(); break; } case ACTIVITY_DOMAIN_HSA_OPS: output_file = get_output_file(ACTIVITY_DOMAIN_HSA_OPS, begin->op); if (begin->op == HSA_OP_ID_COPY) { ss << std::dec << begin->begin_ns << ":" << begin->end_ns << " async-copy:" << begin->correlation_id << ":" << GetPid() << "\n"; *output_file << ss.str(); break; } else if (begin->op == HSA_OP_ID_RESERVED1) { ss << std::dec << begin->pc_sample.se << " " << begin->pc_sample.cycle << " " << std::hex << std::showbase << begin->pc_sample.pc << " " << name << "\n"; *output_file << ss.str(); break; } [[fallthrough]]; default: { warning("write_activity_records: ignored activity for domain %d", begin->domain); break; } } if (output_file && output_file->fail()) return -1; CHECK_ROCTRACER(roctracer_next_record(begin, &begin)); } return 0; } bool is_valid() const { return valid_; } private: bool valid_{false}; output_file_t roctx_file_{"roctx_trace.txt"}, hsa_api_file_{"hsa_api_trace.txt"}, hip_api_file_{"hip_api_trace.txt"}, hip_activity_file_{"hcc_ops_trace.txt"}, hsa_async_copy_file_{"async_copy_trace.txt"}, pc_sample_file_{"pcs_trace.txt"}; }; file_plugin_t* file_plugin = nullptr; } // namespace ROCTRACER_EXPORT int roctracer_plugin_initialize(uint32_t roctracer_major_version, uint32_t roctracer_minor_version) { if (roctracer_major_version != ROCTRACER_VERSION_MAJOR || roctracer_minor_version < ROCTRACER_VERSION_MINOR) return -1; if (file_plugin != nullptr) return -1; file_plugin = new file_plugin_t(); if (file_plugin->is_valid()) return 0; // The plugin failed to initialied, destroy it and return an error. 
delete file_plugin; file_plugin = nullptr; return -1; } ROCTRACER_EXPORT void roctracer_plugin_finalize() { if (!file_plugin) return; delete file_plugin; file_plugin = nullptr; } ROCTRACER_EXPORT int roctracer_plugin_write_callback_record(const roctracer_record_t* record, const void* callback_data) { if (!file_plugin || !file_plugin->is_valid()) return -1; return file_plugin->write_callback_record(record, callback_data); } ROCTRACER_EXPORT int roctracer_plugin_write_activity_records(const roctracer_record_t* begin, const roctracer_record_t* end) { if (!file_plugin || !file_plugin->is_valid()) return -1; return file_plugin->write_activity_records(begin, end); } ./defaults.sh0000664000175100017510000000243015176134562013257 0ustar jenkinsjenkins################################################################################ # Copyright (c) 2018-2022 Advanced Micro Devices, Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to # deal in the Software without restriction, including without limitation the # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or # sell copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. 
################################################################################ BUILD_DIR=build TO_CLEAN=yes ./CMakeLists.txt0000664000175100017510000002274615176134566013674 0ustar jenkinsjenkins################################################################################ ## Copyright (c) 2018-2022 Advanced Micro Devices, Inc. ## ## Permission is hereby granted, free of charge, to any person obtaining a copy ## of this software and associated documentation files (the "Software"), to ## deal in the Software without restriction, including without limitation the ## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ## sell copies of the Software, and to permit persons to whom the Software is ## furnished to do so, subject to the following conditions: ## ## The above copyright notice and this permission notice shall be included in ## all copies or substantial portions of the Software. ## ## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ## IN THE SOFTWARE. 
################################################################################ cmake_minimum_required(VERSION 3.18.0) project(roctracer VERSION 4.1.0) if(${ROCM_PATCH_VERSION}) set(PROJECT_VERSION_PATCH ${ROCM_PATCH_VERSION}) set(PROJECT_VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}") endif() include(GNUInstallDirs) # set default ROCM_PATH if(NOT DEFINED ROCM_PATH) set(ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory") endif() ## Build is not supported on Windows plaform if(WIN32) message(FATAL_ERROR "Windows build is not supported.") endif() set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) add_compile_options(-Wall -Wno-error=ignored-attributes -Werror) # To set addition RUNPATH in libraries # installed in /opt/rocm-ver/lib/roctracer set(ROCM_APPEND_PRIVLIB_RPATH "$ORIGIN/..") set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) find_package(hsa-runtime64 REQUIRED CONFIG PATHS ${ROCM_PATH}) find_package(HIP REQUIRED CONFIG PATHS ${ROCM_PATH}) if(NOT DEFINED LIBRARY_TYPE) set(LIBRARY_TYPE SHARED) endif() ## Build libraries add_subdirectory(src) ## Build tests if(${LIBRARY_TYPE} STREQUAL SHARED) add_subdirectory(test) endif() ## Build Plugins add_subdirectory(plugin) if(${LIBRARY_TYPE} STREQUAL SHARED) ## Installation and packaging if(DEFINED CPACK_PACKAGING_INSTALL_PREFIX) get_filename_component(DEST_NAME ${CPACK_PACKAGING_INSTALL_PREFIX} NAME) get_filename_component(DEST_DIR ${CPACK_PACKAGING_INSTALL_PREFIX} DIRECTORY) set(CPACK_PACKAGING_INSTALL_PREFIX ${DEST_DIR}) endif() message("-----------Dest-name: ${DEST_NAME}") message("------Install-prefix: ${CMAKE_INSTALL_PREFIX}") message("-----------CPACK-dir: ${CPACK_PACKAGING_INSTALL_PREFIX}") ## Packaging directives set(CPACK_GENERATOR "DEB" "RPM" "TGZ" CACHE STRING "CPACK GENERATOR DEB;RPM") set(ENABLE_LDCONFIG ON CACHE BOOL "Set library links and caches using ldconfig.") set(CPACK_PACKAGE_NAME "${PROJECT_NAME}") 
set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.") set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR}) set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR}) set(CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH}) set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}") set(CPACK_PACKAGE_CONTACT "ROCm Profiler Support ") set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "AMD ROCTRACER library") set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md") if(DEFINED ENV{ROCM_LIBPATCH_VERSION}) set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION}.$ENV{ROCM_LIBPATCH_VERSION}") message("Using CPACK_PACKAGE_VERSION ${CPACK_PACKAGE_VERSION}") endif() ## Install license file install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR} COMPONENT runtime) install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR}-asan COMPONENT asan) ## Debian package specific variables if(DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE}) set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE}) else() set(CPACK_DEBIAN_PACKAGE_RELEASE "local") endif() message("Using CPACK_DEBIAN_PACKAGE_RELEASE ${CPACK_DEBIAN_PACKAGE_RELEASE}") set(CPACK_DEB_COMPONENT_INSTALL ON) set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT") set(CPACK_DEBIAN_RUNTIME_PACKAGE_NAME "${PROJECT_NAME}") set(CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS "rocm-core") set(CPACK_DEBIAN_DEV_PACKAGE_NAME "${PROJECT_NAME}-dev") set(CPACK_DEBIAN_DEV_PACKAGE_DEPENDS "${PROJECT_NAME}, hsa-rocr-dev, rocm-core") set(CPACK_DEBIAN_TESTS_PACKAGE_NAME "${PROJECT_NAME}-tests") set(CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS "${PROJECT_NAME}-dev") # Debian package specific variable for ASAN set(CPACK_DEBIAN_ASAN_PACKAGE_NAME "${PROJECT_NAME}-asan" ) set(CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS "rocm-core-asan" ) ## RPM package specific variables if(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE}) set(CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE}) 
else() set(CPACK_RPM_PACKAGE_RELEASE "local") endif() message("Using CPACK_RPM_PACKAGE_RELEASE ${CPACK_RPM_PACKAGE_RELEASE}") set(CPACK_RPM_PACKAGE_LICENSE "MIT") ## 'dist' breaks manual builds on debian systems due to empty Provides execute_process(COMMAND rpm --eval %{?dist} RESULT_VARIABLE PROC_RESULT OUTPUT_VARIABLE EVAL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE) message("RESULT_VARIABLE ${PROC_RESULT} OUTPUT_VARIABLE: ${EVAL_RESULT}") if(PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "") string(APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}") endif() set(CPACK_RPM_COMPONENT_INSTALL ON) set(CPACK_RPM_FILE_NAME "RPM-DEFAULT") set(CPACK_RPM_RUNTIME_PACKAGE_NAME "${PROJECT_NAME}") set(CPACK_RPM_RUNTIME_PACKAGE_REQUIRES "rocm-core") set(CPACK_RPM_DEV_PACKAGE_NAME "${PROJECT_NAME}-devel") set(CPACK_RPM_DEV_PACKAGE_REQUIRES "${PROJECT_NAME}, rocm-core") set(CPACK_RPM_DEV_PACKAGE_PROVIDES "${PROJECT_NAME}-dev") set(CPACK_RPM_DEV_PACKAGE_OBSOLETES "${PROJECT_NAME}-dev") set(CPACK_RPM_TESTS_PACKAGE_NAME "${PROJECT_NAME}-tests") set(CPACK_RPM_TESTS_PACKAGE_REQUIRES "${PROJECT_NAME}-devel, rocm-llvm-devel") message("CPACK_RPM_PACKAGE_RELEASE: ${CPACK_RPM_PACKAGE_RELEASE}") # RPM package specific variable for ASAN set(CPACK_RPM_ASAN_PACKAGE_NAME "${PROJECT_NAME}-asan" ) set(CPACK_RPM_ASAN_PACKAGE_REQUIRES "rocm-core-asan" ) #Disable build id for rocprofiler as its creating transaction error set ( CPACK_RPM_SPEC_MORE_DEFINE "%define _build_id_links none %global __strip ${CPACK_STRIP_EXECUTABLE} %global __objdump ${CPACK_OBJDUMP_EXECUTABLE} %global __objcopy ${CPACK_OBJCOPY_EXECUTABLE} %global __readelf ${CPACK_READELF_EXECUTABLE}") if(NOT ROCM_DEP_ROCMCORE) string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_RUNTIME_PACKAGE_REQUIRES ${CPACK_RPM_RUNTIME_PACKAGE_REQUIRES}) string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_DEV_PACKAGE_REQUIRES ${CPACK_RPM_DEV_PACKAGE_REQUIRES}) string(REGEX REPLACE ",? 
?rocm-core-asan" "" CPACK_RPM_ASAN_PACKAGE_REQUIRES ${CPACK_RPM_ASAN_PACKAGE_REQUIRES}) string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS ${CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS}) string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_DEV_PACKAGE_DEPENDS ${CPACK_DEBIAN_DEV_PACKAGE_DEPENDS}) string(REGEX REPLACE ",? ?rocm-core-asan" "" CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS ${CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS}) endif() if(ENABLE_ASAN_PACKAGING) # ASAN Package requires asan component with only libraries and license file set(CPACK_COMPONENTS_ALL asan) else() set(CPACK_COMPONENTS_ALL runtime dev tests) endif() include(CPack) cpack_add_component(runtime DISPLAY_NAME "Runtime" DESCRIPTION "Dynamic libraries for the ROCtracer") cpack_add_component(dev DISPLAY_NAME "Devel" DESCRIPTION "Header files and documentation for ROCtracer") cpack_add_component(tests DISPLAY_NAME "Tests" DESCRIPTION "Tests for the ROCtracer" DEPENDS runtime) cpack_add_component(asan DISPLAY_NAME "ASAN" DESCRIPTION "ASAN libraries for the ROCtracer") endif() find_package(Doxygen) if(DOXYGEN_FOUND) ## Set input and output files set(DOXYGEN_IN ${CMAKE_CURRENT_SOURCE_DIR}/doc/Doxyfile.in) set(DOXYGEN_OUT ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile) ## Request to configure the file configure_file(${DOXYGEN_IN} ${DOXYGEN_OUT} @ONLY) add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/doc/html/index.html ${CMAKE_CURRENT_BINARY_DIR}/doc/latex/refman.pdf COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_OUT} COMMAND make -C ${CMAKE_CURRENT_BINARY_DIR}/doc/latex pdf MAIN_DEPENDENCY ${DOXYGEN_OUT} ${DOXYGEN_IN} DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer.h ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_plugin.h COMMENT "Generating documentation") add_custom_target(doc DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/doc/html/index.html ${CMAKE_CURRENT_BINARY_DIR}/doc/latex/refman.pdf) install(FILES "${CMAKE_CURRENT_BINARY_DIR}/doc/latex/refman.pdf" DESTINATION ${CMAKE_INSTALL_DOCDIR} RENAME "roctracer.pdf" OPTIONAL 
COMPONENT dev) install(DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/doc/html/" DESTINATION ${CMAKE_INSTALL_DATADIR}/html/${PROJECT_NAME} OPTIONAL COMPONENT dev) endif() ./.clang-format0000664000175100017510000000347315176134562013477 0ustar jenkinsjenkins--- Language: Cpp # BasedOnStyle: Google AccessModifierOffset: -1 ConstructorInitializerIndentWidth: 4 AlignEscapedNewlinesLeft: false AlignTrailingComments: true AlignConsecutiveAssignments: false AlignOperands: false AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: false AllowShortIfStatementsOnASingleLine: true AllowShortLoopsOnASingleLine: true AllowShortFunctionsOnASingleLine: All AlwaysBreakAfterDefinitionReturnType: false AlwaysBreakTemplateDeclarations: false AlwaysBreakBeforeMultilineStrings: true BreakBeforeBinaryOperators: false BreakBeforeTernaryOperators: true BreakConstructorInitializersBeforeComma: false BinPackParameters: true ColumnLimit: 100 ConstructorInitializerAllOnOneLineOrOnePerLine: true ExperimentalAutoDetectBinPacking: false IndentCaseLabels: true IndentWrappedFunctionNames: false IndentFunctionDeclarationAfterType: false MaxEmptyLinesToKeep: 2 KeepEmptyLinesAtTheStartOfBlocks: false NamespaceIndentation: None ObjCSpaceAfterProperty: false ObjCSpaceBeforeProtocolList: false PenaltyBreakBeforeFirstCallParameter: 1 PenaltyBreakComment: 300 PenaltyBreakString: 1000 PenaltyBreakFirstLessLess: 120 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 200 DerivePointerAlignment: false PointerAlignment: Left SpacesBeforeTrailingComments: 2 Cpp11BracedListStyle: true Standard: Auto IndentWidth: 2 TabWidth: 8 UseTab: Never BreakBeforeBraces: Attach SpacesInParentheses: false SpacesInAngles: false SpaceInEmptyParentheses: false SpacesInCStyleCastParentheses: false SpacesInContainerLiterals: true SpaceBeforeAssignmentOperators: true ContinuationIndentWidth: 4 CommentPragmas: '^ IWYU pragma:' ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] SpaceBeforeParens: 
ControlStatements DisableFormat: false SortIncludes: false ... ./script/0000775000175100017510000000000015176134562012421 5ustar jenkinsjenkins./script/gen_ostream_ops.py0000775000175100017510000003314515176134562016170 0ustar jenkinsjenkins#!/usr/bin/env python3 ################################################################################ # Copyright (c) 2018-2022 Advanced Micro Devices, Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to # deal in the Software without restriction, including without limitation the # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or # sell copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. ################################################################################ import os, sys, re import CppHeaderParser import argparse import string LICENSE = \ '/*\n' + \ 'Copyright (c) 2018 Advanced Micro Devices, Inc. 
All rights reserved.\n' + \ '\n' + \ 'Permission is hereby granted, free of charge, to any person obtaining a copy\n' + \ 'of this software and associated documentation files (the "Software"), to deal\n' + \ 'in the Software without restriction, including without limitation the rights\n' + \ 'to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n' + \ 'copies of the Software, and to permit persons to whom the Software is\n' + \ 'furnished to do so, subject to the following conditions:\n' + \ '\n' + \ 'The above copyright notice and this permission notice shall be included in\n' + \ 'all copies or substantial portions of the Software.\n' + \ '\n' + \ 'THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n' + \ 'IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n' + \ 'FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n' + \ 'AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n' + \ 'LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n' + \ 'OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n' + \ 'THE SOFTWARE.\n' + \ '*/\n' header_basic = \ 'namespace detail {\n' + \ ' inline static void print_escaped_string(std::ostream& out, const char *v, size_t len) {\n' + \ ' out << \'"\'; \n' + \ ' for (size_t i = 0; i < len && v[i]; ++i) {\n' + \ ' switch (v[i]) {\n' + \ ' case \'\\"\': out << "\\\\\\""; break;\n' + \ ' case \'\\\\\': out << "\\\\\\\\"; break;\n' + \ ' case \'\\b\': out << "\\\\\\b"; break;\n' + \ ' case \'\\f\': out << "\\\\\\f"; break;\n' + \ ' case \'\\n\': out << "\\\\\\n"; break;\n' + \ ' case \'\\r\': out << "\\\\\\r"; break;\n' + \ ' case \'\\t\': out << "\\\\\\t"; break;\n' + \ ' default:\n' + \ ' if (std::isprint((unsigned char)v[i])) std::operator<<(out, v[i]);\n' + \ ' else {\n' + \ ' std::ios_base::fmtflags flags(out.flags());\n' + \ ' out << "\\\\x" << std::setfill(\'0\') << 
def process_struct(file_handle, cppHeader_struct, cppHeader, parent_hier_name, apiname):
    """Collect the C++ printing snippets for the public fields of one struct.

    The public properties of ``cppHeader.classes[cppHeader_struct]`` are
    visited from last to first.  Every field whose type contains neither
    "union" nor "void" contributes one snippet to the module-level
    ``global_str``; fields whose type contains "union" are followed
    recursively, using the field name as the new hierarchical prefix.

    file_handle: handle of the {api_name}_ostream_ops.h file being generated
        (only forwarded to the recursive calls).
    cppHeader_struct: name of the struct being processed.
    cppHeader: object created by CppHeaderParser.CppHeader(...).
    parent_hier_name: hierarchical prefix for nested structs/enums ('' at top level).
    apiname: API name, for example hip.
    """
    global global_str

    # function pointers not working in cppheaderparser
    if cppHeader_struct == 'max_align_t':
        return
    if cppHeader_struct not in cppHeader.classes or cppHeader_struct in structs_analyzed:
        return
    structs_analyzed[cppHeader_struct] = 1

    api_upper = apiname.upper()
    api_lower = apiname.lower()
    indent = " "

    for field in reversed(cppHeader.classes[cppHeader_struct]["properties"]["public"]):
        # Hierarchical name of the field; fields without a name are skipped.
        name = ""
        if 'name' in field:
            name = field['name']
            if parent_hier_name != '':
                name = parent_hier_name + '.' + name
        if name == '':
            continue

        mtype = field['type'] if 'type' in field else ""
        if mtype == '':
            continue
        prop = field['property_of_class'] if 'property_of_class' in field else ""

        if "union" in mtype:
            # Descend into the (possibly anonymous) union so that its members
            # are printed under the name of the enclosing field.
            if prop != '':
                for nested in (prop + "::", prop + "::" + mtype + " "):
                    process_struct(file_handle, nested, cppHeader, name, apiname)
            process_struct(file_handle, cppHeader_struct + "::", cppHeader, name, apiname)
            continue

        if "void" in mtype:
            continue

        # The HIP 'reserved' field is printed as a literal 0 instead of its value.
        if name == 'reserved' and api_upper == 'HIP':
            value = "0"
        else:
            value = "v." + name

        global_str += "".join([
            " if (std::string(\"" + cppHeader_struct + "::" + name + "\").find(" + api_upper + "_structs_regex" + ") != std::string::npos) {\n",
            indent + " std::operator<<(out, \"" + name + "=\");\n",
            indent + " roctracer::" + api_lower + "_support::detail::operator<<(out, " + value + ");\n",
            indent + " std::operator<<(out, \", \");\n",
            " }\n",
        ])
# Parses API header file and generates ostream ops files ostream_ops.h
def gen_cppheader(infilepath, outfilepath, rank):
    """Parse one API header and write its ostream operators to the output file.

    infilepath: API header file to be parsed.
    outfilepath: output file where the ostream operators are written.
    rank: position of this header in the list of inputs.  0 or 2 opens the
        output file and writes the prologue; 1 or 2 writes the epilogue and
        closes the file; any other value only appends operators to the file
        opened by an earlier call.

    Exits the process with status 1 when CppHeaderParser cannot parse the input.
    """
    global global_ops
    global output_filename_h
    global apiname
    global global_str

    try:
        cppHeader = CppHeaderParser.CppHeader(infilepath)
    except CppHeaderParser.CppParseError as e:
        print(e)
        sys.exit(1)

    if rank == 0 or rank == 2:
        # The API name is derived from the output file name: <api>_ostream_ops.h
        mpath = os.path.dirname(outfilepath)
        if mpath == "":
            mpath = os.getcwd()
        apiname = outfilepath.replace(mpath + "/","")
        output_filename_h = open(outfilepath,"w+")
        apiname = apiname.replace("_ostream_ops.h","")
        apiname = apiname.upper()
        output_filename_h.write("// automatically generated\n")
        output_filename_h.write(LICENSE + '\n')
        header_s = \
            '#ifndef INC_' + apiname + '_OSTREAM_OPS_H_\n' + \
            '#define INC_' + apiname + '_OSTREAM_OPS_H_\n' + \
            '\n'
        # NOTE(review): the header names of the '#include' strings below are not
        # visible in the reviewed copy of this file (they look stripped) -
        # confirm them against the upstream script before applying this edit.
        if apiname.upper() == 'HIP':
            header_s = \
                header_s + \
                '#include \n' + \
                '#include \n'
        header_s = \
            header_s + \
            '#include "roctracer.h"\n' + \
            '\n' + \
            '#ifdef __cplusplus\n' + \
            '#include \n' + \
            '#include \n' + \
            '#include \n' + \
            '#include \n'
        output_filename_h.write(header_s)
        output_filename_h.write('\n')
        output_filename_h.write('namespace roctracer {\n')
        output_filename_h.write('namespace ' + apiname.lower() + '_support {\n')
        output_filename_h.write('static int ' + apiname.upper() + '_depth_max = 1;\n')
        output_filename_h.write('static int ' + apiname.upper() + '_depth_max_cnt = 0;\n')
        output_filename_h.write('static std::string ' + apiname.upper() + '_structs_regex = \"\";\n')
        output_filename_h.write('// begin ostream ops for '+ apiname + ' \n')
        output_filename_h.write("// basic ostream ops\n")
        output_filename_h.write(header_basic)
        output_filename_h.write("// End of basic ostream ops\n\n")

    for c in cppHeader.classes.copy():
        # Types defined inside of unions are incorrectly prepended with "union " after parsing by CppHeaderParser
        # Remove "union " from the beginning of the full class name to correct the eventual output
        if "union " in c[0:6] and "::union" not in c[-8:]:
            new_name = c[6:]
            cppHeader.classes[new_name] = cppHeader.classes[c]
            del cppHeader.classes[c]

    for c in cppHeader.classes:
        # ostream operator cannot be overloaded for anonymous struct therefore it is skipped.
        # endswith() is used instead of indexing c[-2]/c[-1], which raised
        # IndexError for class names shorter than two characters.
        if c.endswith('::'):
            continue
        if "::union" in c:
            continue
        if c in structs_analyzed:
            continue
        if c == 'max_align_t' or c == '__fsid_t': # Skipping as it is defined in multiple domains
            continue
        if c.startswith("_") or c.startswith("pthread_") or c.startswith("__pthread_"):
            continue
        if len(cppHeader.classes[c]["properties"]["public"]) != 0:
            output_filename_h.write("inline static std::ostream& operator<<(std::ostream& out, const " + c + "& v)\n")
            output_filename_h.write("{\n")
            output_filename_h.write(" std::operator<<(out, '{');\n")
            output_filename_h.write(" " + apiname.upper() + "_depth_max_cnt++;\n")
            output_filename_h.write(" if (" + apiname.upper() + "_depth_max == -1 || " + apiname.upper() + "_depth_max_cnt <= " + apiname.upper() + "_depth_max" + ") {\n" )
            process_struct(output_filename_h, c, cppHeader, "", apiname)
            # Drop the last three '\n'-separated pieces of the accumulated text
            # (the final ", " separator line, the closing brace line and the
            # empty tail); the closing brace is written back just below.
            global_str = "\n".join(global_str.split("\n")[0:-3])
            if global_str != '':
                global_str += "\n }\n"
            output_filename_h.write(global_str)
            output_filename_h.write(" };\n")
            output_filename_h.write(" " + apiname.upper() + "_depth_max_cnt--;\n")
            output_filename_h.write(" std::operator<<(out, '}');\n")
            output_filename_h.write(" return out;\n")
            output_filename_h.write("}\n")
            global_str = ''
            # Matching operator that forwards to the detail one; it is written
            # after the closing '};};};' footer when the last header is processed.
            global_ops += "inline static std::ostream& operator<<(std::ostream& out, const " + c + "& v)\n" + "{\n" + " roctracer::" + apiname.lower() + "_support::detail::operator<<(out, v);\n" + " return out;\n" + "}\n\n"

    if rank == 1 or rank == 2:
        footer = '// end ostream ops for '+ apiname + ' \n'
        footer += '};};};\n\n'
        output_filename_h.write(footer)
        output_filename_h.write(global_ops)
        footer = '#endif //__cplusplus\n' + \
                 '#endif // INC_' + apiname + '_OSTREAM_OPS_H_\n' + \
                 ' \n'
        output_filename_h.write(footer)
        output_filename_h.close()
        print('File ' + outfilepath + ' generated')

    return
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to # deal in the Software without restriction, including without limitation the # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or # sell copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. 
# Parses trace comparison config file and stores the info in a dictionary
def parse_trace_levels(trace_config_filename, check_trace_flag):
    """Read the trace comparison config file.

    Each line is split on single spaces: the first token is the trace name and
    the second the comparison level.  The token following one of the options
    --ignore-count, --ignore-event, --check-count, --check-order or
    --check-events is stored as that option's value (the value following
    --check-events is also stored as the --check-count and --ignore-count
    values).

    trace_config_filename: path of the config file.
    check_trace_flag: 0 stops at the first line and returns no entries; None
        stops at the first line starting with '# dummy'; any other value reads
        the whole file.

    Returns (trace2info, status): trace2info maps a trace name to the tuple
    (comp_level, no_events_cnt, events2ignore, events2chkcnt, events2chkord,
    events2ch); status is 1 once at least one line has been consumed, else 0.
    """
    status = 0
    trace2info = {}
    # 'with' closes the config file on every return path (it used to leak).
    with open(trace_config_filename) as f:
        for line in f:
            if check_trace_flag == 0:
                return (trace2info, status)
            if (check_trace_flag is None) and re.match('^# dummy',line):
                return (trace2info, status)
            status = 1
            lis = line.split(' ')
            if len(lis) < 2:
                # Blank or single-token line: there is no comparison level to
                # read (this used to raise IndexError on lis[1]).
                continue
            trace_name = lis[0]
            comp_level = lis[1]
            no_events_cnt = ''
            events2ignore = ''
            events2chkcnt = ''
            events2chkord = ''
            events2ch = ''
            for l in lis:
                # A value of ' ' marks "option seen, its value is the next token".
                if no_events_cnt == ' ':
                    no_events_cnt = l
                if events2ignore == ' ':
                    events2ignore = l
                if events2chkcnt == ' ':
                    events2chkcnt = l
                if events2chkord == ' ':
                    events2chkord = l
                if events2ch == ' ':
                    events2ch = l
                    events2chkcnt = l
                    no_events_cnt = l
                if l == '--ignore-count':
                    no_events_cnt = ' '
                if l == '--ignore-event':
                    events2ignore = ' '
                if l == '--check-count':
                    events2chkcnt = ' '
                if l == '--check-order':
                    events2chkord = ' '
                if l == '--check-events':
                    events2ch = ' '
            trace2info[trace_name] = (comp_level,no_events_cnt,events2ignore,events2chkcnt,events2chkord,events2ch)
    return (trace2info, status)
# check trace against golden reference and returns 0 for pass, 1 for fail
def check_trace_status(tracename, verbose, check_trace_flag):
    """Compare a generated trace with its golden reference.

    tracename: trace name; the golden file is test/golden_traces/<tracename>.txt
        and the checked file is /tmp/test/out/<tracename>.out.
    verbose: when true, print the event summaries of both traces.
    check_trace_flag: forwarded to parse_trace_levels().

    Prints 'PASSED!' or 'FAILED!' and returns 0 for pass, 1 for fail.
    """
    global events_order_r
    global events_order

    (trace2info, status) = parse_trace_levels(trace2info_filename, check_trace_flag)
    if len(trace2info) == 0:
        if status == 1:
            print ("Error: no trace comparison info found in config file " + trace2info_filename + "\n")
            print('FAILED!')
            return 1
        if status == 0:
            print('PASSED!')
            return 0

    trace = 'test/golden_traces/' + tracename + '.txt'
    rtrace = '/tmp/test/out/' + tracename + '.out'

    trace_key = os.path.basename(tracename)
    if trace_key not in trace2info:
        print('Trace ' + trace_key + ' not found in ' + trace2info_filename)
        print('FAILED!')
        return 1

    (trace_level, no_events_cnt, events2ignore, events2chkcnt, events2chkord, _events2ch) = trace2info[trace_key]
    # The last token of a config line still carries its newline.
    trace_level = trace_level.rstrip('\n')
    no_events_cnt = no_events_cnt.rstrip('\n')
    events2ignore = events2ignore.rstrip('\n')
    events2chkcnt = events2chkcnt.rstrip('\n')
    events2chkord = events2chkord.rstrip('\n')

    # An empty pattern would match every event when used with re.search;
    # 'empty-regex' is the placeholder passed on instead.
    if no_events_cnt == '':
        no_events_cnt = 'empty-regex'
    if events2ignore == '':
        events2ignore = 'empty-regex'

    if trace_level == '--check-none':
        print('PASSED!')
        return 0

    if trace_level == '--check-diff':
        if filecmp.cmp(trace,rtrace):
            print('PASSED!')
            return 0
        print('FAILED!')
        # Run diff with an argument list instead of a shell command string so
        # that file names containing spaces or shell metacharacters are safe.
        import subprocess
        try:
            subprocess.call(['/usr/bin/diff', '--brief', trace, rtrace])
        except OSError as e:
            print('could not run /usr/bin/diff: ' + str(e))
        return 1

    metric = ''
    if trace_level == '--check-count' or trace_level == '--check-events':
        metric = 'cnt'
    if trace_level == '--check-order':
        metric = 'or'

    cnt_r = gen_events_info(rtrace,trace_level,no_events_cnt,events2ignore,events2chkcnt,events2chkord,verbose)
    # gen_events_info() rebinds the global events_order on every call, so the
    # per-tid order of the checked trace is saved before the golden trace is parsed.
    events_order_r = {}
    for tid in sorted (events_order.keys()) :
        events_order_r[tid] = events_order[tid]
    cnt = gen_events_info(trace,trace_level,no_events_cnt,events2ignore,events2chkcnt,events2chkord,verbose)

    if verbose:
        print ('\n' + rtrace + ':\n')
        print (cnt_r)
        print ('\n' + trace + ':\n')
        print (cnt)
        # NOTE(review): the nesting of this call under 'if verbose' is inferred
        # from a copy of the file without indentation - confirm.
        diff_strings(cnt_r, cnt, metric)

    if cnt_r == cnt:
        print('PASSED!')
        return 0
    else:
        print('FAILED!')
        return 1
# Parses roctracer trace file for regression purpose
# and generates events count per event (when cnt is on) or events order per tid (when order is on)
def gen_events_info(tracefile, trace_level, no_events_cnt, events2ignore, events2chkcnt, events2chkord, verbose):
    """Summarize the events found in a roctracer trace file.

    tracefile: path of the trace to read.
    trace_level: '--check-count' or '--check-events' selects per-event counts,
        '--check-order' selects the per-tid event order; anything else yields ''.
    no_events_cnt: regex of events that are not counted (count mode) or whose
        immediate repetitions are collapsed (order mode).
    events2ignore: regex of events that are skipped entirely.
    events2chkcnt / events2chkord: regex an event must match to be considered
        in count / order mode.
    verbose: not used by this function.

    Returns the summary as a string and rebinds the module-level
    ``events_order`` to the per-tid event lists of this file.
    """
    global events_order

    if trace_level in ('--check-count', '--check-events'):
        metric = 'cnt'
    elif trace_level == '--check-order':
        metric = 'or'
    else:
        metric = ''

    counts = {}
    events_order = {}

    re_no_events_cnt = r'{}'.format(no_events_cnt)
    re_events2ignore = r'{}'.format(events2ignore)
    re_events2chkcnt = r'{}'.format(events2chkcnt)
    re_events2chkord = r'{}'.format(events2chkord)

    # (pattern, group holding the event name, group holding the tid).  All
    # patterns are tried in this order and a later match overrides an earlier one.
    recognizers = (
        # ' hipSetDevice correlation_id(1) time_ns(1548622357525055:1548622357542015) process_id(126283) thread_id(126283)'
        # ' hcCommandKernel correlation_id(6) time_ns(1548622661443020:1548622662666935) device_id(0) queue_id(0)'
        (re.compile(r'\s*(\w+)\s+.*_id\((\d+)\)$'), 1, 2),
        (re.compile(r'.*<(\w+)\s+.*tid\((\d+)\)>'), 1, 2),
        # tool_api_record
        # 1822810364769411:1822810364771941 116477:116477 hsa_agent_get_info(, 17, 0x7ffeac015fec) = 0
        # tool_gpu_act_record
        # 3632773658039902:3632773658046462 0:0 hcCommandMarker:273
        (re.compile(r'\d+:\d+\s+\d+:(\d+)\s+(\w+)'), 2, 1),
        # roctx record.  NOTE(review): '(\d)+' keeps only the last digit of the
        # tid field - possibly meant '(\d+)'; confirm before changing.
        (re.compile(r'\d+\s\d+:(\d)+\s(\d):\d+:\".*\"'), 2, 1),
    )

    with open(tracefile) as trace:
        for raw in trace:
            # roctx before/after not real events
            if re.search("before", raw) or re.search("after", raw):
                continue
            text = raw.rstrip('\n')

            event = ''
            for pattern, event_group, tid_group in recognizers:
                hit = pattern.match(text)
                if hit:
                    event = hit.group(event_group)
                    tid = int(hit.group(tid_group))

            # some traces has these null events
            if event == '' or event == '(null)':
                continue
            if re.search(re_events2ignore, event):
                continue

            if metric == 'cnt' and re.search(re_events2chkcnt, event):
                if event in counts:
                    counts[event] += 1
                elif not re.search(re_no_events_cnt, event):
                    counts[event] = 1

            if metric == 'or' and re.search(re_events2chkord, event):
                history = events_order.get(tid)
                if history is None:
                    events_order[tid] = [event]
                elif not re.search(re_no_events_cnt, event) or event != history[-1]:
                    # An event matching no_events_cnt is added only if it is
                    # not already the last event in the list.
                    history.append(event)

    res = ''
    if metric == 'cnt':
        for event, count in counts.items():
            if re.search(re_no_events_cnt, event):
                res += event + '\n'
            else:
                res += event + " : count " + str(count) + '\n'
        # The count summary is the string form of the sorted list of lines.
        res = str(sorted(res.split('\n')))
    if metric == 'or':
        for tid in sorted(events_order.keys()):
            res += str(events_order[tid])
    return res
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to # deal in the Software without restriction, including without limitation the # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or # sell copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. 
################################################################################ from __future__ import print_function import os, sys, re H_OUT='hsa_prof_str.h' CPP_OUT='hsa_prof_str.inline.h' API_TABLES_H = 'hsa_api_trace.h' API_HEADERS_H = ( ('CoreApi', 'hsa.h'), ('AmdExt', 'hsa_ext_amd.h'), ('ImageExt', 'hsa_ext_image.h'), ('AmdExt', API_TABLES_H), ) LICENSE = \ '/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.\n' + \ '\n' + \ ' Permission is hereby granted, free of charge, to any person obtaining a copy\n' + \ ' of this software and associated documentation files (the "Software"), to deal\n' + \ ' in the Software without restriction, including without limitation the rights\n' + \ ' to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n' + \ ' copies of the Software, and to permit persons to whom the Software is\n' + \ ' furnished to do so, subject to the following conditions:\n' + \ '\n' + \ ' The above copyright notice and this permission notice shall be included in\n' + \ ' all copies or substantial portions of the Software.\n' + \ '\n' + \ ' THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n' + \ ' IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n' + \ ' FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n' + \ ' AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n' + \ ' LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n' + \ ' OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n' + \ ' THE SOFTWARE. 
#############################################################
# Error handler
def fatal(module, msg):
    """Print 'module Error: "msg"' to stderr and exit the process with status 1."""
    print (module + ' Error: "' + msg + '"', file = sys.stderr)
    sys.exit(1)

# Get next text block
def NextBlock(pos, record):
    """Return the index just past the text block that starts at record[pos].

    A block is a run of whitespace, a run of word characters and '*', or - when
    record[pos] is '(' - everything up to and including the matching ')'.
    An empty record returns pos unchanged.  Anything else is reported through
    fatal(), which exits the process.
    """
    if len(record) == 0:
        return pos

    space_pattern = re.compile(r'(\s+)')
    word_pattern = re.compile(r'([\w\*]+)')
    if record[pos] != '(':
        m = space_pattern.match(record, pos)
        if not m:
            m = word_pattern.match(record, pos)
        if m:
            return pos + len(m.group(1))
        else:
            fatal('NextBlock', "bad record '" + record + "' pos(" + str(pos) + ")")
    else:
        # Scan forward counting nested parentheses until the opening one closes.
        count = 0
        for index in range(pos, len(record)):
            if record[index] == '(':
                count = count + 1
            elif record[index] == ')':
                count = count - 1
                if count == 0:
                    index = index + 1
                    break
        if count != 0:
            fatal('NextBlock', "count is not zero (" + str(count) + ")")
        if record[index - 1] != ')':
            fatal('NextBlock', "last char is not ')' '" + record[index - 1] + "'")
        return index

#############################################################
# API table parser class
class API_TableParser:
    """Collects the names wrapped in decltype(...) inside 'struct <name>Table { ... };'.

    After construction, ``array`` holds the collected names in file order.
    """

    def fatal(self, msg):
        fatal('API_TableParser', msg)

    def __init__(self, header, name):
        """header: path of the header to scan; name: table name without the 'Table' suffix."""
        self.name = name

        if not os.path.isfile(header):
            self.fatal("file '" + header + "' not found")

        # Raw strings: '\s' in a plain string literal is an invalid escape sequence.
        self.beg_pattern = re.compile(r'^\s*struct\s+' + name + r'Table\s*{\s*$')
        self.end_pattern = re.compile(r'^\s*};\s*$')
        self.array = []

        # The header is only needed while parsing; 'with' closes it afterwards
        # (the handle used to stay open for the lifetime of the object).
        with open(header, 'r') as inp:
            self.inp = inp
            self.parse()

    # normalizing a line
    def norm_line(self, line):
        # Drops the last character (the newline) and collapses leading whitespace.
        return re.sub(r'^\s+', r' ', line[:-1])

    # check for start record
    def is_start(self, record):
        return self.beg_pattern.match(record)

    # check for end record
    def is_end(self, record):
        return self.end_pattern.match(record)

    # check for declaration entry record
    def is_entry(self, record):
        return re.match(r'^\s*decltype\(([^\)]*)\)', record)

    # parse method
    def parse(self):
        """Fill ``array`` from the lines between the table's start and end records."""
        active = 0
        for line in self.inp.readlines():
            record = self.norm_line(line)
            if self.is_start(record):
                active = 1
            if active != 0:
                if self.is_end(record):
                    return
                m = self.is_entry(record)
                if m:
                    self.array.append(m.group(1))
#############################################################
# API declaration parser class
class API_DeclParser:
    """Finds the declarations of the given API calls in a header file.

    For every call in ``array`` the header is scanned and the parsed
    declaration (see get_args) is stored in ``data[call]``.  Calls that are not
    found in the header are left out of ``data``.
    """

    def fatal(self, msg):
        fatal('API_DeclParser', msg)

    def __init__(self, header, array, data):
        """header: path of the header; array: call names; data: dict filled in place."""
        if not os.path.isfile(header):
            self.fatal("file '" + header + "' not found")

        # Raw string: '\)' and '\s' are invalid escapes in a plain string literal.
        self.end_pattern = re.compile(r'\);\s*$')
        self.data = data
        # The header is rewound and re-read once per call; 'with' closes it when
        # all calls are processed (the handle used to be left open).
        with open(header, 'r') as inp:
            self.inp = inp
            for call in array:
                if call in data:
                    self.fatal(call + ' is already found')
                self.parse(call)

    # api record filter
    def api_filter(self, record):
        record = re.sub(r'\sHSA_API\s', r' ', record)
        record = re.sub(r'\sHSA_DEPRECATED\s', r' ', record)
        return record

    # check for start record
    def is_start(self, call, record):
        return re.search(r'\s' + call + r'\s*\(', record)

    # check for API method record
    def is_api(self, call, record):
        record = self.api_filter(record)
        return re.match(r'\s+\S+\s+' + call + r'\s*\(', record)

    # check for end record
    def is_end(self, record):
        return self.end_pattern.search(record)

    # parse method args
    def get_args(self, record):
        """Split a complete declaration record into its return type and arguments.

        Returns a dict with 'ret' (first text block of the record), 'args' (the
        argument list as one ', '-separated string), 'astr' (argument name ->
        full 'type name' text), 'alst' (argument names in order) and 'tlst'
        (the 'type name' texts in order).
        """
        struct = {'ret': '', 'args': '', 'astr': {}, 'alst': [], 'tlst': []}
        record = re.sub(r'^\s+', r'', record)
        # Attach '*' to the preceding type: 'T *p' / 'T * p' -> 'T* p'.
        record = re.sub(r'\s*(\*+)\s*', r'\1 ', record)
        rind = NextBlock(0, record)
        struct['ret'] = record[0:rind]
        pos = record.find('(')
        end = NextBlock(pos, record)
        args = record[pos:end]
        args = re.sub(r'^\(\s*', r'', args)
        args = re.sub(r'\s*\)$', r'', args)
        args = re.sub(r'\s*,\s*', r',', args)
        struct['args'] = re.sub(r',', r', ', args)
        if len(args) == 0:
            return struct

        pos = 0
        # Trailing comma so that every argument, including the last, ends with ','.
        args = args + ','
        while pos < len(args):
            ind1 = NextBlock(pos, args) # type
            ind2 = NextBlock(ind1, args) # space
            if args[ind2] != '(':
                # Plain argument: the name is the last block before the comma.
                while ind2 < len(args):
                    end = NextBlock(ind2, args)
                    if args[end] == ',':
                        break
                    else:
                        ind2 = end
                name = args[ind2:end]
            else:
                # Function pointer argument: the name is inside '(*name)'.
                ind3 = NextBlock(ind2, args) # field
                m = re.match(r'\(\s*\*\s*(\S+)\s*\)', args[ind2:ind3])
                if not m:
                    self.fatal("bad block3 '" + args + "' : '" + args[ind2:ind3] + "'")
                name = m.group(1)
                end = NextBlock(ind3, args) # the rest
            item = args[pos:end]
            struct['astr'][name] = item
            struct['alst'].append(name)
            struct['tlst'].append(item)
            if args[end] != ',':
                self.fatal("no comma '" + args + "'")
            pos = end + 1

        return struct

    # parse given api
    def parse(self, call):
        """Scan the header for the declaration of ``call`` and store it in ``data``."""
        record = ''
        active = 0
        prev_line = ''

        self.inp.seek(0)
        for line in self.inp.readlines():
            record += ' ' + line[:-1]
            record = re.sub(r'^\s*', r' ', record)

            if active == 0:
                if self.is_start(call, record):
                    active = 1
                    m = self.is_api(call, record)
                    if not m:
                        # The return type may sit on the previous line.
                        record = ' ' + prev_line + ' ' + record
                        m = self.is_api(call, record)
                        if not m:
                            self.fatal("bad api '" + line + "'")

            if active == 1:
                if self.is_end(record):
                    self.data[call] = self.get_args(record)
                    active = 0

            # Outside of a declaration the record does not accumulate lines.
            if active == 0:
                record = ''
            prev_line = line
self.h_content += "/* Generated by " + os.path.basename(__file__) + " */\n" + license + "\n\n" self.h_content += "/* HSA API tracing primitives\n" for (name, header) in api_headers: self.h_content += " '" + name + "', header '" + header + "', " + str(len(self.api_calls[name])) + ' funcs\n' for call in self.ns_calls: self.h_content += ' ' + call + ' was not parsed\n' self.h_content += " */\n" self.h_content += '\n' self.h_content += '#ifndef ' + out_macro + '\n' self.h_content += '#define ' + out_macro + '\n' self.h_content += self.add_section('API ID enumeration', ' ', self.gen_id_enum) self.h_content += '/* Declarations of APIs intended for use only by tools. */\n' self.h_content += 'typedef void (*hsa_amd_queue_intercept_packet_writer)(const void*, uint64_t);\n' self.h_content += 'typedef void (*hsa_amd_queue_intercept_handler)(const void*, uint64_t, uint64_t, void*,\n' self.h_content += ' hsa_amd_queue_intercept_packet_writer);\n' self.h_content += 'typedef void (*hsa_amd_runtime_queue_notifier)(const hsa_queue_t*, hsa_agent_t, void*);\n' self.h_content += self.add_section('API arg structure', ' ', self.gen_arg_struct) self.h_content += self.add_section('API output stream', ' ', self.gen_out_stream) self.h_content += '#endif /* ' + out_macro + ' */\n' self.cpp_content += "/* Generated by " + os.path.basename(__file__) + " */\n" + license + "\n\n" self.cpp_content += '#include \n' self.cpp_content += '#include \n' self.cpp_content += 'namespace roctracer::hsa_support::detail {\n' self.cpp_content += 'static CoreApiTable CoreApi_saved_before_cb;\n' self.cpp_content += 'static AmdExtTable AmdExt_saved_before_cb;\n' self.cpp_content += 'static ImageExtTable ImageExt_saved_before_cb;\n\n' self.cpp_content += self.add_section('API callback functions', '', self.gen_callbacks) self.cpp_content += self.add_section('API intercepting code', '', self.gen_intercept) self.cpp_content += self.add_section('API get_name function', ' ', self.gen_get_name) self.cpp_content += 
self.add_section('API get_code function', ' ', self.gen_get_code) self.cpp_content += '\n};\n' # add code section def add_section(self, title, gap, fun): content = '' n = 0 content += '\n/* section: ' + title + ' */\n\n' content += fun(-1, '-', '-', {}) for index in range(len(self.api_names)): last = (index == len(self.api_names) - 1) name = self.api_names[index] if n != 0: if gap == '': content += fun(n, name, '-', {}) content += '\n' content += gap + '/* block: ' + name + ' API */\n' for call in self.api_calls[name]: content += fun(n, name, call, self.api_data[call]) n += 1 content += fun(n, '-', '-', {}) return content # generate API ID enumeration def gen_id_enum(self, n, name, call, data): content = '' if n == -1: content += 'enum hsa_api_id_t {\n' return content if call != '-': content += ' ' + self.api_id[call] + ' = ' + str(n) + ',\n' else: content += '\n' content += ' HSA_API_ID_DISPATCH = ' + str(n) + ',\n' content += ' HSA_API_ID_NUMBER = ' + str(n + 1) + ',\n' content += '};\n' return content # generate API args structure def gen_arg_struct(self, n, name, call, struct): content = '' if n == -1: content += 'struct hsa_api_data_t {\n' content += ' uint64_t correlation_id;\n' content += ' uint32_t phase;\n' content += ' union {\n' for ret_type in self.api_rettypes: content += ' ' + ret_type + ' ' + ret_type + '_retval;\n' content += ' };\n' content += ' union {\n' return content if call != '-': content += ' struct {\n' for (var, item) in struct['astr'].items(): content += ' ' + item + ';\n' if call == "hsa_amd_memory_async_copy_rect" and item == "const hsa_dim3_t* range": content += ' hsa_dim3_t range__val;\n' content += ' } ' + call + ';\n' else: content += ' } args;\n' content += ' uint64_t *phase_data;\n' content += '};\n' return content # generate API callbacks def gen_callbacks(self, n, name, call, struct): content = '' if n == -1: content += '/* section: Static declarations */\n' content += '\n' if call != '-': call_id = self.api_id[call]; ret_type = 
struct['ret'] content += 'static ' + ret_type + ' ' + call + '_callback(' + struct['args'] + ') {\n' content += ' hsa_trace_data_t trace_data;\n' content += ' bool enabled{false};\n' content += '\n' content += ' if (auto function = report_activity.load(std::memory_order_relaxed); function &&\n' content += ' (enabled =\n' content += ' function(ACTIVITY_DOMAIN_HSA_API, ' + call_id + ', &trace_data) == 0)) {\n' content += ' if (trace_data.phase_enter != nullptr) {\n' for var in struct['alst']: item = struct['astr'][var]; if re.search(r'char\* ', item): # FIXME: we should not strdup the char* arguments here, as the callback will not outlive the scope of this function. Instead, we # should generate a helper function to capture the content of the arguments similar to hipApiArgsInit for HIP. We also need a # helper to free the memory that is allocated to capture the content. content += ' trace_data.api_data.args.' + call + '.' + var + ' = ' + '(' + var + ' != NULL) ? strdup(' + var + ')' + ' : NULL;\n' else: content += ' trace_data.api_data.args.' + call + '.' + var + ' = ' + var + ';\n' if call == 'hsa_amd_memory_async_copy_rect' and var == 'range': content += ' trace_data.api_data.args.' + call + '.' + var + '__val = ' + '*(' + var + ');\n' content += ' trace_data.phase_enter(' + call_id + ', &trace_data);\n' content += ' }\n' content += ' }\n' content += '\n' if ret_type != 'void': content += ' trace_data.api_data.' + ret_type + '_retval = ' content += ' ' + name + '_saved_before_cb.' + call + '_fn(' + ', '.join(struct['alst']) + ');\n' content += '\n' content += ' if (enabled && trace_data.phase_exit != nullptr)\n' content += ' trace_data.phase_exit(' + call_id + ', &trace_data);\n' if ret_type != 'void': content += ' return trace_data.api_data.' 
+ ret_type + '_retval;\n' content += '}\n' return content # generate API intercepting code def gen_intercept(self, n, name, call, struct): content = '' if n > 0 and call == '-': content += '};\n' if n == 0 or (call == '-' and name != '-'): content += 'static void Install' + name + 'Wrappers(' + name + 'Table* table) {\n' content += ' ' + name + '_saved_before_cb = *table;\n' if call != '-': if call != 'hsa_shut_down': content += ' table->' + call + '_fn = ' + call + '_callback;\n' else: content += ' { void* p = (void*)' + call + '_callback; (void)p; }\n' return content # generate API name function def gen_get_name(self, n, name, call, struct): content = '' if n == -1: content += 'static const char* GetApiName(uint32_t id) {\n' content += ' switch (id) {\n' return content if call != '-': content += ' case ' + self.api_id[call] + ': return "' + call + '";\n' else: content += ' }\n' content += ' return "unknown";\n' content += '}\n' return content # generate API code function def gen_get_code(self, n, name, call, struct): content = '' if n == -1: content += 'static uint32_t GetApiCode(const char* str) {\n' return content if call != '-': content += ' if (strcmp("' + call + '", str) == 0) return ' + self.api_id[call] + ';\n' else: content += ' return HSA_API_ID_NUMBER;\n' content += '}\n' return content # generate stream operator def gen_out_stream(self, n, name, call, struct): content = '' if n == -1: content += '#ifdef __cplusplus\n' content += '#include "hsa_ostream_ops.h"\n' content += 'typedef std::pair hsa_api_data_pair_t;\n' content += 'inline std::ostream& operator<< (std::ostream& out, const hsa_api_data_pair_t& data_pair) {\n' content += ' const uint32_t cid = data_pair.first;\n' content += ' const hsa_api_data_t& api_data = data_pair.second;\n' content += ' switch(cid) {\n' return content if call != '-': content += ' case ' + self.api_id[call] + ': {\n' content += ' out << "' + call + '(";\n' arg_list = struct['alst'] if len(arg_list) != 0: for ind in 
range(len(arg_list)): arg_var = arg_list[ind] arg_val = 'api_data.args.' + call + '.' + arg_var if re.search(r'char\* ', struct['astr'][arg_var]): content += ' out << "0x" << std::hex << (uint64_t)' + arg_val else: content += ' out << ' + arg_val if call == "hsa_amd_memory_async_copy_rect" and arg_var == "range": content += ' << ", ";\n' content += ' out << ' + arg_val + '__val' ''' arg_item = struct['tlst'][ind] if re.search(r'\(\* ', arg_item): arg_pref = '' elif re.search(r'void\* ', arg_item): arg_pref = '' elif re.search(r'\*\* ', arg_item): arg_pref = '**' elif re.search(r'\* ', arg_item): arg_pref = '*' else: arg_pref = '' if arg_pref != '': content += ' if (' + arg_val + ') out << ' + arg_pref + '(' + arg_val + '); else out << ' + arg_val else: content += ' out << ' + arg_val ''' if ind < len(arg_list) - 1: content += ' << ", ";\n' else: content += ';\n' if struct['ret'] != 'void': content += ' out << ") = " << api_data.' + struct['ret'] + '_retval;\n' else: content += ' out << ") = void";\n' content += ' break;\n' content += ' }\n' else: content += ' default:\n' content += ' out << "ERROR: unknown API";\n' content += ' abort();\n' content += ' }\n' content += ' return out;\n' content += '}\n' content += '#endif\n' return content ############################################################# # main # Usage if len(sys.argv) != 3: print ("Usage:", sys.argv[0], " ", file=sys.stderr) sys.exit(1) else: PREFIX = sys.argv[1] + '/' HSA_DIR = sys.argv[2] + '/' descr = API_DescrParser(H_OUT, HSA_DIR, API_TABLES_H, API_HEADERS_H, LICENSE) out_file = PREFIX + H_OUT print ('Generating "' + out_file + '"') f = open(out_file, 'w') f.write(descr.h_content[:-1]) f.close() out_file = PREFIX + CPP_OUT print ('Generating "' + out_file + '"') f = open(out_file, 'w') f.write(descr.cpp_content[:-1]) f.close() ############################################################# ./build.sh0000775000175100017510000000600115176134562012550 0ustar jenkinsjenkins#!/bin/bash -e 
################################################################################ # Copyright (c) 2018-2022 Advanced Micro Devices, Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to # deal in the Software without restriction, including without limitation the # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or # sell copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. 
################################################################################

# Build driver: configures with cmake in BUILD_DIR, then runs the 'make',
# 'make mytest' and 'make package' steps.  Every setting below can be
# pre-set through the environment or through an optional ./defaults.sh.

# Directory containing this script; default source root.
SRC_DIR=$(dirname "$0")
COMPONENT="roctracer"
ROCM_PATH="${ROCM_PATH:=/opt/rocm}"
LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,$ROCM_PATH/lib:$ROCM_PATH/lib64"
DEFAULTS=defaults.sh

# Print the message on stderr and stop with a failure status.
fatal() {
  echo "$1" >&2
  exit 1
}

umask 022

if [ -e "$DEFAULTS" ] ; then source "$DEFAULTS"; fi

if [ -z "$ROCTRACER_ROOT" ]; then ROCTRACER_ROOT=$SRC_DIR; fi
if [ -z "$BUILD_DIR" ] ; then BUILD_DIR=$PWD; fi
if [ -z "$BUILD_TYPE" ] ; then BUILD_TYPE="release"; fi
if [ -z "$PACKAGE_ROOT" ] ; then PACKAGE_ROOT=$ROCM_PATH; fi
if [ -z "$PACKAGE_PREFIX" ] ; then PACKAGE_PREFIX="$ROCM_PATH/$COMPONENT"; fi
if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH=$PACKAGE_ROOT; fi
if [ -z "$HIP_VDI" ] ; then HIP_VDI=0; fi
# An explicit ROCM_RPATH replaces the default run-path derived from ROCM_PATH.
if [ -n "$ROCM_RPATH" ] ; then LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,${ROCM_RPATH}"; fi
if [ -z "$GPU_LIST" ] ; then GPU_LIST="gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102"; fi

# Turn the source root into an absolute path before changing directory.
ROCTRACER_ROOT=$(cd "$ROCTRACER_ROOT" && echo "$PWD")

# Quoted so that a path containing spaces or glob characters is removed as one
# directory instead of being split into several arguments.
if [ "$TO_CLEAN" = "yes" ] ; then rm -rf "$BUILD_DIR"; fi

mkdir -p "$BUILD_DIR"
pushd "$BUILD_DIR"

cmake \
    -DCMAKE_MODULE_PATH="$ROCM_PATH/hip/cmake" \
    -DCMAKE_BUILD_TYPE="$BUILD_TYPE" \
    -DCMAKE_PREFIX_PATH="$PREFIX_PATH" \
    -DCMAKE_INSTALL_PREFIX="$PACKAGE_ROOT" \
    -DCPACK_PACKAGING_INSTALL_PREFIX="$PACKAGE_PREFIX" \
    -DCPACK_GENERATOR="${CPACKGEN:-"DEB;RPM"}" \
    -DCMAKE_SHARED_LINKER_FLAGS="$LD_RUNPATH_FLAG" \
    -DGPU_TARGETS="$GPU_LIST" \
    -DCPACK_OBJCOPY_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-objcopy" \
    -DCPACK_READELF_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-readelf" \
    -DCPACK_STRIP_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-strip" \
    -DCPACK_OBJDUMP_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-objdump" \
    "$ROCTRACER_ROOT"

make
make mytest
make package

exit 0
./src/0000775000175100017510000000000015176134562011704 5ustar jenkinsjenkins./src/tracer_tool/0000775000175100017510000000000015176134562014221 5ustar 
jenkinsjenkins./src/tracer_tool/trace_buffer.h0000664000175100017510000002372515176134562017032 0ustar jenkinsjenkins/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef TOOL_TRACE_BUFFER_H_ #define TOOL_TRACE_BUFFER_H_ #include #include #include #include #include #include #include #include #include #include #include #include namespace roctracer { class TraceBufferBase { public: static void FlushAll() { std::lock_guard lock(mutex_); for (auto* trace_buffer = head_; trace_buffer != nullptr; trace_buffer = trace_buffer->next_) trace_buffer->Flush(); } static void Register(TraceBufferBase* elem) { std::lock_guard lock(mutex_); auto** prev_ptr = &head_; while (*prev_ptr != nullptr && elem->priority_ > (*prev_ptr)->priority_) prev_ptr = &(*prev_ptr)->next_; elem->next_ = *prev_ptr; *prev_ptr = elem; } static void Unregister(TraceBufferBase* elem) { std::lock_guard lock(mutex_); auto** prev_ptr = &head_; while (*prev_ptr != nullptr && *prev_ptr != elem) prev_ptr = &(*prev_ptr)->next_; assert(*prev_ptr != nullptr && "elem is not in the list"); *prev_ptr = elem->next_; } TraceBufferBase(std::string name, int priority) : name_(std::move(name)), priority_(priority), next_(nullptr) {} TraceBufferBase(const TraceBufferBase&) = delete; TraceBufferBase& operator=(const TraceBufferBase&) = delete; virtual ~TraceBufferBase() { Unregister(this); } virtual void Flush() = 0; std::string name() && { return std::move(name_); } const std::string& name() const& { return name_; } private: const std::string name_; const int priority_; TraceBufferBase* next_; static TraceBufferBase* head_; static std::mutex mutex_; }; enum TraceEntryState { TRACE_ENTRY_INVALID = 0, TRACE_ENTRY_INIT = 1, TRACE_ENTRY_COMPLETE = 2 }; template > class TraceBuffer : protected TraceBufferBase { public: using callback_t = std::function; TraceBuffer(std::string name, uint64_t size, callback_t flush_callback, int priority = 0) : TraceBufferBase(std::move(name), priority), flush_callback_(std::move(flush_callback)), size_(size) { assert(size_ != 0 && "cannot create an empty trace buffer"); Entry* write_buffer = allocator_.allocate(size_); assert(write_buffer != nullptr); 
buffer_list_.push_back(write_buffer); read_index_ = 0; write_index_ = {0, write_buffer}; AllocateFreeBuffer(); // Add this instance to the link list of all trace buffers in the process. Register(this); } ~TraceBuffer() override { // Flush the remaining records. After flushing, there should not be any records left in the // trace buffer. Flush(); assert(read_index_ == write_index_.load().index); // Acquire both the writer and worker lock as we are accessing shared variables they protect. std::unique_lock writer_lock(write_mutex_, std::defer_lock); std::unique_lock worker_lock(worker_mutex_, std::defer_lock); std::lock(writer_lock, worker_lock); // Deallocate the buffers. allocator_.deallocate(write_index_.load().buffer, size_); allocator_.deallocate(free_buffer_, size_); // Stop the worker thread. The worker thread loop checks the 'worker_thread_' std::optional // after waking up, and exits if it does not have a value. if (worker_thread_) { std::thread worker_thread = std::move(worker_thread_.value()); { // Tell the worker thread loop to exit. worker_thread_.reset(); free_buffer_ = nullptr; worker_cond_.notify_one(); } // Release the worker lock to allow the worker thread to exit. worker_lock.unlock(); worker_thread.join(); } } // Flush all entries between read_pointer and write_pointer. read_pointer and write_pointer are // monotonically increasing indices, with read_pointer % size always indexing inside the first // buffer in the list. Stop flushing if an incomplete entry is found, it will be flushed with // the next invocation after changing its state to 'complete'. 
void Flush() override { std::lock_guard lock(write_mutex_); auto write_index = write_index_.load(std::memory_order_relaxed); for (auto it = buffer_list_.begin(); it != buffer_list_.end();) { auto end_of_buffer = read_index_ - read_index_ % size_ + size_; while (read_index_ < std::min(write_index.index, end_of_buffer)) { Entry* entry = &(*it)[read_index_ % size_]; // The entry is not yet complete, stop flushing here. if (entry->valid.load(std::memory_order_acquire) != TRACE_ENTRY_COMPLETE) return; flush_callback_(entry); entry->~Entry(); ++read_index_; } // The buffer is still in use or the read pointer did not reach the end of the buffer. if (*it == write_index.buffer || read_index_ != end_of_buffer) return; // All entries in the current buffer are now processed. Destroy the buffer and move onto the // next buffer in the list. allocator_.deallocate(*it, size_); it = buffer_list_.erase(it); } } template Entry& Emplace(Args... args) { return *new (GetEntry()) Entry(std::forward(args)...); } private: Entry* GetEntry() { auto current = write_index_.load(std::memory_order_relaxed); while (true) { // If the pointer is at the end of the current buffer, switch to the available free buffer and // notify the worker thread to allocate a new buffer. if (current.index != 0 && current.index % size_ == 0) { std::lock_guard lock(write_mutex_); // If the worker thread wasn't already started, start it now. This avoids starting a new // thread when the trace buffer is created. if (!worker_thread_) { std::promise ready; auto future = ready.get_future(); { std::lock_guard worker_lock(worker_mutex_); worker_thread_.emplace(&TraceBuffer::WorkerThreadLoop, this, std::move(ready)); } future.wait(); } // Re-check the pointer overflow under the writer lock, another thread could have beaten us // to it and already bumped the write_index_. 
current = write_index_.load(std::memory_order_relaxed); if (current.index % size_ == 0) { std::unique_lock worker_lock(worker_mutex_); // Wait for the free buffer to become available. worker_cond_.wait(worker_lock, [this]() { return free_buffer_ != nullptr; }); current.buffer = free_buffer_; buffer_list_.push_back(current.buffer); write_index_.store({current.index + 1, current.buffer}, std::memory_order_relaxed); // Tell the worker thread to allocate a new free buffer. free_buffer_ = nullptr; worker_cond_.notify_one(); // We successfully allocated a new buffer, return the first element. return ¤t.buffer[0]; } } if (write_index_.compare_exchange_weak(current, {current.index + 1, current.buffer}, std::memory_order_relaxed)) return ¤t.buffer[current.index % size_]; } } void AllocateFreeBuffer() { assert(free_buffer_ == nullptr); free_buffer_ = allocator_.allocate(size_); assert(free_buffer_ != nullptr); for (size_t i = 0; i < size_; ++i) free_buffer_[i].valid.store(TRACE_ENTRY_INVALID, std::memory_order_relaxed); } void WorkerThreadLoop(std::promise ready) { std::unique_lock lock(worker_mutex_); // This worker thread is now ready to accept work. ready.set_value(); while (true) { worker_cond_.wait(lock, [this]() { return free_buffer_ == nullptr; }); if (!worker_thread_) break; AllocateFreeBuffer(); worker_cond_.notify_one(); } } // The WriteIndex is used to store both the index and the buffer associated with that index (the // buffer contains the trace buffer records at [index - index % size, index - index % size_t + // size_ - 1]) in a single atomic variable. struct WriteIndex { uint64_t index; Entry* buffer; }; const callback_t flush_callback_; const uint64_t size_; uint64_t read_index_; // The index of the next record to flush. std::atomic write_index_; // The index of the next record that could be written. Entry* free_buffer_{nullptr}; // The next available free buffer. 
std::optional worker_thread_; std::mutex worker_mutex_; std::condition_variable worker_cond_; std::mutex write_mutex_; std::list buffer_list_; Allocator allocator_; }; } // namespace roctracer #define TRACE_BUFFER_INSTANTIATE() \ roctracer::TraceBufferBase* roctracer::TraceBufferBase::head_ = nullptr; \ std::mutex roctracer::TraceBufferBase::mutex_; #endif // TOOL_TRACE_BUFFER_H_ ./src/tracer_tool/exportmap0000664000175100017510000000010115176134562016153 0ustar jenkinsjenkins{ global: HSA_AMD_TOOL_PRIORITY; OnLoad; OnUnload; local: *; }; ./src/tracer_tool/tracer_tool.cpp0000664000175100017510000006611615176134562017254 0ustar jenkinsjenkins/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* kernel name demangling */ #include #include #include #include #include #include #include /* SYS_xxx definitions */ #include #include /* usleep */ #include "debug.h" #include "loader.h" #include "trace_buffer.h" #include "xml.h" void initialize() __attribute__((constructor(101))); namespace fs = std::experimental::filesystem; // Macro to check ROC-tracer calls status #define CHECK_ROCTRACER(call) \ do { \ if ((call) != ROCTRACER_STATUS_SUCCESS) { \ fatal(#call " failed: %s", roctracer_error_string()); \ } \ } while (false) TRACE_BUFFER_INSTANTIATE(); namespace { inline roctracer_timestamp_t timestamp_ns() { roctracer_timestamp_t timestamp; CHECK_ROCTRACER(roctracer_get_timestamp(×tamp)); return timestamp; } std::vector hsa_api_vec; std::vector hip_api_vec; bool trace_roctx = false; bool trace_hsa_api = false; bool trace_hsa_activity = false; bool trace_hip_api = false; bool trace_hip_activity = false; bool trace_pcs = false; uint32_t GetPid() { static uint32_t pid = syscall(__NR_getpid); return pid; } uint32_t GetTid() { static thread_local uint32_t tid = syscall(__NR_gettid); return tid; } size_t GetBufferSize() { auto bufSize = getenv("ROCTRACER_BUFFER_SIZE"); // Default size if not set if (!bufSize) return 0x200000; return std::stoll({bufSize}); } // Tracing control thread uint32_t control_delay_us = 0; uint32_t control_len_us = 0; uint32_t control_dist_us = 0; std::thread* trace_period_thread = nullptr; std::atomic_bool trace_period_stop = false; void trace_period_fun() { std::this_thread::sleep_for(std::chrono::microseconds(control_delay_us)); do { roctracer_start(); if (trace_period_stop) { roctracer_stop(); break; } std::this_thread::sleep_for(std::chrono::microseconds(control_len_us)); roctracer_stop(); if (trace_period_stop) break; 
std::this_thread::sleep_for(std::chrono::microseconds(control_dist_us)); } while (!trace_period_stop); } // Flushing control thread uint32_t control_flush_us = 0; std::thread* flush_thread = nullptr; std::atomic_bool stop_flush_thread = false; void flush_thr_fun() { while (!stop_flush_thread) { CHECK_ROCTRACER(roctracer_flush_activity()); roctracer::TraceBufferBase::FlushAll(); std::this_thread::sleep_until(std::chrono::steady_clock::now() + std::chrono::microseconds(control_flush_us)); } } class roctracer_plugin_t { public: roctracer_plugin_t(const std::string& plugin_path) { plugin_handle_ = dlopen(plugin_path.c_str(), RTLD_LAZY); if (plugin_handle_ == nullptr) { warning("dlopen(\"%s\") failed: %s", plugin_path.c_str(), dlerror()); return; } roctracer_plugin_write_callback_record_ = reinterpret_cast( dlsym(plugin_handle_, "roctracer_plugin_write_callback_record")); if (!roctracer_plugin_write_callback_record_) return; roctracer_plugin_write_activity_records_ = reinterpret_cast( dlsym(plugin_handle_, "roctracer_plugin_write_activity_records")); if (!roctracer_plugin_write_activity_records_) return; roctracer_plugin_finalize_ = reinterpret_cast( dlsym(plugin_handle_, "roctracer_plugin_finalize")); if (!roctracer_plugin_finalize_) return; if (auto* initialize = reinterpret_cast( dlsym(plugin_handle_, "roctracer_plugin_initialize")); initialize != nullptr) valid_ = initialize(ROCTRACER_VERSION_MAJOR, ROCTRACER_VERSION_MINOR) == 0; } ~roctracer_plugin_t() { if (is_valid()) roctracer_plugin_finalize_(); if (plugin_handle_ != nullptr) dlclose(plugin_handle_); } bool is_valid() const { return valid_; } template auto write_callback_record(Args... args) const { assert(is_valid()); return roctracer_plugin_write_callback_record_(std::forward(args)...); } template auto write_activity_records(Args... 
args) const { assert(is_valid()); return roctracer_plugin_write_activity_records_(std::forward(args)...); } private: bool valid_{false}; void* plugin_handle_; decltype(roctracer_plugin_finalize)* roctracer_plugin_finalize_; decltype(roctracer_plugin_write_callback_record)* roctracer_plugin_write_callback_record_; decltype(roctracer_plugin_write_activity_records)* roctracer_plugin_write_activity_records_; }; std::optional plugin; } // namespace /////////////////////////////////////////////////////////////////////////////////////////////////////// // rocTX annotation tracing struct roctx_trace_entry_t { std::atomic valid; roctracer_record_t record; union { roctx_api_data_t data; }; roctx_trace_entry_t(uint32_t cid, roctracer_timestamp_t time, uint32_t pid, uint32_t tid, roctx_range_id_t rid, const char* message) : valid(roctracer::TRACE_ENTRY_INIT) { record.domain = ACTIVITY_DOMAIN_ROCTX; record.op = cid; record.kind = 0; record.begin_ns = time; record.end_ns = 0; record.process_id = pid; record.thread_id = tid; data.args.message = message != nullptr ? 
strdup(message) : nullptr; data.args.id = rid; } ~roctx_trace_entry_t() { if (data.args.message != nullptr) free(const_cast(data.args.message)); } }; roctracer::TraceBuffer roctx_trace_buffer( "rocTX API", GetBufferSize(), [](roctx_trace_entry_t* entry) { assert(plugin && "plugin is not initialized"); plugin->write_callback_record(&entry->record, &entry->data); }); // rocTX callback function void roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* /* user_arg */) { const roctx_api_data_t* data = reinterpret_cast(callback_data); roctx_trace_entry_t& entry = roctx_trace_buffer.Emplace(cid, timestamp_ns(), GetPid(), GetTid(), data->args.id, data->args.message); entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release); } /////////////////////////////////////////////////////////////////////////////////////////////////////// // HSA API tracing struct hsa_api_trace_entry_t { std::atomic valid; roctracer_record_t record; union { hsa_api_data_t data; }; hsa_api_trace_entry_t(uint32_t cid, roctracer_timestamp_t begin, roctracer_timestamp_t end, uint32_t pid, uint32_t tid, const hsa_api_data_t& hsa_api_data) : valid(roctracer::TRACE_ENTRY_INIT) { record.domain = ACTIVITY_DOMAIN_HSA_API; record.op = cid; record.kind = 0; record.begin_ns = begin; record.end_ns = end; record.process_id = pid; record.thread_id = tid; data = hsa_api_data; } ~hsa_api_trace_entry_t() {} }; roctracer::TraceBuffer hsa_api_trace_buffer( "HSA API", GetBufferSize(), [](hsa_api_trace_entry_t* entry) { assert(plugin && "plugin is not initialized"); plugin->write_callback_record(&entry->record, &entry->data); }); // HSA API callback function void hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) { (void)arg; const hsa_api_data_t* data = reinterpret_cast(callback_data); if (data->phase == ACTIVITY_API_PHASE_ENTER) { *data->phase_data = timestamp_ns(); } else { const roctracer_timestamp_t begin_timestamp = 
*data->phase_data; const roctracer_timestamp_t end_timestamp = (cid == HSA_API_ID_hsa_shut_down) ? begin_timestamp : timestamp_ns(); hsa_api_trace_entry_t& entry = hsa_api_trace_buffer.Emplace(cid, begin_timestamp, end_timestamp, GetPid(), GetTid(), *data); entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release); } } /////////////////////////////////////////////////////////////////////////////////////////////////////// // HIP API tracing struct hip_api_trace_entry_t { std::atomic valid; roctracer_record_t record; union { hip_api_data_t data; }; hip_api_trace_entry_t(uint32_t cid, roctracer_timestamp_t begin, roctracer_timestamp_t end, uint32_t pid, uint32_t tid, const hip_api_data_t& hip_api_data, const char* name) : valid(roctracer::TRACE_ENTRY_INIT) { record.domain = ACTIVITY_DOMAIN_HIP_API; record.op = cid; record.kind = 0; record.begin_ns = begin; record.end_ns = end; record.process_id = pid; record.thread_id = tid; data = hip_api_data; record.kernel_name = name ? strdup(name) : nullptr; } ~hip_api_trace_entry_t() { if (record.kernel_name != nullptr) free(const_cast(record.kernel_name)); } }; static std::string getKernelNameMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices) { std::stringstream name_str; for (int i = 0; i < numDevices; ++i) { if (launchParamsList[i].func != nullptr) { name_str << roctracer::HipLoader::Instance().KernelNameRefByPtr(launchParamsList[i].func) << ":" << roctracer::HipLoader::Instance().GetStreamDeviceId(launchParamsList[i].stream) << ";"; } } return name_str.str(); } template struct Overloaded : Ts... { using Ts::operator()...; }; template Overloaded(Ts...) 
-> Overloaded; static std::optional getKernelName(uint32_t cid, const hip_api_data_t* data) { std::variant function; switch (cid) { case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice: { return getKernelNameMultiKernelMultiDevice( data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList, data->args.hipExtLaunchMultiKernelMultiDevice.numDevices); } case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice: { return getKernelNameMultiKernelMultiDevice( data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList, data->args.hipLaunchCooperativeKernelMultiDevice.numDevices); } case HIP_API_ID_hipLaunchKernel: { function = data->args.hipLaunchKernel.function_address; break; } case HIP_API_ID_hipExtLaunchKernel: { function = data->args.hipExtLaunchKernel.function_address; break; } case HIP_API_ID_hipLaunchCooperativeKernel: { function = data->args.hipLaunchCooperativeKernel.f; break; } case HIP_API_ID_hipLaunchByPtr: { function = data->args.hipLaunchByPtr.hostFunction; break; } case HIP_API_ID_hipGraphAddKernelNode: { function = data->args.hipGraphAddKernelNode.pNodeParams->func; break; } case HIP_API_ID_hipGraphExecKernelNodeSetParams: { function = data->args.hipGraphExecKernelNodeSetParams.pNodeParams->func; break; } case HIP_API_ID_hipGraphKernelNodeSetParams: { function = data->args.hipGraphKernelNodeSetParams.pNodeParams->func; break; } case HIP_API_ID_hipModuleLaunchKernel: { function = data->args.hipModuleLaunchKernel.f; break; } case HIP_API_ID_hipExtModuleLaunchKernel: { function = data->args.hipExtModuleLaunchKernel.f; break; } case HIP_API_ID_hipHccModuleLaunchKernel: { function = data->args.hipHccModuleLaunchKernel.f; break; } default: return {}; } return std::visit( Overloaded{ [](const void* func) { return roctracer::HipLoader::Instance().KernelNameRefByPtr(func); }, [](hipFunction_t func) { return roctracer::HipLoader::Instance().KernelNameRef(func); }, }, function); } roctracer::TraceBuffer hip_api_trace_buffer( "HIP API", GetBufferSize(), 
[](hip_api_trace_entry_t* entry) { assert(plugin && "plugin is not initialized"); plugin->write_callback_record(&entry->record, &entry->data); }); void hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) { (void)arg; const hip_api_data_t* data = reinterpret_cast(callback_data); const roctracer_timestamp_t timestamp = timestamp_ns(); std::optional kernel_name; if (data->phase == ACTIVITY_API_PHASE_ENTER) { *data->phase_data = timestamp; } else { // Post init of HIP APU args hipApiArgsInit((hip_api_id_t)cid, const_cast(data)); kernel_name = getKernelName(cid, data); hip_api_trace_entry_t& entry = hip_api_trace_buffer.Emplace(cid, *data->phase_data, timestamp, GetPid(), GetTid(), *data, kernel_name ? kernel_name->c_str() : nullptr); entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release); } } /////////////////////////////////////////////////////////////////////////////////////////////////////// // Input parser std::string normalize_token(const std::string& token, bool not_empty, const std::string& label) { const std::string space_chars_set = " \t"; const size_t first_pos = token.find_first_not_of(space_chars_set); size_t norm_len = 0; std::string error_str = "none"; if (first_pos != std::string::npos) { const size_t last_pos = token.find_last_not_of(space_chars_set); if (last_pos == std::string::npos) error_str = "token string error: \"" + token + "\""; else { const size_t end_pos = last_pos + 1; if (end_pos <= first_pos) error_str = "token string error: \"" + token + "\""; else norm_len = end_pos - first_pos; } } if (((first_pos != std::string::npos) && (norm_len == 0)) || ((first_pos == std::string::npos) && not_empty)) { error("normalize_token error: %s", error_str.c_str()); } return (norm_len != 0) ? 
token.substr(first_pos, norm_len) : std::string(""); } int get_xml_array(const xml::Xml::level_t* node, const std::string& field, const std::string& delim, std::vector* vec, const char* label = nullptr) { int parse_iter = 0; const auto& opts = node->opts; auto it = opts.find(field); if (it != opts.end()) { const std::string& array_string = it->second; if (label != nullptr) std::cout << label << field << " = " << array_string << std::endl; size_t pos1 = 0; size_t string_len = array_string.length(); while (pos1 < string_len) { // set pos2 such that it also handles case of multiple delimiter options. // For example- "hipLaunchKernel, hipExtModuleLaunchKernel, hipMemsetAsync" // in this example delimiters are ' ' and also ',' size_t pos2 = array_string.find_first_of(delim, pos1); bool found = (pos2 != std::string::npos); size_t token_len = (pos2 != std::string::npos) ? pos2 - pos1 : string_len - pos1; std::string token = array_string.substr(pos1, token_len); std::string norm_str = normalize_token(token, found, "get_xml_array"); if (norm_str.length() != 0) vec->push_back(norm_str); if (!found) break; // update pos2 such that it represents the first non-delimiter character // in case multiple delimiters are specified in variable 'delim' pos1 = array_string.find_first_not_of(delim, pos2); ++parse_iter; } } return parse_iter; } // Allocating tracing pool void open_tracing_pool() { if (roctracer_default_pool() == nullptr) { roctracer_properties_t properties{}; properties.buffer_size = GetBufferSize(); properties.buffer_callback_fun = [](const char* begin, const char* end, void* /* arg */) { assert(plugin && "plugin is not initialized"); plugin->write_activity_records(reinterpret_cast(begin), reinterpret_cast(end)); }; CHECK_ROCTRACER(roctracer_open_pool(&properties)); } } // Flush tracing pool void close_tracing_pool() { if (roctracer_pool_t* pool = roctracer_default_pool(); pool != nullptr) { CHECK_ROCTRACER(roctracer_flush_activity_expl(pool)); 
CHECK_ROCTRACER(roctracer_close_pool_expl(pool)); } } // tool library is loaded static bool is_loaded = false; // tool unload method void tool_unload() { if (is_loaded == false) return; is_loaded = false; if (flush_thread) { stop_flush_thread = true; flush_thread->join(); delete flush_thread; flush_thread = nullptr; } if (trace_period_thread) { trace_period_stop = true; trace_period_thread->join(); delete trace_period_thread; trace_period_thread = nullptr; } if (trace_roctx) { CHECK_ROCTRACER(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX)); } if (trace_hsa_api) { CHECK_ROCTRACER(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API)); } if (trace_hsa_activity || trace_pcs) { CHECK_ROCTRACER(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS)); } if (trace_hip_api || trace_hip_activity) { CHECK_ROCTRACER(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API)); CHECK_ROCTRACER(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); CHECK_ROCTRACER(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS)); } // Flush tracing pool close_tracing_pool(); roctracer::TraceBufferBase::FlushAll(); } // tool load method void tool_load() { if (is_loaded == true) return; is_loaded = true; // API traces switches const char* trace_domain = getenv("ROCTRACER_DOMAIN"); if (trace_domain != nullptr) { // ROCTX domain if (std::string(trace_domain).find("roctx") != std::string::npos) { trace_roctx = true; } // HSA/HIP domains enabling if (std::string(trace_domain).find("hsa") != std::string::npos) { trace_hsa_api = true; trace_hsa_activity = true; } if (std::string(trace_domain).find("hip") != std::string::npos) { trace_hip_api = true; trace_hip_activity = true; } if (std::string(trace_domain).find("sys") != std::string::npos) { trace_hsa_api = true; trace_hip_api = true; trace_hip_activity = true; } // PC sampling enabling if (std::string(trace_domain).find("pcs") != std::string::npos) { trace_pcs = true; } } std::cout << "ROCtracer (" << std::dec << 
GetPid() << "):"; // XML input const char* xml_name = getenv("ROCP_INPUT"); if (xml_name != nullptr) { xml::Xml* xml = xml::Xml::Create(xml_name); if (xml == nullptr) error("input file not found '%s'", xml_name); bool found = false; for (const auto* entry : xml->GetNodes("top.trace")) { auto it = entry->opts.find("name"); if (it == entry->opts.end()) error("trace name is missing"); const std::string& name = it->second; std::vector api_vec; for (const auto* node : entry->nodes) { if (node->tag != "parameters") error("trace node is not supported '%s:%%%s'", name.c_str(), node->tag.c_str()); get_xml_array(node, "api", ", ", &api_vec); // delimiter options given as both spaces and commas (' ' and ',') break; } if (name == "rocTX") { found = true; trace_roctx = true; } if (name == "HSA") { found = true; trace_hsa_api = true; hsa_api_vec = api_vec; } if (name == "GPU") { found = true; trace_hsa_activity = true; } if (name == "HIP") { found = true; trace_hip_api = true; trace_hip_activity = true; hip_api_vec = api_vec; } } if (found) std::cout << " input from \"" << xml_name << "\""; } std::cout << std::endl; // Disable HIP activity if HSA activity was set if (trace_hsa_activity == true) trace_hip_activity = false; // Enable rpcTX callbacks if (trace_roctx) { // initialize HSA tracing std::cout << " rocTX-trace()" << std::endl; CHECK_ROCTRACER( roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, roctx_api_callback, nullptr)); } const char* ctrl_str = getenv("ROCP_CTRL_RATE"); if (ctrl_str != nullptr) { uint32_t ctrl_delay = 0; uint32_t ctrl_len = 0; uint32_t ctrl_rate = 0; if (sscanf(ctrl_str, "%d:%d:%d", &ctrl_delay, &ctrl_len, &ctrl_rate) != 3 || ctrl_len > ctrl_rate) error("invalid ROCP_CTRL_RATE variable (ctrl_delay:ctrl_len:ctrl_rate)"); control_dist_us = ctrl_rate - ctrl_len; control_len_us = ctrl_len; control_delay_us = ctrl_delay; roctracer_stop(); if (ctrl_delay != UINT32_MAX) { std::cout << "ROCtracer: trace control: delay(" << ctrl_delay << "us), length(" 
<< ctrl_len << "us), rate(" << ctrl_rate << "us)" << std::endl; trace_period_thread = new std::thread(trace_period_fun); } else { std::cout << "ROCtracer: trace start disabled" << std::endl; } } const char* flush_str = getenv("ROCP_FLUSH_RATE"); if (flush_str != nullptr) { sscanf(flush_str, "%d", &control_flush_us); if (control_flush_us == 0) error("invalid control flush rate value '%s'", flush_str); std::cout << "ROCtracer: trace control flush rate(" << control_flush_us << "us)" << std::endl; flush_thread = new std::thread(flush_thr_fun); } } extern "C" { // The HSA_AMD_TOOL_PRIORITY variable must be a constant value type initialized by the loader // itself, not by code during _init. 'extern const' seems to do that although that is not a // guarantee. ROCTRACER_EXPORT extern const uint32_t HSA_AMD_TOOL_PRIORITY = 1050; // HSA-runtime tool on-load method ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, const char* const* failed_tool_names) { if (roctracer_version_major() != ROCTRACER_VERSION_MAJOR || roctracer_version_minor() < ROCTRACER_VERSION_MINOR) { warning("the ROCtracer API version is not compatible with this tool"); return true; } // Load output plugin const char* plugin_name = getenv("ROCTRACER_PLUGIN_LIB"); if (plugin_name == nullptr) plugin_name = "libfile_plugin.so"; if (Dl_info dl_info; dladdr((void*)tool_load, &dl_info) != 0) { if (!plugin.emplace(fs::path(dl_info.dli_fname).replace_filename(plugin_name)).is_valid()) plugin.reset(); } tool_load(); // OnUnload may not be called if the ROC runtime is not shutdown by the client // application before exiting, so register an atexit handler to unload the tool. 
std::atexit(tool_unload); // Enable HSA API callbacks/activity if (trace_hsa_api) { std::ostringstream out; out << " HSA-trace("; if (hsa_api_vec.size() != 0) { out << "-*"; for (unsigned i = 0; i < hsa_api_vec.size(); ++i) { uint32_t cid = HSA_API_ID_NUMBER; const char* api = hsa_api_vec[i].c_str(); if (roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api, &cid, nullptr) == ROCTRACER_STATUS_SUCCESS && roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_API, cid, hsa_api_callback, nullptr) == ROCTRACER_STATUS_SUCCESS) out << ' ' << api; else warning("Unable to enable HSA_API tracing for invalid operation %s", api); } } else { CHECK_ROCTRACER( roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HSA_API, hsa_api_callback, nullptr)); out << "*"; } std::cout << out.str() << ')' << std::endl; } // Enable HSA GPU activity if (trace_hsa_activity) { // Allocating tracing pool open_tracing_pool(); std::cout << " HSA-activity-trace()" << std::endl; CHECK_ROCTRACER(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY)); } // Enable HIP API callbacks/activity if (trace_hip_api || trace_hip_activity) { std::ostringstream out; out << " HIP-trace("; // Allocating tracing pool open_tracing_pool(); // Enable tracing if (trace_hip_api) { if (hip_api_vec.size() != 0) { out << "-*"; for (unsigned i = 0; i < hip_api_vec.size(); ++i) { uint32_t cid = HIP_API_ID_NONE; const char* api = hip_api_vec[i].c_str(); if (roctracer_op_code(ACTIVITY_DOMAIN_HIP_API, api, &cid, nullptr) == ROCTRACER_STATUS_SUCCESS && roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API, cid, hip_api_callback, nullptr) == ROCTRACER_STATUS_SUCCESS) out << ' ' << api; else warning("Unable to enable HIP_API tracing for invalid operation %s", api); } } else { CHECK_ROCTRACER( roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, nullptr)); out << "*"; } } if (trace_hip_activity) { CHECK_ROCTRACER(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS)); } std::cout << out.str() << ')' << 
std::endl; } // Enable PC sampling if (trace_pcs) { std::cout << " PCS-trace()" << std::endl; open_tracing_pool(); CHECK_ROCTRACER(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_RESERVED1)); } return true; } // HSA-runtime on-unload method ROCTRACER_EXPORT void OnUnload() { tool_unload(); } } // extern "C" void initialize() { tool_load(); } ./src/util/0000775000175100017510000000000015176134562012661 5ustar jenkinsjenkins./src/util/debug.cpp0000664000175100017510000000760015176134562014456 0ustar jenkinsjenkins/* Copyright (c) 2022 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "debug.h" #include "util.h" #include #include #include #include #include #if defined(ENABLE_BACKTRACE) #include #include namespace { struct BackTraceInfo { struct ::backtrace_state* state = nullptr; std::stringstream sstream{}; int depth = 0; int error = 0; }; void errorCallback(void* data, const char* message, int errnum) { BackTraceInfo* info = static_cast(data); info->sstream << "ROCtracer error: " << message << '(' << errnum << ')'; info->error = 1; } void syminfoCallback(void* data, uintptr_t /* pc */, const char* symname, uintptr_t /* symval */, uintptr_t /* symsize */) { BackTraceInfo* info = static_cast(data); if (symname == nullptr) return; int status; char* demangled = abi::__cxa_demangle(symname, nullptr, nullptr, &status); info->sstream << ' ' << (status == 0 ? demangled : symname); free(demangled); } int fullCallback(void* data, uintptr_t pc, const char* filename, int lineno, const char* function) { BackTraceInfo* info = static_cast(data); info->sstream << std::endl << " #" << std::dec << info->depth++ << ' ' << "0x" << std::noshowbase << std::hex << std::setfill('0') << std::setw(sizeof(pc) * 2) << pc; if (function == nullptr) backtrace_syminfo(info->state, pc, syminfoCallback, errorCallback, data); else { int status; char* demangled = abi::__cxa_demangle(function, nullptr, nullptr, &status); info->sstream << ' ' << (status == 0 ? demangled : function); free(demangled); if (filename != nullptr) { info->sstream << " in " << filename; if (lineno) info->sstream << ':' << std::dec << lineno; } } return info->error; } } // namespace #endif // defined (ENABLE_BACKTRACE) namespace roctracer { void warning(const char* format, ...) { va_list va; va_start(va, format); std::cerr << "ROCtracer warning: " << string_vprintf(format, va) << std::endl; va_end(va); } void error(const char* format, ...) 
{ va_list va; va_start(va, format); std::cerr << "ROCtracer error: " << string_vprintf(format, va) << std::endl; va_end(va); exit(EXIT_FAILURE); } void fatal [[noreturn]] (const char* format, ...) { va_list va; va_start(va, format); std::string message = string_vprintf(format, va); va_end(va); #if defined(ENABLE_BACKTRACE) BackTraceInfo info; info.sstream << std::endl << "Backtrace:"; info.state = ::backtrace_create_state("/proc/self/exe", 0, errorCallback, &info); ::backtrace_full(info.state, 1, fullCallback, errorCallback, &info); message += info.sstream.str(); #endif /* defined (ENABLE_BACKTRACE) */ std::cerr << "ROCtracer fatal error: " << message << std::endl; abort(); } } // namespace roctracer./src/util/logger.h0000664000175100017510000001305315176134562014313 0ustar jenkinsjenkins/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef SRC_UTIL_LOGGER_H_ #define SRC_UTIL_LOGGER_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace roctracer::util { class Logger { public: template Logger& operator<<(T&& m) { std::ostringstream oss; oss << std::forward(m); if (!streaming_) Log(oss.str()); else Put(oss.str()); streaming_ = true; return *this; } using manip_t = void (*)(); Logger& operator<<(manip_t f) { f(); return *this; } static void begm() { Instance().ResetStreaming(true); } static void endl() { Instance().ResetStreaming(false); } const std::string& LastMessage() { std::lock_guard lock(mutex_); return message_[GetTid()]; } static Logger& Instance() { static Logger instance; return instance; } static uint32_t GetPid() { return syscall(__NR_getpid); } static uint32_t GetTid() { return syscall(__NR_gettid); } private: Logger() : file_(nullptr), dirty_(false), streaming_(false), messaging_(false) { const char* var = getenv("ROCTRACER_LOG"); if (var != nullptr) file_ = fopen("/tmp/roctracer_log.txt", "a"); ResetStreaming(false); } ~Logger() { if (file_ != nullptr) { if (dirty_) Put("\n"); fclose(file_); } } void ResetStreaming(const bool messaging) { std::lock_guard lock(mutex_); if (messaging) { message_[GetTid()] = ""; } else if (streaming_) { Put("\n"); dirty_ = false; } messaging_ = messaging; streaming_ = messaging; } void Put(const std::string& m) { std::lock_guard lock(mutex_); if (messaging_) { message_[GetTid()] += m; } if (file_ != nullptr) { dirty_ = true; flock(fileno(file_), LOCK_EX); fprintf(file_, "%s", m.c_str()); fflush(file_); flock(fileno(file_), LOCK_UN); } } void Log(const std::string& m) { const time_t rawtime = time(nullptr); tm tm_info; localtime_r(&rawtime, &tm_info); char tm_str[26]; strftime(tm_str, 26, "%Y-%m-%d %H:%M:%S", &tm_info); std::ostringstream oss; oss << "<" << tm_str << std::dec << " pid" << GetPid() << " tid" << GetTid() << "> " << m; 
Put(oss.str()); } FILE* file_; bool dirty_; bool streaming_; bool messaging_; std::recursive_mutex mutex_; std::map message_; }; } // namespace roctracer::util #define FATAL_LOGGING(stream) \ do { \ roctracer::util::Logger::Instance() \ << "fatal: " << roctracer::util::Logger::begm << stream << roctracer::util::Logger::endl; \ abort(); \ } while (false) #define ERR_LOGGING(stream) \ do { \ roctracer::util::Logger::Instance() \ << "error: " << roctracer::util::Logger::begm << stream << roctracer::util::Logger::endl; \ } while (false) #define INFO_LOGGING(stream) \ do { \ roctracer::util::Logger::Instance() \ << "info: " << roctracer::util::Logger::begm << stream << roctracer::util::Logger::endl; \ } while (false) #define WARN_LOGGING(stream) \ do { \ std::cerr << "ROCProfiler: " << stream << std::endl; \ roctracer::util::Logger::Instance() << "warning: " << roctracer::util::Logger::begm << stream \ << roctracer::util::Logger::endl; \ } while (false) #endif // SRC_UTIL_LOGGER_H_ ./src/util/util.cpp0000664000175100017510000000314415176134562014344 0ustar jenkinsjenkins/* Copyright (c) 2022 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "util.h" #include #include #include namespace roctracer { std::string string_vprintf(const char* format, va_list va) { va_list copy; va_copy(copy, va); size_t size = vsnprintf(NULL, 0, format, copy); va_end(copy); std::string str(size, '\0'); vsprintf(&str[0], format, va); return str; } std::string string_printf(const char* format, ...) { va_list va; va_start(va, format); std::string str(string_vprintf(format, va)); va_end(va); return str; } } // namespace roctracer ./src/util/xml.h0000664000175100017510000002677415176134562013652 0ustar jenkinsjenkins/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef TEST_UTIL_XML_H_ #define TEST_UTIL_XML_H_ #include #include #include #include #include #include #include #include #include #include #include namespace xml { class Xml { public: typedef std::vector token_t; struct level_t; typedef std::vector nodes_t; typedef std::map opts_t; struct level_t { std::string tag; nodes_t nodes; opts_t opts; }; typedef std::vector nodes_vec_t; typedef std::map map_t; enum { DECL_STATE, BODY_STATE }; static Xml* Create(const std::string& file_name, const Xml* obj = NULL) { Xml* xml = new Xml(file_name, obj); if (xml != NULL) { if (xml->Init() == false) { delete xml; xml = NULL; } else { const std::size_t pos = file_name.rfind('/'); const std::string path = (pos != std::string::npos) ? file_name.substr(0, pos + 1) : ""; xml->PreProcess(); nodes_t incl_nodes; for (auto* node : xml->GetNodes("top.include")) { if (node->opts.find("touch") == node->opts.end()) { node->opts["touch"] = ""; incl_nodes.push_back(node); } } for (auto* incl : incl_nodes) { const std::string& incl_name = path + incl->opts["file"]; Xml* ixml = Create(incl_name, xml); if (ixml == NULL) { delete xml; xml = NULL; break; } else { delete ixml; } } if (xml) { xml->Process(); } } } return xml; } static void Destroy(Xml* xml) { delete xml; } std::string GetName() { return file_name_; } void AddExpr(const std::string& full_tag, const std::string& name, const std::string& expr) { const std::size_t pos = full_tag.rfind('.'); const std::size_t pos1 = (pos == std::string::npos) ? 
0 : pos + 1; const std::string level_tag = full_tag.substr(pos1); level_t* level = new level_t; (*map_)[full_tag].push_back(level); level->tag = level_tag; level->opts["name"] = name; level->opts["expr"] = expr; } void AddConst(const std::string& full_tag, const std::string& name, const uint64_t& val) { std::ostringstream oss; oss << val; AddExpr(full_tag, name, oss.str()); } nodes_t GetNodes(const std::string& global_tag) { return (*map_)[global_tag]; } template F ForEach(const F& f_i) { F f = f_i; if (map_) { for (auto& entry : *map_) { for (auto node : entry.second) { if (f.fun(entry.first, node) == false) break; } } } return f; } template F ForEach(const F& f_i) const { F f = f_i; if (map_) { for (auto& entry : *map_) { for (auto node : entry.second) { if (f.fun(entry.first, node) == false) break; } } } return f; } struct print_func { bool fun(const std::string& global_tag, level_t* node) { for (auto& opt : node->opts) { std::cout << global_tag << "." << opt.first << " = " << opt.second << std::endl; } return true; } }; void Print() const { std::cout << "XML file '" << file_name_ << "':" << std::endl; ForEach(print_func()); } private: Xml(const std::string& file_name, const Xml* obj) : file_name_(file_name), file_line_(0), data_size_(0), index_(0), state_(BODY_STATE), comment_(false), included_(false), level_(NULL), map_(NULL) { if (obj != NULL) { map_ = obj->map_; level_ = obj->level_; included_ = true; } } struct delete_func { bool fun(const std::string&, level_t* node) { delete node; return true; } }; ~Xml() { if (included_ == false) { ForEach(delete_func()); delete map_; } } bool Init() { fd_ = open(file_name_.c_str(), O_RDONLY); if (fd_ == -1) { // perror((std::string("open XML file ") + file_name_).c_str()); return false; } if (map_ == NULL) { map_ = new map_t; if (map_ == NULL) return false; AddLevel("top"); } return true; } void PreProcess() { uint32_t ind = 0; char buf[kBufSize]; bool error = false; while (1) { const uint32_t pos = lseek(fd_, 0, 
SEEK_CUR); uint32_t size = read(fd_, buf, kBufSize); if (size <= 0) break; buf[size - 1] = '\0'; if (strncmp(buf, "#include \"", 10) == 0) { for (ind = 0; (ind < size) && (buf[ind] != '\n'); ++ind) { } if (ind == size) { fprintf(stderr, "XML PreProcess failed, line size limit %zu\n", kBufSize); error = true; break; } buf[ind] = '\0'; size = ind; lseek(fd_, pos + ind + 1, SEEK_SET); for (ind = 10; (ind < size) && (buf[ind] != '"'); ++ind) { } if (ind == size) { error = true; break; } buf[ind] = '\0'; AddLevel("include"); AddOption("file", &buf[10]); UpLevel(); } } if (error) { fprintf(stderr, "XML PreProcess failed, line '%s'\n", buf); exit(1); } lseek(fd_, 0, SEEK_SET); } void Process() { token_t remainder; while (1) { token_t token = (remainder.size()) ? remainder : NextToken(); remainder.clear(); // token_t token1 = token; // token1.push_back('\0'); // std::cout << "> " << &token1[0] << std::endl; // End of file if (token.size() == 0) break; switch (state_) { case BODY_STATE: if (token[0] == '<') { bool node_begin = true; unsigned ind = 1; if (token[1] == '/') { node_begin = false; ++ind; } unsigned i = ind; while (i < token.size()) { if (token[i] == '>') break; ++i; } for (unsigned j = i + 1; j < token.size(); ++j) remainder.push_back(token[j]); if (i == token.size()) { if (node_begin) state_ = DECL_STATE; else BadFormat(token); token.push_back('\0'); } else { token[i] = '\0'; } const char* tag = &token[ind]; if (node_begin) { AddLevel(tag); } else { if (strncmp(CurrentLevel().c_str(), tag, strlen(tag)) != 0) { token.back() = '>'; BadFormat(token); } UpLevel(); } } else { BadFormat(token); } break; case DECL_STATE: if (token[0] == '>') { state_ = BODY_STATE; for (unsigned j = 1; j < token.size(); ++j) remainder.push_back(token[j]); continue; } else { token.push_back('\0'); unsigned j = 0; for (j = 0; j < token.size(); ++j) if (token[j] == '=') break; if (j == token.size()) BadFormat(token); token[j] = '\0'; const char* key = &token[0]; const char* value = 
&token[j + 1]; AddOption(key, value); } break; default: std::cout << "XML parser error: wrong state: " << state_ << std::endl; exit(1); } } } bool SpaceCheck() const { bool cond = ((buffer_[index_] == ' ') || (buffer_[index_] == '\t')); return cond; } bool LineEndCheck() { bool found = false; if (buffer_[index_] == '\n') { buffer_[index_] = ' '; ++file_line_; found = true; comment_ = false; } else if (comment_ || (buffer_[index_] == '#')) { found = true; comment_ = true; } return found; } token_t NextToken() { token_t token; bool in_string = false; bool special_symb = false; while (1) { if (data_size_ == 0) { data_size_ = read(fd_, buffer_, kBufSize); if (data_size_ <= 0) break; } if (token.empty()) { while ((index_ < data_size_) && (SpaceCheck() || LineEndCheck())) { ++index_; } } while ((index_ < data_size_) && (in_string || !(SpaceCheck() || LineEndCheck()))) { const char symb = buffer_[index_]; bool skip_symb = false; switch (symb) { case '\\': if (special_symb) { special_symb = false; } else { special_symb = true; skip_symb = true; } break; case '"': if (special_symb) { special_symb = false; } else { in_string = !in_string; if (!in_string) { buffer_[index_] = ' '; --index_; } skip_symb = true; } break; } if (!skip_symb) token.push_back(symb); ++index_; } if (index_ == data_size_) { index_ = 0; data_size_ = 0; } else { if (special_symb || in_string) BadFormat(token); break; } } return token; } void BadFormat(token_t token) { token.push_back('\0'); std::cout << "Error: " << file_name_ << ", line " << file_line_ << ", bad XML token '" << &token[0] << "'" << std::endl; exit(1); } void AddLevel(const std::string& tag) { level_t* level = new level_t; level->tag = tag; if (level_) { level_->nodes.push_back(level); stack_.push_back(level_); } level_ = level; std::string global_tag; for (level_t* level : stack_) { global_tag += level->tag + "."; } global_tag += tag; (*map_)[global_tag].push_back(level_); } void UpLevel() { level_ = stack_.back(); stack_.pop_back(); } 
std::string CurrentLevel() const { return level_->tag; } void AddOption(const std::string& key, const std::string& value) { level_->opts[key] = value; } const std::string file_name_; unsigned file_line_; int fd_; static const size_t kBufSize = 256; char buffer_[kBufSize]; unsigned data_size_; unsigned index_; unsigned state_; bool comment_; std::vector stack_; bool included_; level_t* level_; map_t* map_; }; } // namespace xml #endif // TEST_UTIL_XML_H_ ./src/util/debug.h0000664000175100017510000000321615176134562014122 0ustar jenkinsjenkins/* Copyright (c) 2022 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #pragma once namespace roctracer { extern void warning(const char* format, ...) #if defined(__GNUC__) __attribute__((format(printf, 1, 2))) #endif // defined (__GNUC__) ; extern void error [[noreturn]] (const char* format, ...) #if defined(__GNUC__) __attribute__((format(printf, 1, 2))) #endif // defined (__GNUC__) ; extern void fatal [[noreturn]] (const char* format, ...) 
#if defined(__GNUC__) __attribute__((format(printf, 1, 2))) #endif // defined (__GNUC__) ; } // namespace roctracer using roctracer::error; using roctracer::fatal; using roctracer::warning; ./src/util/util.h0000664000175100017510000000261515176134562014013 0ustar jenkinsjenkins/* Copyright (c) 2022 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #pragma once #include #include namespace roctracer { extern std::string string_vprintf(const char* format, va_list va); extern std::string string_printf(const char* format, ...) #if defined(__GNUC__) __attribute__((format(printf, 1, 2))) #endif // defined (__GNUC__) ; } // namespace roctracer./src/CMakeLists.txt0000664000175100017510000002345015176134562014450 0ustar jenkinsjenkins################################################################################ ## Copyright (c) 2018-2022 Advanced Micro Devices, Inc. 
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal in the Software without restriction, including without limitation the
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
## sell copies of the Software, and to permit persons to whom the Software is
## furnished to do so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included in
## all copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
## IN THE SOFTWARE.
################################################################################

## Every library built in this directory is written to the top-level build directory.
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})

## Both operands are quoted so the test stays well-formed when LIBRARY_TYPE is empty or unset.
if("${LIBRARY_TYPE}" STREQUAL "STATIC")
  add_compile_definitions(STATIC_BUILD=1)
endif()

option(DEBUG_TRACE "Enable debug tracing")
if(DEBUG_TRACE)
  add_compile_definitions(DEBUG_TRACE_ON=1)
endif()

find_package(Python3 COMPONENTS Interpreter REQUIRED)

## Fail at configure time, with an actionable message, when the CppHeaderParser Python module
## cannot be imported (presumably it is needed by the generator scripts run below).
execute_process(
  COMMAND ${Python3_EXECUTABLE} -c "import CppHeaderParser"
  RESULT_VARIABLE CPP_HEADER_PARSER
  OUTPUT_QUIET)

## RESULT_VARIABLE holds an error string instead of a number when the process cannot be started;
## quoting keeps that case a clean "not equal 0" rather than an if() syntax error.
if(NOT "${CPP_HEADER_PARSER}" EQUAL 0)
  message(FATAL_ERROR "\
The \"CppHeaderParser\" Python3 package is not installed. \
Please install it using the following command: \"pip3 install CppHeaderParser\".\
")
endif()

## Locate the directory that holds hsa.h from the include directories exported by the imported
## HSA runtime target.
get_property(HSA_RUNTIME_INCLUDE_DIRECTORIES
  TARGET hsa-runtime64::hsa-runtime64
  PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
find_file(HSA_H hsa.h
  PATHS ${HSA_RUNTIME_INCLUDE_DIRECTORIES}
  PATH_SUFFIXES hsa
  NO_DEFAULT_PATH
  REQUIRED)
get_filename_component(HSA_RUNTIME_INC_PATH "${HSA_H}" DIRECTORY)

## Generate the HSA wrapper functions header
add_custom_command(
  OUTPUT hsa_prof_str.h hsa_prof_str.inline.h
  COMMAND ${Python3_EXECUTABLE} ${PROJECT_SOURCE_DIR}/script/hsaap.py
          ${CMAKE_CURRENT_BINARY_DIR} "${HSA_RUNTIME_INC_PATH}" > /dev/null
  DEPENDS ${PROJECT_SOURCE_DIR}/script/hsaap.py
          "${HSA_RUNTIME_INC_PATH}/hsa.h"
          "${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h"
          "${HSA_RUNTIME_INC_PATH}/hsa_ext_image.h"
          "${HSA_RUNTIME_INC_PATH}/hsa_api_trace.h"
  COMMENT "Generating hsa_prof_str.h,hsa_prof_str.inline.h..."
  VERBATIM)

## Generate the HSA pretty printers
add_custom_command(
  OUTPUT hsa_ostream_ops.h
  COMMAND ${CMAKE_C_COMPILER} -E "${HSA_RUNTIME_INC_PATH}/hsa.h" -o hsa.h.i
  COMMAND ${CMAKE_C_COMPILER} -E "${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h" -o hsa_ext_amd.h.i
  BYPRODUCTS hsa.h.i hsa_ext_amd.h.i
  COMMAND ${Python3_EXECUTABLE} ${PROJECT_SOURCE_DIR}/script/gen_ostream_ops.py
          -in hsa.h.i,hsa_ext_amd.h.i -out hsa_ostream_ops.h > /dev/null
  DEPENDS ${PROJECT_SOURCE_DIR}/script/gen_ostream_ops.py
          "${HSA_RUNTIME_INC_PATH}/hsa.h"
          "${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h"
  COMMENT "Generating hsa_ostream_ops.h..."
  VERBATIM)

get_property(HIP_INCLUDE_DIRECTORIES
  TARGET hip::amdhip64
  PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
find_file(HIP_RUNTIME_API_H hip_runtime_api.h
  PATHS ${HIP_INCLUDE_DIRECTORIES}
  PATH_SUFFIXES hip
  NO_DEFAULT_PATH
  REQUIRED)

## Generate the HIP pretty printers
## NOTE(review): the generator expression on the compiler command line was garbled in this copy
## of the file ("$<$:-I$-I>>"). It is reconstructed here as "one -I flag per entry of
## HIP_INCLUDE_DIRECTORIES, nothing when the list is empty" - confirm against upstream.
## hip_full_api.h is the file that is actually preprocessed, so it is listed in DEPENDS.
add_custom_command(
  OUTPUT hip_ostream_ops.h
  COMMAND ${CMAKE_C_COMPILER}
          "$<$<BOOL:${HIP_INCLUDE_DIRECTORIES}>:-I$<JOIN:${HIP_INCLUDE_DIRECTORIES},$<SEMICOLON>-I>>"
          -E "${CMAKE_CURRENT_SOURCE_DIR}/roctracer/hip_full_api.h"
          -D__HIP_PLATFORM_AMD__=1 -D__HIP_ROCclr__=1 -o hip_runtime_api.h.i
  BYPRODUCTS hip_runtime_api.h.i
  COMMAND ${Python3_EXECUTABLE} ${PROJECT_SOURCE_DIR}/script/gen_ostream_ops.py
          -in hip_runtime_api.h.i -out hip_ostream_ops.h > /dev/null
  DEPENDS ${PROJECT_SOURCE_DIR}/script/gen_ostream_ops.py
          "${CMAKE_CURRENT_SOURCE_DIR}/roctracer/hip_full_api.h"
          "${HIP_RUNTIME_API_H}"
  COMMENT "Generating hip_ostream_ops.h..."
  COMMAND_EXPAND_LISTS
  VERBATIM)

## Install the hand-written public headers, preserving their sub-directory (e.g. ext/).
set(PUBLIC_HEADERS
  roctx.h
  roctracer.h
  roctracer_ext.h
  roctracer_hip.h
  roctracer_hcc.h
  roctracer_hsa.h
  roctracer_roctx.h
  roctracer_plugin.h
  ext/prof_protocol.h)

foreach(header ${PUBLIC_HEADERS})
  get_filename_component(header_subdir ${header} DIRECTORY)
  install(
    FILES ${PROJECT_SOURCE_DIR}/inc/${header}
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/${header_subdir}
    COMPONENT dev)
endforeach()

## Install the headers produced by the custom commands above.
set(GENERATED_HEADERS hip_ostream_ops.h hsa_prof_str.h hsa_ostream_ops.h)

foreach(header ${GENERATED_HEADERS})
  install(
    FILES ${CMAKE_CURRENT_BINARY_DIR}/${header}
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}
    COMPONENT dev)
endforeach()

## Build the util library
## CONFIGURE_DEPENDS makes the build re-run the glob so added/removed sources are noticed.
file(GLOB UTIL_SOURCES CONFIGURE_DEPENDS "util/*.cpp")
add_library(util STATIC ${UTIL_SOURCES})
set_target_properties(util PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(util PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/util)

## Optional backtrace support: needs both the header and the library.
check_include_file(backtrace.h BACKTRACE_H)
if(BACKTRACE_H)
  target_compile_definitions(util PRIVATE HAVE_BACKTRACE_H)
  find_library(BACKTRACE_LIB "backtrace" ${CMAKE_C_IMPLICIT_LINK_DIRECTORIES})
endif()

if(BACKTRACE_LIB)
  target_compile_definitions(util PRIVATE ENABLE_BACKTRACE)
  target_link_libraries(util PRIVATE ${BACKTRACE_LIB})
endif()

## Build the ROCtracer library
## The generated headers are listed as sources so the custom commands run before compilation.
file(GLOB ROCTRACER_SOURCES CONFIGURE_DEPENDS "roctracer/*.cpp")
add_library(roctracer ${LIBRARY_TYPE} ${ROCTRACER_SOURCES} ${GENERATED_HEADERS} hsa_prof_str.inline.h)

set_target_properties(roctracer PROPERTIES
  CXX_VISIBILITY_PRESET hidden
  OUTPUT_NAME "roctracer64"
  DEFINE_SYMBOL "ROCTRACER_EXPORTS"
  LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/roctracer/exportmap
  VERSION ${PROJECT_VERSION}
  SOVERSION ${PROJECT_VERSION_MAJOR})

target_compile_definitions(roctracer
  PUBLIC AMD_INTERNAL_BUILD
  PRIVATE PROF_API_IMPL HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1)

## NOTE(review): the last two PUBLIC entries were garbled in this copy of the file (only "$ $"
## remained). They are reconstructed as build-tree-only paths to the public headers and to the
## generated headers - confirm against upstream.
target_include_directories(roctracer
  PUBLIC
    ${HIP_INCLUDE_DIRECTORIES}
    ${HSA_RUNTIME_INCLUDE_DIRECTORIES}
    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/inc>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
  PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/roctracer
    ${CMAKE_CURRENT_SOURCE_DIR})

## Export only the symbols listed in the version script and refuse unresolved symbols.
target_link_options(roctracer PRIVATE
  -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/roctracer/exportmap
  -Wl,--no-undefined)
target_link_libraries(roctracer
  PRIVATE util hsa-runtime64::hsa-runtime64 stdc++fs Threads::Threads ${CMAKE_DL_LIBS})

install(TARGETS roctracer LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT runtime NAMELINK_SKIP)

## Install name link library in dev component
install(TARGETS roctracer LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT dev NAMELINK_ONLY)

install(TARGETS roctracer LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT asan)

## Build the ROCTX library
file(GLOB ROCTX_SOURCES CONFIGURE_DEPENDS "roctx/*.cpp")
add_library(roctx ${LIBRARY_TYPE} ${ROCTX_SOURCES})

set_target_properties(roctx PROPERTIES
  CXX_VISIBILITY_PRESET hidden
  OUTPUT_NAME "roctx64"
  DEFINE_SYMBOL "ROCTX_EXPORTS"
  LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/roctx/exportmap
  VERSION ${PROJECT_VERSION}
  SOVERSION ${PROJECT_VERSION_MAJOR})

## NOTE(review): the PUBLIC entry was garbled in this copy of the file (only "$" remained);
## reconstructed as the build-tree path to the public headers - confirm against upstream.
target_include_directories(roctx
  PUBLIC $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/inc>
  PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})

target_link_options(roctx PRIVATE
  -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/roctx/exportmap
  -Wl,--no-undefined)

install(TARGETS roctx LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT runtime NAMELINK_SKIP)

## Install name link library in dev component
install(TARGETS roctx LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT dev NAMELINK_ONLY)

install(TARGETS roctx LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT asan)

## Build the tracer_tool library
## The tool and hip_stats libraries are always SHARED, so they are only built in shared builds.
if("${LIBRARY_TYPE}" STREQUAL "SHARED")

  file(GLOB TRACER_TOOL_SOURCES CONFIGURE_DEPENDS "tracer_tool/*.cpp")
  add_library(roctracer_tool SHARED ${TRACER_TOOL_SOURCES})

  set_target_properties(roctracer_tool PROPERTIES
    CXX_VISIBILITY_PRESET hidden
    LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/tracer_tool/exportmap
    INSTALL_RPATH "${ROCM_APPEND_PRIVLIB_RPATH}")

  target_compile_definitions(roctracer_tool
    PRIVATE HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1)

  target_include_directories(roctracer_tool
    PRIVATE
      ${PROJECT_SOURCE_DIR}/inc
      ${CMAKE_CURRENT_SOURCE_DIR}/roctracer
      ${CMAKE_CURRENT_SOURCE_DIR})

  ## Explicit PRIVATE: nothing links against this library, so no usage requirement is exported.
  target_link_libraries(roctracer_tool
    PRIVATE util roctracer hsa-runtime64::hsa-runtime64 stdc++fs Threads::Threads atomic ${CMAKE_DL_LIBS})
  target_link_options(roctracer_tool PRIVATE
    -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/tracer_tool/exportmap
    -Wl,--no-undefined)

  install(TARGETS roctracer_tool LIBRARY
    DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}
    COMPONENT runtime)
  install(TARGETS roctracer_tool LIBRARY
    DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}
    COMPONENT asan)

  add_library(hip_stats SHARED hip_stats/hip_stats.cpp)
  set_target_properties(hip_stats PROPERTIES INSTALL_RPATH "${ROCM_APPEND_PRIVLIB_RPATH}")
  target_compile_definitions(hip_stats PRIVATE __HIP_PLATFORM_AMD__)
  target_link_libraries(hip_stats PRIVATE roctracer stdc++fs)

  install(TARGETS hip_stats LIBRARY
    DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}
    COMPONENT runtime)
  install(TARGETS hip_stats LIBRARY
    DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}
    COMPONENT asan)

endif()
./src/roctracer/0000775000175100017510000000000015176134562013670 5ustar jenkinsjenkins./src/roctracer/correlation_id.cpp0000664000175100017510000000725615176134562017403 0ustar jenkinsjenkins/* Copyright (c) 2022 Advanced Micro Devices, Inc.
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE. */

#include "correlation_id.h"
#include "roctracer.h"

// NOTE(review): the header names of the next three #include directives are missing in this copy
// of the file, and so are the template argument lists further down (e.g. "template class Stack",
// "std::stack>", "std::forward(args)", "std::atomic valid_", "std::optional"). Restore them from
// the upstream source before compiling.
#include
#include
#include

namespace {

// A stack that can be used for TLS variables. TLS destructors are invoked before global
// destructors, which is a problem if operations invoked by global destructors use TLS variables.
// Once this stack has been destructed it still has well defined behavior: every operation
// checks valid_ first, mutations become no-ops, size() reports 0 and top()/emplace() hand out
// the dummy element instead of touching the underlying std::stack.
template class Stack : std::stack> {
  using parent_type = typename std::stack>;

 public:
  // valid_ is raised by the constructor and lowered by the destructor; in between the
  // object behaves like the underlying std::stack.
  Stack() { valid_.store(true, std::memory_order_relaxed); }
  ~Stack() { valid_.store(false, std::memory_order_relaxed); }

  // Constructs an element in place and returns a reference to it. When the stack is not
  // valid the element is constructed into dummy_element_ and that is returned instead.
  template auto& emplace(Args&&... args) {
    return is_valid() ? parent_type::emplace(std::forward(args)...)
                      : dummy_element_ = T(std::forward(args)...);
  }

  // push/pop are silently ignored when the stack is not valid.
  void push(const T& v) {
    if (is_valid()) parent_type::push(v);
  }
  void push(T&& v) {
    if (is_valid()) parent_type::push(std::move(v));
  }
  void pop() {
    if (is_valid()) parent_type::pop();
  }

  // Read-only top: returns the dummy element as-is when the stack is not valid.
  const auto& top() const { return is_valid() ? parent_type::top() : dummy_element_; }
  // Mutable top: resets the dummy element to a value-initialized T before returning it when
  // the stack is not valid.
  auto& top() { return is_valid() ? parent_type::top() : (dummy_element_ = {}); }

  bool is_valid() const { return valid_.load(std::memory_order_relaxed); }
  // An invalid stack reports size 0 and is therefore empty().
  size_t size() const { return is_valid() ? parent_type::size() : 0; }
  bool empty() const { return size() == 0; }

 private:
  std::atomic valid_{false};
  T dummy_element_;  // Dummy element used when the stack is not valid.
};

// Per-thread stacks: one for the correlation IDs generated by CorrelationIdPush(), one for the
// external correlation IDs supplied through ExternalCorrelationIdPush().
thread_local Stack correlation_id_stack{};
thread_local Stack external_id_stack{};

}  // namespace

namespace roctracer {

// Takes the next value of a process-wide counter (starting at 1), pushes it on the calling
// thread's correlation ID stack and returns it.
activity_correlation_id_t CorrelationIdPush() {
  static std::atomic counter{1};
  return correlation_id_stack.emplace(counter.fetch_add(1, std::memory_order_relaxed));
}

// Pops the calling thread's current correlation ID.
void CorrelationIdPop() { correlation_id_stack.pop(); }

// Returns the calling thread's current correlation ID, or 0 when its stack is empty.
activity_correlation_id_t CorrelationId() {
  return correlation_id_stack.empty() ? 0 : correlation_id_stack.top();
}

// Pushes an external correlation ID on the calling thread's external ID stack.
void ExternalCorrelationIdPush(activity_correlation_id_t external_id) {
  external_id_stack.push(external_id);
}

// Pops and returns the calling thread's current external correlation ID, or std::nullopt when
// its stack is empty.
std::optional ExternalCorrelationIdPop() {
  if (external_id_stack.empty()) return std::nullopt;

  auto external_id = external_id_stack.top();
  external_id_stack.pop();
  return std::make_optional(external_id);
}

// Returns the calling thread's current external correlation ID without removing it, or
// std::nullopt when its stack is empty.
std::optional ExternalCorrelationId() {
  return external_id_stack.empty() ? std::nullopt : std::make_optional(external_id_stack.top());
}

}  // namespace roctracer
./src/roctracer/roctracer.cpp0000664000175100017510000010767315176134562016376 0ustar jenkinsjenkins/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/

#include "roctracer.h"
#include "roctracer_hip.h"
#include "roctracer_ext.h"
#include "roctracer_roctx.h"
#include "roctracer_hsa.h"

// NOTE(review): the header names of the following #include directives are missing in this copy
// of the file, and so are most template argument lists below (e.g. "dynamic_cast(&e)",
// "static_cast(data)", "DomainTraits::kOpIdEnd", "std::atomic stopped_status"). Restore them
// from the upstream source before compiling.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "correlation_id.h"
#include "debug.h"
#include "exception.h"
#include "hsa_support.h"
#include "loader.h"
#include "logger.h"
#include "memory_pool.h"
#include "registration_table.h"

// Wrappers that turn C++ exceptions into C API results.
// API_METHOD_PREFIX declares the status variable `err` and opens a try block.
#define API_METHOD_PREFIX                                                                          \
  roctracer_status_t err = ROCTRACER_STATUS_SUCCESS;                                               \
  try {
// API_METHOD_SUFFIX closes the try block, logs any std::exception, maps it to a status with
// GetExcStatus() and returns `err`.
#define API_METHOD_SUFFIX                                                                          \
  }                                                                                                \
  catch (std::exception & e) {                                                                     \
    ERR_LOGGING(__FUNCTION__ << "(), " << e.what());                                               \
    err = GetExcStatus(e);                                                                         \
  }                                                                                                \
  return err;

// API_METHOD_CATCH(X) is the variant for functions that do not return a status: the exception
// is logged and X is returned instead.
#define API_METHOD_CATCH(X)                                                                        \
  }                                                                                                \
  catch (std::exception & e) {                                                                     \
    ERR_LOGGING(__FUNCTION__ << "(), " << e.what());                                               \
  }                                                                                                \
  (void)err;                                                                                       \
  return X;

// Returns the process ID; the getpid syscall is issued once and its result cached in a
// function-local static.
static inline uint32_t GetPid() {
  static auto pid = syscall(__NR_getpid);
  return pid;
}
// Returns the calling thread's ID; the gettid syscall is issued once per thread and cached in a
// thread_local static.
static inline uint32_t GetTid() {
  static thread_local auto tid = syscall(__NR_gettid);
  return tid;
}

using namespace roctracer;

namespace {

///////////////////////////////////////////////////////////////////////////////////////////////////
// Internal library methods
//

roctracer_start_cb_t roctracer_start_cb = nullptr;
roctracer_stop_cb_t roctracer_stop_cb = nullptr;

// Maps an exception to an API status: the status carried by an ApiError, or the generic
// ROCTRACER_STATUS_ERROR for any other exception type.
roctracer_status_t GetExcStatus(const std::exception& e) {
  const ApiError* roctracer_exc_ptr = dynamic_cast(&e);
  return (roctracer_exc_ptr) ? roctracer_exc_ptr->status() : ROCTRACER_STATUS_ERROR;
}

// Held by the enable/disable callback and activity implementations in this file.
std::mutex registration_mutex;

// Memory pool routines and primitives
std::recursive_mutex memory_pool_mutex;
MemoryPool* default_memory_pool = nullptr;

}  // namespace

///////////////////////////////////////////////////////////////////////////////////////////////////
// Public library methods
//

// Returns library version
ROCTRACER_API uint32_t roctracer_version_major() { return ROCTRACER_VERSION_MAJOR; }
ROCTRACER_API uint32_t roctracer_version_minor() { return ROCTRACER_VERSION_MINOR; }

// Returns the last error
// The logger's last message is duplicated with strdup(), so every call returns a fresh
// heap-allocated C string.
// NOTE(review): nothing in this file frees that string - confirm callers are expected to.
ROCTRACER_API const char* roctracer_error_string() {
  return strdup(util::Logger::Instance().LastMessage().c_str());
}

// Return Op string by given domain and activity/API codes
// nullptr returned on the error and the library errno is set
// Note that `kind` is only consulted for ACTIVITY_DOMAIN_HIP_OPS; the other domains use `op`.
ROCTRACER_API const char* roctracer_op_string(uint32_t domain, uint32_t op, uint32_t kind) {
  API_METHOD_PREFIX
  switch (domain) {
    case ACTIVITY_DOMAIN_HSA_API:
      return hsa_support::GetApiName(op);
    case ACTIVITY_DOMAIN_HSA_EVT:
      return hsa_support::GetEvtName(op);
    case ACTIVITY_DOMAIN_HSA_OPS:
      return hsa_support::GetOpsName(op);
    case ACTIVITY_DOMAIN_HIP_OPS:
      return HipLoader::Instance().GetOpName(kind);
    case ACTIVITY_DOMAIN_HIP_API:
      return HipLoader::Instance().ApiName(op);
    case ACTIVITY_DOMAIN_EXT_API:
      return "EXT_API";
    default:
      throw roctracer::ApiError(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID");
  }
  API_METHOD_CATCH(nullptr)
}

// Return Op code and kind by given string
// Only the HSA API and HIP API domains are supported; any other domain raises
// ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID. `kind`, when non-null, is always set to 0.
ROCTRACER_API roctracer_status_t roctracer_op_code(uint32_t domain, const char* str, uint32_t* op,
                                                   uint32_t* kind) {
  API_METHOD_PREFIX
  switch (domain) {
    case ACTIVITY_DOMAIN_HSA_API: {
      *op = hsa_support::GetApiCode(str);
      // HSA_API_ID_NUMBER is treated as the "name not found" result.
      if (*op == HSA_API_ID_NUMBER) {
        EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT,
                    "Invalid API name \"" << str << "\", domain ID(" << domain << ")");
      }
      if (kind != nullptr) *kind = 0;
      break;
    }
    case ACTIVITY_DOMAIN_HIP_API: {
      *op = hipApiIdByName(str);
      // HIP_API_ID_NONE is treated as the "name not found" result.
      if (*op == HIP_API_ID_NONE) {
        EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT,
                    "Invalid API name \"" << str << "\", domain ID(" << domain << ")");
      }
      if (kind != nullptr) *kind = 0;
      break;
    }
    default:
      EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "limited domain ID(" << domain << ")");
  }
  API_METHOD_SUFFIX
}

namespace {

// Per-domain compile-time description: the API data type handed to callbacks (where the domain
// has one), the operation ID type, and the half-open operation ID range [kOpIdBegin, kOpIdEnd).
// NOTE(review): the domain each specialization applies to was lost with the template arguments;
// the member types (hip_*, hsa_*, roctx_*) indicate which is which.
template struct DomainTraits;

template <> struct DomainTraits {
  using ApiData = hip_api_data_t;
  using OperationId = hip_api_id_t;
  static constexpr size_t kOpIdBegin = HIP_API_ID_FIRST;
  static constexpr size_t kOpIdEnd = HIP_API_ID_LAST + 1;
};

template <> struct DomainTraits {
  using ApiData = hsa_api_data_t;
  using OperationId = hsa_api_id_t;
  static constexpr size_t kOpIdBegin = 0;
  static constexpr size_t kOpIdEnd = HSA_API_ID_NUMBER;
};

template <> struct DomainTraits {
  using ApiData = roctx_api_data_t;
  using OperationId = roctx_api_id_t;
  static constexpr size_t kOpIdBegin = 0;
  static constexpr size_t kOpIdEnd = ROCTX_API_ID_NUMBER;
};

template <> struct DomainTraits {
  using OperationId = hip_op_id_t;
  static constexpr size_t kOpIdBegin = 0;
  static constexpr size_t kOpIdEnd = HIP_OP_ID_NUMBER;
};

template <> struct DomainTraits {
  using OperationId = hsa_op_id_t;
  static constexpr size_t kOpIdBegin = 0;
  static constexpr size_t kOpIdEnd = HSA_OP_ID_NUMBER;
};

template <> struct DomainTraits {
  using ApiData = hsa_evt_data_t;
  using OperationId = hsa_evt_id_t;
  static constexpr size_t kOpIdBegin = 0;
  static constexpr size_t kOpIdEnd = HSA_EVT_ID_NUMBER;
};

// Returns the first valid operation ID of `domain`; throws ApiError for an unknown domain.
constexpr uint32_t get_op_begin(activity_domain_t domain) {
  switch (domain) {
    case ACTIVITY_DOMAIN_HSA_OPS:
      return DomainTraits::kOpIdBegin;
    case ACTIVITY_DOMAIN_HSA_API:
      return DomainTraits::kOpIdBegin;
    case ACTIVITY_DOMAIN_HSA_EVT:
      return DomainTraits::kOpIdBegin;
    case ACTIVITY_DOMAIN_HIP_OPS:
      return DomainTraits::kOpIdBegin;
    case ACTIVITY_DOMAIN_HIP_API:
      return DomainTraits::kOpIdBegin;
    case ACTIVITY_DOMAIN_ROCTX:
      return DomainTraits::kOpIdBegin;
    case ACTIVITY_DOMAIN_EXT_API:
      return 0;
    default:
      throw roctracer::ApiError(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID");
  }
}

// Returns one past the last valid operation ID of `domain`; throws ApiError for an unknown
// domain. For ACTIVITY_DOMAIN_EXT_API the end equals the begin, i.e. the range is empty.
constexpr uint32_t get_op_end(activity_domain_t domain) {
  switch (domain) {
    case ACTIVITY_DOMAIN_HSA_OPS:
      return DomainTraits::kOpIdEnd;
    case ACTIVITY_DOMAIN_HSA_API:
      return DomainTraits::kOpIdEnd;
    case ACTIVITY_DOMAIN_HSA_EVT:
      return DomainTraits::kOpIdEnd;
    case ACTIVITY_DOMAIN_HIP_OPS:
      return DomainTraits::kOpIdEnd;
    case ACTIVITY_DOMAIN_HIP_API:
      return DomainTraits::kOpIdEnd;
    case ACTIVITY_DOMAIN_ROCTX:
      return DomainTraits::kOpIdEnd;
    case ACTIVITY_DOMAIN_EXT_API:
      return get_op_begin(ACTIVITY_DOMAIN_EXT_API);
    default:
      throw roctracer::ApiError(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID");
  }
}

// Global "tracing stopped" flag, read through the IsStopped functor that is passed to the
// registration tables below.
std::atomic stopped_status{false};

struct IsStopped {
  bool operator()() const { return stopped_status.load(std::memory_order_relaxed); }
};

// Functor that always reports "not stopped".
struct NeverStopped {
  constexpr bool operator()() { return false; }
};

// A user callback is stored as a pair; the code below invokes `first` as the callback and
// passes `second` as its last argument.
using UserCallback = std::pair;

template
using CallbackRegistrationTable =
    util::RegistrationTable::kOpIdEnd, IsStopped>;

template
using ActivityRegistrationTable =
    util::RegistrationTable::kOpIdEnd, IsStopped>;

// Tracing state machine for an API domain. The runtime calls Enter(); Enter() installs the
// phase_enter/phase_exit function pointers in the caller-provided TraceData, and the runtime
// is presumably expected to invoke them around the traced API call (the call sites are not in
// this file).
template struct ApiTracer {
  using ApiData = typename DomainTraits::ApiData;
  using OperationId = typename DomainTraits::OperationId;

  struct TraceData {
    ApiData api_data;                // API specific data (for example, function arguments).
    uint64_t phase_enter_timestamp;  // timestamp when phase_enter was executed.
    uint64_t phase_data;             // data that can be shared between phase_enter and phase_exit.

    void (*phase_enter)(OperationId operation_id, TraceData* data);
    void (*phase_exit)(OperationId operation_id, TraceData* data);
  };

  // Exit phase without user callback: if activity tracing is enabled for the operation, writes
  // an activity record (begin = phase_enter_timestamp, end = now) to the registered pool, then
  // pops the correlation ID pushed by Enter().
  static void Exit(OperationId operation_id, TraceData* trace_data) {
    if (auto pool = activity_table.Get(operation_id)) {
      assert(trace_data != nullptr);
      activity_record_t record{};

      record.domain = domain;
      record.op = operation_id;
      record.correlation_id = trace_data->api_data.correlation_id;
      record.begin_ns = trace_data->phase_enter_timestamp;
      record.end_ns = hsa_support::timestamp_ns();
      record.process_id = GetPid();
      record.thread_id = GetTid();

      if (auto external_id = ExternalCorrelationId()) {
        roctracer_record_t ext_record{};
        ext_record.domain = ACTIVITY_DOMAIN_EXT_API;
        ext_record.op = ACTIVITY_EXT_OP_EXTERN_ID;
        ext_record.correlation_id = record.correlation_id;
        ext_record.external_id = *external_id;
        // Write the external correlation id record directly followed by the activity record.
        (*pool)->Write(std::array{ext_record, record});
      } else {
        // Write record to the buffer.
        (*pool)->Write(record);
      }
    }
    CorrelationIdPop();
  }

  // Exit phase with user callback: invokes the callback (if still registered) with the phase
  // set to ACTIVITY_API_PHASE_EXIT, then performs the plain Exit() work.
  static void Exit_UserCallback(OperationId operation_id, TraceData* trace_data) {
    if (auto user_callback = callback_table.Get(operation_id)) {
      assert(trace_data != nullptr);
      trace_data->api_data.phase = ACTIVITY_API_PHASE_EXIT;
      user_callback->first(domain, operation_id, &trace_data->api_data, user_callback->second);
    }
    Exit(operation_id, trace_data);
  }

  // Enter phase with user callback: invokes the callback with the phase set to
  // ACTIVITY_API_PHASE_ENTER and selects the matching exit handler. If the callback was
  // unregistered in the meantime, falls back to the plain Exit() handler.
  static void Enter_UserCallback(OperationId operation_id, TraceData* trace_data) {
    if (auto user_callback = callback_table.Get(operation_id)) {
      assert(trace_data != nullptr);
      trace_data->api_data.phase = ACTIVITY_API_PHASE_ENTER;
      trace_data->api_data.phase_data = &trace_data->phase_data;
      user_callback->first(domain, operation_id, &trace_data->api_data, user_callback->second);
      trace_data->phase_exit = Exit_UserCallback;
    } else {
      trace_data->phase_exit = Exit;
    }
  }

  // Entry point called through TracerCallback(). Returns -1 when neither a callback nor
  // activity tracing is enabled for the operation, 0 otherwise. When trace_data is non-null a
  // new correlation ID is pushed and the phase handlers are installed.
  static int Enter(OperationId operation_id, TraceData* trace_data) {
    bool callback_enabled = callback_table.Get(operation_id).has_value(),
         activity_enabled = activity_table.Get(operation_id).has_value();
    if (!callback_enabled && !activity_enabled) return -1;

    if (trace_data != nullptr) {
      // Generate a new correlation ID.
      trace_data->api_data.correlation_id = CorrelationIdPush();

      if (activity_enabled) {
        trace_data->phase_enter_timestamp = hsa_support::timestamp_ns();
        trace_data->phase_enter = nullptr;
        trace_data->phase_exit = Exit;
      }
      if (callback_enabled) {
        // Enter_UserCallback always replaces phase_exit, so this placeholder is only hit if
        // phase_exit is invoked without phase_enter having run first.
        trace_data->phase_enter = Enter_UserCallback;
        trace_data->phase_exit = [](OperationId, TraceData*) { fatal("should not reach here"); };
      }
    }
    return 0;
  }

  static CallbackRegistrationTable callback_table;
  static ActivityRegistrationTable activity_table;
};

// Out-of-class definitions of the static tables.
template
CallbackRegistrationTable ApiTracer::callback_table;

template
ActivityRegistrationTable ApiTracer::activity_table;

using HIP_ApiTracer = ApiTracer;
using HSA_ApiTracer = ApiTracer;

// Registration tables for the domains that do not go through ApiTracer.
CallbackRegistrationTable roctx_api_callback_table;
ActivityRegistrationTable hip_ops_activity_table;
ActivityRegistrationTable hsa_ops_activity_table;
CallbackRegistrationTable hsa_evt_callback_table;

// Pending HIP_OPS record tracking. CLR explicitly calls CommitRecord (sentinel data=0x1)
// once per command to increment submitted. Delivered is deduplicated by correlation_id
// so AccumulateCommand's N records per cid count as one delivery.
static std::atomic hip_ops_submitted{0};
static std::atomic hip_ops_delivered{0};

// Reserved sentinel pointer value matching kCommitRecordSentinel in CLR's activity.cpp.
// Must never be a valid activity_record_t pointer.
static void* const kCommitRecordSentinel = reinterpret_cast(uintptr_t{1});

// Single callback registered with the HSA, HIP and ROCTX runtimes (see the registration groups
// below). Dispatches on `domain`; returns 0 when the operation is being traced and -1
// otherwise. For the activity domains a null `data` is an "is this operation enabled?" query.
int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data) {
  switch (domain) {
    case ACTIVITY_DOMAIN_HSA_API:
      return HSA_ApiTracer::Enter(static_cast(operation_id),
                                  static_cast(data));

    case ACTIVITY_DOMAIN_HIP_API:
      return HIP_ApiTracer::Enter(static_cast(operation_id),
                                  static_cast(data));

    case ACTIVITY_DOMAIN_HIP_OPS:
      // data == kCommitRecordSentinel (0x1): CLR's CommitRecord() signals that a command
      // has been created and will produce an activity record. Called once per command from
      // IsActivityEnabledAndCommit() in command.cpp. Matched by hip_ops_delivered on the
      // record delivery path below.
      if (data == kCommitRecordSentinel) {
        hip_ops_submitted.fetch_add(1, std::memory_order_relaxed);
        return 0;
      }
      if (data != nullptr) {
        auto record = static_cast(data);
        // GetForDrain (rather than Get) is used on the delivery path.
        // NOTE(review): presumably it still returns the pool after tracing was stopped so
        // in-flight records can drain - confirm in registration_table.h.
        if (auto pool = hip_ops_activity_table.GetForDrain(operation_id)) {
          // Dispatch records carry a kernel name: it is written along with the record, and the
          // lambda re-points the written record's kernel_name at the copied string.
          if (operation_id == HIP_OP_ID_DISPATCH && record->kernel_name != nullptr)
            (*pool)->Write(*record, record->kernel_name, strlen(record->kernel_name) + 1,
                           [](auto& record, const void* data) {
                             record.kernel_name = static_cast(data);
                           });
          else
            (*pool)->Write(*record);
        }
        // Deduplicate by correlation_id: AccumulateCommand delivers multiple records
        // with the same cid — only count the transition to a new cid.
        thread_local activity_correlation_id_t last_delivered_cid{0};
        if (record->correlation_id != last_delivered_cid) {
          last_delivered_cid = record->correlation_id;
          hip_ops_delivered.fetch_add(1, std::memory_order_release);
        }
        return 0;
      }
      // IsEnabled query (data == nullptr)
      if (auto pool = hip_ops_activity_table.Get(operation_id)) {
        return 0;
      }
      break;

    case ACTIVITY_DOMAIN_ROCTX:
      if (auto user_callback = roctx_api_callback_table.Get(operation_id)) {
        if (auto api_data = static_cast::ApiData*>(data))
          user_callback->first(ACTIVITY_DOMAIN_ROCTX, operation_id, api_data,
                               user_callback->second);
        return 0;
      }
      break;

    case ACTIVITY_DOMAIN_HSA_OPS:
      if (auto pool = hsa_ops_activity_table.Get(operation_id)) {
        if (auto record = static_cast(data)) (*pool)->Write(*record);
        return 0;
      }
      break;

    case ACTIVITY_DOMAIN_HSA_EVT:
      if (auto user_callback = hsa_evt_callback_table.Get(operation_id)) {
        if (auto api_data = static_cast::ApiData*>(data))
          user_callback->first(ACTIVITY_DOMAIN_HSA_EVT, operation_id, api_data,
                               user_callback->second);
        return 0;
      }
      break;

    default:
      break;
  }
  return -1;
}

// Groups the registration tables that share one runtime tracer hook. The hook is engaged right
// before the first registration into an all-empty group, and disengaged right after the
// unregistration that leaves every table in the group empty.
template struct RegistrationTableGroup {
 private:
  // True when every table in the group reports IsEmpty().
  bool AllEmpty() const {
    return std::apply([](auto&&... tables) { return (tables.IsEmpty() && ...); }, tables_);
  }

 public:
  template
  RegistrationTableGroup(Functor1&& engage_tracer, Functor2&& disengage_tracer, Tables&... tables)
      : engage_tracer_(std::forward(engage_tracer)),
        disengage_tracer_(std::forward(disengage_tracer)),
        tables_(tables...) {}

  // Registers `args` for `operation_id` in `table`, engaging the tracer first if the whole
  // group was empty.
  template
  void Register(T& table, uint32_t operation_id, Args... args) const {
    if (AllEmpty()) engage_tracer_();
    table.Register(operation_id, std::forward(args)...);
  }

  // Unregisters `operation_id` from `table`, disengaging the tracer if the whole group is
  // empty afterwards.
  template void Unregister(T& table, uint32_t operation_id) const {
    table.Unregister(operation_id);
    if (AllEmpty()) disengage_tracer_();
  }

 private:
  const std::function engage_tracer_, disengage_tracer_;
  const std::tuple tables_;
};

// One group per runtime: registering TracerCallback engages tracing, registering nullptr
// disengages it.
RegistrationTableGroup HSA_registration_group(
    []() { hsa_support::RegisterTracerCallback(TracerCallback); },
    []() { hsa_support::RegisterTracerCallback(nullptr); }, HSA_ApiTracer::callback_table,
    HSA_ApiTracer::activity_table, hsa_ops_activity_table, hsa_evt_callback_table);

RegistrationTableGroup HIP_registration_group(
    []() { HipLoader::Instance().RegisterTracerCallback(TracerCallback); },
    []() { HipLoader::Instance().RegisterTracerCallback(nullptr); }, HIP_ApiTracer::callback_table,
    HIP_ApiTracer::activity_table, hip_ops_activity_table);

RegistrationTableGroup ROCTX_registration_group(
    []() { RocTxLoader::Instance().RegisterTracerCallback(TracerCallback); },
    []() { RocTxLoader::Instance().RegisterTracerCallback(nullptr); }, roctx_api_callback_table);

}  // namespace

// Enable runtime API callbacks
// Throws ApiError for an out-of-range operation ID, a null callback or an unknown domain.
// The HSA_OPS and HIP_OPS domains are accepted but ignored (they have no callback table), and
// HIP/ROCTX registrations are skipped when the corresponding loader is not enabled.
static void roctracer_enable_callback_impl(roctracer_domain_t domain, uint32_t operation_id,
                                           roctracer_rtapi_callback_t callback, void* user_data) {
  std::lock_guard lock(registration_mutex);

  if (operation_id >= get_op_end(domain) || callback == nullptr)
    throw ApiError(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "invalid argument");

  switch (domain) {
    case ACTIVITY_DOMAIN_HSA_EVT:
      HSA_registration_group.Register(hsa_evt_callback_table, operation_id, callback, user_data);
      break;
    case ACTIVITY_DOMAIN_HSA_API:
      HSA_registration_group.Register(HSA_ApiTracer::callback_table, operation_id, callback,
                                      user_data);
      break;
    case ACTIVITY_DOMAIN_HSA_OPS:
      break;
    case ACTIVITY_DOMAIN_HIP_API:
      if (HipLoader::Instance().IsEnabled())
        HIP_registration_group.Register(HIP_ApiTracer::callback_table, operation_id, callback,
                                        user_data);
      break;
    case ACTIVITY_DOMAIN_HIP_OPS:
      break;
    case ACTIVITY_DOMAIN_ROCTX:
      if (RocTxLoader::Instance().IsEnabled())
        ROCTX_registration_group.Register(roctx_api_callback_table, operation_id, callback,
                                          user_data);
      break;
    default:
      EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID(" << domain << ")");
  }
}

// Enables `callback` for a single operation of `domain`.
ROCTRACER_API roctracer_status_t roctracer_enable_op_callback(roctracer_domain_t domain,
                                                              uint32_t op,
                                                              roctracer_rtapi_callback_t callback,
                                                              void* user_data) {
  API_METHOD_PREFIX
  roctracer_enable_callback_impl(domain, op, callback, user_data);
  API_METHOD_SUFFIX
}

// Enables `callback` for every operation in the domain's [begin, end) ID range. The loop stops
// at the first operation whose registration throws.
ROCTRACER_API roctracer_status_t roctracer_enable_domain_callback(
    roctracer_domain_t domain, roctracer_rtapi_callback_t callback, void* user_data) {
  API_METHOD_PREFIX
  const uint32_t op_end = get_op_end(domain);
  for (uint32_t op = get_op_begin(domain); op < op_end; ++op)
    roctracer_enable_callback_impl(domain, op, callback, user_data);
  API_METHOD_SUFFIX
}

// Disable runtime API callbacks
// Mirror image of roctracer_enable_callback_impl(): same validation (minus the callback check)
// and the same per-domain handling, using Unregister instead of Register.
static void roctracer_disable_callback_impl(roctracer_domain_t domain, uint32_t operation_id) {
  std::lock_guard lock(registration_mutex);

  if (operation_id >= get_op_end(domain))
    throw ApiError(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "invalid argument");

  switch (domain) {
    case ACTIVITY_DOMAIN_HSA_EVT:
      HSA_registration_group.Unregister(hsa_evt_callback_table, operation_id);
      break;
    case ACTIVITY_DOMAIN_HSA_API:
      HSA_registration_group.Unregister(HSA_ApiTracer::callback_table, operation_id);
      break;
    case ACTIVITY_DOMAIN_HSA_OPS:
      break;
    case ACTIVITY_DOMAIN_HIP_API:
      if (HipLoader::Instance().IsEnabled())
        HIP_registration_group.Unregister(HIP_ApiTracer::callback_table, operation_id);
      break;
    case ACTIVITY_DOMAIN_HIP_OPS:
      break;
    case ACTIVITY_DOMAIN_ROCTX:
      if (RocTxLoader::Instance().IsEnabled())
        ROCTX_registration_group.Unregister(roctx_api_callback_table, operation_id);
      break;
    default:
      EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID(" << domain << ")");
  }
}

// Disables the callback of a single operation of `domain`.
ROCTRACER_API roctracer_status_t roctracer_disable_op_callback(roctracer_domain_t domain,
                                                               uint32_t op) {
  API_METHOD_PREFIX
  roctracer_disable_callback_impl(domain, op);
  API_METHOD_SUFFIX
}

// Disables the callbacks of every operation in the domain's [begin, end) ID range.
ROCTRACER_API roctracer_status_t roctracer_disable_domain_callback(roctracer_domain_t domain) {
  API_METHOD_PREFIX
  const uint32_t op_end = get_op_end(domain);
  for (uint32_t op = get_op_begin(domain); op < op_end; ++op)
    roctracer_disable_callback_impl(domain, op);
  API_METHOD_SUFFIX
}

// Return default pool and set new one if parameter pool is not NULL.
// The value returned is the default pool from before the call (possibly null).
ROCTRACER_API roctracer_pool_t* roctracer_default_pool_expl(roctracer_pool_t* pool) {
  std::lock_guard lock(memory_pool_mutex);
  roctracer_pool_t* p = reinterpret_cast(default_memory_pool);
  if (pool != nullptr) default_memory_pool = reinterpret_cast(pool);
  return p;
}

// Returns the current default pool (possibly null).
ROCTRACER_API roctracer_pool_t* roctracer_default_pool() {
  std::lock_guard lock(memory_pool_mutex);
  return reinterpret_cast(default_memory_pool);
}

// Open memory pool
// Creates a MemoryPool from `properties`. With a non-null `pool` the new pool is returned
// through it; with a null `pool` it becomes the default pool, which raises
// ROCTRACER_STATUS_ERROR_DEFAULT_POOL_ALREADY_DEFINED if a default pool already exists.
// NOTE(review): `properties` is dereferenced without a null check, and the `p == nullptr` test
// cannot fire for a throwing operator new - confirm whether either needs handling.
static void roctracer_open_pool_impl(const roctracer_properties_t* properties,
                                     roctracer_pool_t** pool) {
  std::lock_guard lock(memory_pool_mutex);
  if ((pool == nullptr) && (default_memory_pool != nullptr)) {
    EXC_RAISING(ROCTRACER_STATUS_ERROR_DEFAULT_POOL_ALREADY_DEFINED, "default pool already set");
  }
  MemoryPool* p = new MemoryPool(*properties);
  if (p == nullptr) EXC_RAISING(ROCTRACER_STATUS_ERROR_MEMORY_ALLOCATION, "MemoryPool() error");
  if (pool != nullptr)
    *pool = p;
  else
    default_memory_pool = p;
}

// Opens a memory pool and returns it through `pool` (or installs it as the default pool when
// `pool` is null).
ROCTRACER_API roctracer_status_t roctracer_open_pool_expl(const roctracer_properties_t* properties,
                                                          roctracer_pool_t** pool) {
  API_METHOD_PREFIX
  roctracer_open_pool_impl(properties, pool);
  API_METHOD_SUFFIX
}
ROCTRACER_API roctracer_status_t roctracer_open_pool(const roctracer_properties_t* properties) { API_METHOD_PREFIX roctracer_open_pool_impl(properties, nullptr); API_METHOD_SUFFIX } ROCTRACER_API roctracer_status_t roctracer_next_record(const activity_record_t* record, const activity_record_t** next) { API_METHOD_PREFIX *next = record + 1; API_METHOD_SUFFIX } // Enable activity records logging static void roctracer_enable_activity_impl(roctracer_domain_t domain, uint32_t op, roctracer_pool_t* pool) { std::lock_guard lock(registration_mutex); MemoryPool* memory_pool = reinterpret_cast(pool); if (memory_pool == nullptr) memory_pool = default_memory_pool; if (memory_pool == nullptr) EXC_RAISING(ROCTRACER_STATUS_ERROR_DEFAULT_POOL_UNDEFINED, "no default pool"); if (op >= get_op_end(domain)) throw ApiError(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "invalid argument"); switch (domain) { case ACTIVITY_DOMAIN_HSA_EVT: break; case ACTIVITY_DOMAIN_HSA_API: HSA_registration_group.Register(HSA_ApiTracer::activity_table, op, memory_pool); break; case ACTIVITY_DOMAIN_HSA_OPS: HSA_registration_group.Register(hsa_ops_activity_table, op, memory_pool); break; case ACTIVITY_DOMAIN_HIP_API: if (HipLoader::Instance().IsEnabled()) HIP_registration_group.Register(HIP_ApiTracer::activity_table, op, memory_pool); break; case ACTIVITY_DOMAIN_HIP_OPS: if (HipLoader::Instance().IsEnabled()) HIP_registration_group.Register(hip_ops_activity_table, op, memory_pool); break; case ACTIVITY_DOMAIN_ROCTX: break; default: EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID(" << domain << ")"); } } ROCTRACER_API roctracer_status_t roctracer_enable_op_activity_expl(roctracer_domain_t domain, uint32_t op, roctracer_pool_t* pool) { API_METHOD_PREFIX roctracer_enable_activity_impl(domain, op, pool); API_METHOD_SUFFIX } ROCTRACER_API roctracer_status_t roctracer_enable_op_activity(activity_domain_t domain, uint32_t op) { API_METHOD_PREFIX roctracer_enable_activity_impl(domain, op, 
nullptr); API_METHOD_SUFFIX } static void roctracer_enable_domain_activity_impl(roctracer_domain_t domain, roctracer_pool_t* pool) { const uint32_t op_end = get_op_end(domain); for (uint32_t op = get_op_begin(domain); op < op_end; ++op) try { roctracer_enable_activity_impl(domain, op, pool); } catch (const ApiError& err) { if (err.status() != ROCTRACER_STATUS_ERROR_NOT_IMPLEMENTED) throw; } } ROCTRACER_API roctracer_status_t roctracer_enable_domain_activity_expl(roctracer_domain_t domain, roctracer_pool_t* pool) { API_METHOD_PREFIX roctracer_enable_domain_activity_impl(domain, pool); API_METHOD_SUFFIX } ROCTRACER_API roctracer_status_t roctracer_enable_domain_activity(activity_domain_t domain) { API_METHOD_PREFIX roctracer_enable_domain_activity_impl(domain, nullptr); API_METHOD_SUFFIX } // Disable activity records logging static void roctracer_disable_activity_impl(roctracer_domain_t domain, uint32_t op) { std::lock_guard lock(registration_mutex); if (op >= get_op_end(domain)) throw ApiError(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "invalid argument"); switch (domain) { case ACTIVITY_DOMAIN_HSA_EVT: break; case ACTIVITY_DOMAIN_HSA_API: HSA_registration_group.Unregister(HSA_ApiTracer::activity_table, op); break; case ACTIVITY_DOMAIN_HSA_OPS: HSA_registration_group.Unregister(hsa_ops_activity_table, op); break; case ACTIVITY_DOMAIN_HIP_API: if (HipLoader::Instance().IsEnabled()) HIP_registration_group.Unregister(HIP_ApiTracer::activity_table, op); break; case ACTIVITY_DOMAIN_HIP_OPS: if (HipLoader::Instance().IsEnabled()) HIP_registration_group.Unregister(hip_ops_activity_table, op); break; case ACTIVITY_DOMAIN_ROCTX: break; default: EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID(" << domain << ")"); } } ROCTRACER_API roctracer_status_t roctracer_disable_op_activity(roctracer_domain_t domain, uint32_t op) { API_METHOD_PREFIX roctracer_disable_activity_impl(domain, op); API_METHOD_SUFFIX } static void 
roctracer_disable_domain_activity_impl(roctracer_domain_t domain) { const uint32_t op_end = get_op_end(domain); for (uint32_t op = get_op_begin(domain); op < op_end; ++op) try { roctracer_disable_activity_impl(domain, op); } catch (const ApiError& err) { if (err.status() != ROCTRACER_STATUS_ERROR_NOT_IMPLEMENTED) throw; } } ROCTRACER_API roctracer_status_t roctracer_disable_domain_activity(roctracer_domain_t domain) { API_METHOD_PREFIX roctracer_disable_domain_activity_impl(domain); API_METHOD_SUFFIX } // Close memory pool static void roctracer_close_pool_impl(roctracer_pool_t* pool) { std::lock_guard lock(memory_pool_mutex); if (pool == nullptr) pool = reinterpret_cast(default_memory_pool); if (pool == nullptr) return; MemoryPool* p = reinterpret_cast(pool); if (p == default_memory_pool) default_memory_pool = nullptr; #if 0 // Disable any activities that specify the pool being deleted. std::vector> ops; act_journal.ForEach( [&ops, pool](roctracer_domain_t domain, uint32_t op, const ActivityJournalData& data) { if (pool == data.pool) ops.emplace_back(domain, op); return true; }); for (auto&& [domain, op] : ops) roctracer_disable_activity_impl(domain, op); #endif delete (p); } ROCTRACER_API roctracer_status_t roctracer_close_pool_expl(roctracer_pool_t* pool) { API_METHOD_PREFIX roctracer_close_pool_impl(pool); API_METHOD_SUFFIX } ROCTRACER_API roctracer_status_t roctracer_close_pool() { API_METHOD_PREFIX roctracer_close_pool_impl(NULL); API_METHOD_SUFFIX } // Flush available activity records static void roctracer_flush_activity_impl(roctracer_pool_t* pool) { if (pool == nullptr) pool = roctracer_default_pool(); MemoryPool* default_memory_pool = reinterpret_cast(pool); if (default_memory_pool != nullptr) default_memory_pool->Flush(); } ROCTRACER_API roctracer_status_t roctracer_flush_activity_expl(roctracer_pool_t* pool) { API_METHOD_PREFIX roctracer_flush_activity_impl(pool); API_METHOD_SUFFIX } ROCTRACER_API roctracer_status_t roctracer_flush_activity() { 
API_METHOD_PREFIX roctracer_flush_activity_impl(nullptr); API_METHOD_SUFFIX } // Notifies that the calling thread is entering an external API region. // Push an external correlation id for the calling thread. ROCTRACER_API roctracer_status_t roctracer_activity_push_external_correlation_id(activity_correlation_id_t id) { API_METHOD_PREFIX ExternalCorrelationIdPush(id); API_METHOD_SUFFIX } // Notifies that the calling thread is leaving an external API region. // Pop an external correlation id for the calling thread, and return it in 'last_id' if not null. ROCTRACER_API roctracer_status_t roctracer_activity_pop_external_correlation_id(activity_correlation_id_t* last_id) { API_METHOD_PREFIX auto external_id = ExternalCorrelationIdPop(); if (!external_id) { if (last_id != nullptr) *last_id = 0; EXC_RAISING(ROCTRACER_STATUS_ERROR_MISMATCHED_EXTERNAL_CORRELATION_ID, "unbalanced external correlation id pop"); } if (last_id != nullptr) *last_id = *external_id; API_METHOD_SUFFIX } // Start API ROCTRACER_API void roctracer_start() { if (stopped_status.exchange(false, std::memory_order_relaxed)) { // Reset counters so prior divergence (e.g. from errored commands) doesn't // cause spurious drain timeouts in the next stop() call. hip_ops_submitted.store(0, std::memory_order_relaxed); hip_ops_delivered.store(0, std::memory_order_relaxed); if (roctracer_start_cb) roctracer_start_cb(); } } // Stop API ROCTRACER_API void roctracer_stop() { if (!stopped_status.exchange(true, std::memory_order_relaxed)) { // Drain in-flight activity records committed before stop. // GPU work should already be complete (e.g. after hipDeviceSynchronize), // async handlers just need time to fire and deliver records. 
constexpr int kDrainTimeoutMs = 100; for (int waited = 0; hip_ops_delivered.load(std::memory_order_acquire) < hip_ops_submitted.load(std::memory_order_acquire) && waited < kDrainTimeoutMs; waited++) { usleep(1000); } { uint64_t delivered = hip_ops_delivered.load(std::memory_order_acquire); uint64_t submitted = hip_ops_submitted.load(std::memory_order_acquire); if (delivered < submitted) { ERR_LOGGING("roctracer_stop: drain timeout after " << kDrainTimeoutMs << "ms, " << (submitted - delivered) << " activity records may be lost"); } } // Flush the pool so the client's buffer_callback_fun receives any records // still sitting in the partially-filled buffer before stop returns. roctracer_flush_activity_impl(nullptr); if (roctracer_stop_cb) roctracer_stop_cb(); } } ROCTRACER_API roctracer_status_t roctracer_get_timestamp(roctracer_timestamp_t* timestamp) { API_METHOD_PREFIX *timestamp = hsa_support::timestamp_ns(); API_METHOD_SUFFIX } // Set properties ROCTRACER_API roctracer_status_t roctracer_set_properties(roctracer_domain_t domain, void* properties) { API_METHOD_PREFIX switch (domain) { case ACTIVITY_DOMAIN_HSA_OPS: case ACTIVITY_DOMAIN_HSA_EVT: case ACTIVITY_DOMAIN_HSA_API: case ACTIVITY_DOMAIN_HIP_OPS: case ACTIVITY_DOMAIN_HIP_API: { break; } case ACTIVITY_DOMAIN_EXT_API: { roctracer_ext_properties_t* ops_properties = reinterpret_cast(properties); roctracer_start_cb = ops_properties->start_cb; roctracer_stop_cb = ops_properties->stop_cb; break; } default: EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID(" << domain << ")"); } API_METHOD_SUFFIX } extern "C" { // The HSA_AMD_TOOL_PRIORITY variable must be a constant value type initialized by the loader // itself, not by code during _init. 'extern const' seems to do that although that is not a // guarantee. 
ROCTRACER_EXPORT extern const uint32_t HSA_AMD_TOOL_PRIORITY = 50; // HSA-runtime tool on-load method ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, const char* const* failed_tool_names) { [](auto&&...) {}(runtime_version, failed_tool_count, failed_tool_names); hsa_support::Initialize(table); return true; } ROCTRACER_EXPORT void OnUnload() { hsa_support::Finalize(); } } // extern "C"./src/roctracer/exportmap0000664000175100017510000000274715176134562015644 0ustar jenkinsjenkinsROCTRACER_4.0 { global: OnLoad; OnUnload; roctracer_activity_pop_external_correlation_id; roctracer_activity_push_external_correlation_id; roctracer_close_pool_expl; roctracer_default_pool_expl; roctracer_disable_activity; roctracer_disable_callback; roctracer_disable_domain_activity; roctracer_disable_domain_callback; roctracer_disable_op_activity; roctracer_disable_op_callback; roctracer_enable_activity_expl; roctracer_enable_callback; roctracer_enable_domain_activity_expl; roctracer_enable_domain_callback; roctracer_enable_op_activity_expl; roctracer_enable_op_callback; roctracer_error_string; roctracer_flush_activity_expl; roctracer_flush_buf; roctracer_get_timestamp; roctracer_load; roctracer_mark; roctracer_op_code; roctracer_open_pool_expl; roctracer_op_string; roctracer_set_properties; roctracer_start; roctracer_stop; roctracer_unload; roctracer_version_major; roctracer_version_minor; local: *; }; ROCTRACER_4.1 { global: HSA_AMD_TOOL_PRIORITY; roctracer_close_pool; roctracer_default_pool; roctracer_enable_activity; roctracer_enable_domain_activity; roctracer_enable_op_activity; roctracer_flush_activity; roctracer_next_record; roctracer_open_pool; } ROCTRACER_4.0;./src/roctracer/hsa_support.h0000664000175100017510000000366215176134562016417 0ustar jenkinsjenkins/* Copyright (c) 2022 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef HSA_SUPPORT_H_ #define HSA_SUPPORT_H_ #include "roctracer.h" #include "roctracer_hsa.h" #include namespace roctracer::hsa_support { struct hsa_trace_data_t { hsa_api_data_t api_data; uint64_t phase_enter_timestamp; uint64_t phase_data; void (*phase_enter)(hsa_api_id_t operation_id, hsa_trace_data_t* data); void (*phase_exit)(hsa_api_id_t operation_id, hsa_trace_data_t* data); }; void Initialize(HsaApiTable* table); void Finalize(); const char* GetApiName(uint32_t id); const char* GetEvtName(uint32_t id); const char* GetOpsName(uint32_t id); uint32_t GetApiCode(const char* str); void RegisterTracerCallback(int (*function)(activity_domain_t domain, uint32_t operation_id, void* data)); uint64_t timestamp_ns(); } // namespace roctracer::hsa_support #endif // HSA_SUPPORT_H_ ./src/roctracer/correlation_id.h0000664000175100017510000000427615176134562017047 0ustar jenkinsjenkins/* Copyright (c) 2022 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #pragma once #include "roctracer.h" #include namespace roctracer { // Start a new correlation ID region and push it onto the thread local stack. Correlation ID // regions are nested and per-thread. activity_correlation_id_t CorrelationIdPush(); // Stop the current correlation ID region and pop it from the thread local stack. void CorrelationIdPop(); // Return the ID of the currently active correlation ID region, or 0 if no region is active. activity_correlation_id_t CorrelationId(); // Start a new external correlation ID region for the given \p external_id. As for the internal // correlation ID regions, external correlation ID regions are nested and per-thread. void ExternalCorrelationIdPush(activity_correlation_id_t external_id); // Stop the current external correlation ID region and return the external_id used to start the // region. Return a nullopt if no region was active.
std::optional ExternalCorrelationIdPop(); // Return the current external correlation ID or nullopt if no region is active. std::optional ExternalCorrelationId(); } // namespace roctracer./src/roctracer/hip_full_api.h0000664000175100017510000000010015176134562016463 0ustar jenkinsjenkins#include #include ./src/roctracer/hsa_support.cpp0000664000175100017510000006531615176134562016756 0ustar jenkinsjenkins/* Copyright (c) 2022 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ #include "hsa_support.h" #include "correlation_id.h" #include "debug.h" #include "exception.h" #include "memory_pool.h" #include "roctracer.h" #include "roctracer_hsa.h" #include #include #include #include #include #include #include namespace { std::atomic report_activity; bool IsEnabled(activity_domain_t domain, uint32_t operation_id) { auto report = report_activity.load(std::memory_order_relaxed); return report && report(domain, operation_id, nullptr) == 0; } void ReportActivity(activity_domain_t domain, uint32_t operation_id, void* data) { if (auto report = report_activity.load(std::memory_order_relaxed)) report(domain, operation_id, data); } } // namespace #include "hsa_prof_str.inline.h" namespace roctracer::hsa_support { namespace { CoreApiTable saved_core_api{}; AmdExtTable saved_amd_ext_api{}; hsa_ven_amd_loader_1_01_pfn_t hsa_loader_api{}; struct AgentInfo { uint32_t id; hsa_device_type_t type; }; std::unordered_map agent_info_map; class Tracker { public: enum { ENTRY_INV = 0, ENTRY_INIT = 1, ENTRY_COMPL = 2 }; enum entry_type_t { DFLT_ENTRY_TYPE = 0, API_ENTRY_TYPE = 1, COPY_ENTRY_TYPE = 2, KERNEL_ENTRY_TYPE = 3, NUM_ENTRY_TYPE = 4 }; struct entry_t { std::atomic valid; entry_type_t type; uint64_t correlation_id; roctracer_timestamp_t begin; // begin timestamp, ns roctracer_timestamp_t end; // end timestamp, ns hsa_agent_t agent; uint32_t dev_index; hsa_signal_t orig; hsa_signal_t signal; void (*handler)(const entry_t*); union { struct { } copy; struct { const char* name; hsa_agent_t agent; uint32_t tid; } kernel; }; }; // Add tracker entry inline static void Enable(entry_type_t type, const hsa_agent_t& agent, const hsa_signal_t& signal, entry_t* entry) { hsa_status_t status = HSA_STATUS_ERROR; // Creating a new tracker entry entry->type = type; entry->agent = agent; entry->dev_index = 0; // hsa_rsrc->GetAgentInfo(agent)->dev_index; entry->orig = signal; entry->valid.store(ENTRY_INIT, std::memory_order_release); // Creating a proxy signal status = 
saved_core_api.hsa_signal_create_fn(1, 0, NULL, &(entry->signal)); if (status != HSA_STATUS_SUCCESS) fatal("hsa_signal_create failed"); status = saved_amd_ext_api.hsa_amd_signal_async_handler_fn( entry->signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, entry); if (status != HSA_STATUS_SUCCESS) fatal("hsa_amd_signal_async_handler failed"); } // Delete tracker entry inline static void Disable(entry_t* entry) { saved_core_api.hsa_signal_destroy_fn(entry->signal); entry->valid.store(ENTRY_INV, std::memory_order_release); } private: // Entry completion inline static void Complete(hsa_signal_value_t signal_value, entry_t* entry) { static roctracer_timestamp_t sysclock_period = []() { uint64_t sysclock_hz = 0; hsa_status_t status = saved_core_api.hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz); if (status != HSA_STATUS_SUCCESS) fatal("hsa_system_get_info failed"); return (uint64_t)1000000000 / sysclock_hz; }(); if (entry->type == COPY_ENTRY_TYPE) { hsa_amd_profiling_async_copy_time_t async_copy_time{}; hsa_status_t status = saved_amd_ext_api.hsa_amd_profiling_get_async_copy_time_fn( entry->signal, &async_copy_time); if (status != HSA_STATUS_SUCCESS) fatal("hsa_amd_profiling_get_async_copy_time failed"); entry->begin = async_copy_time.start * sysclock_period; entry->end = async_copy_time.end * sysclock_period; } else { assert(false && "should not reach here"); } hsa_signal_t orig = entry->orig; hsa_signal_t signal = entry->signal; // Releasing completed entry entry->valid.store(ENTRY_COMPL, std::memory_order_release); assert(entry->handler != nullptr); entry->handler(entry); // Original intercepted signal completion if (orig.handle) { amd_signal_t* orig_signal_ptr = reinterpret_cast(orig.handle); amd_signal_t* prof_signal_ptr = reinterpret_cast(signal.handle); orig_signal_ptr->start_ts = prof_signal_ptr->start_ts; orig_signal_ptr->end_ts = prof_signal_ptr->end_ts; [[maybe_unused]] const hsa_signal_value_t new_value = 
saved_core_api.hsa_signal_load_relaxed_fn(orig) - 1; assert(signal_value == new_value && "Tracker::Complete bad signal value"); saved_core_api.hsa_signal_store_screlease_fn(orig, signal_value); } saved_core_api.hsa_signal_destroy_fn(signal); delete entry; } // Handler for packet completion static bool Handler(hsa_signal_value_t signal_value, void* arg) { // Acquire entry entry_t* entry = reinterpret_cast(arg); while (entry->valid.load(std::memory_order_acquire) != ENTRY_INIT) sched_yield(); // Complete entry Tracker::Complete(signal_value, entry); return false; } }; hsa_status_t HSA_API MemoryAllocateIntercept(hsa_region_t region, size_t size, void** ptr) { hsa_status_t status = saved_core_api.hsa_memory_allocate_fn(region, size, ptr); if (status != HSA_STATUS_SUCCESS) return status; if (IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE)) { hsa_evt_data_t data{}; data.allocate.ptr = *ptr; data.allocate.size = size; if (saved_core_api.hsa_region_get_info_fn(region, HSA_REGION_INFO_SEGMENT, &data.allocate.segment) != HSA_STATUS_SUCCESS || saved_core_api.hsa_region_get_info_fn(region, HSA_REGION_INFO_GLOBAL_FLAGS, &data.allocate.global_flag) != HSA_STATUS_SUCCESS) fatal("hsa_region_get_info failed"); ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE, &data); } return HSA_STATUS_SUCCESS; } hsa_status_t MemoryAssignAgentIntercept(void* ptr, hsa_agent_t agent, hsa_access_permission_t access) { hsa_status_t status = saved_core_api.hsa_memory_assign_agent_fn(ptr, agent, access); if (status != HSA_STATUS_SUCCESS) return status; if (IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE)) { hsa_evt_data_t data{}; data.device.ptr = ptr; if (saved_core_api.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_DEVICE, &data.device.type) != HSA_STATUS_SUCCESS) fatal("hsa_agent_get_info failed"); ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE, &data); } return HSA_STATUS_SUCCESS; } hsa_status_t MemoryCopyIntercept(void* dst, const void* src, size_t size) { 
hsa_status_t status = saved_core_api.hsa_memory_copy_fn(dst, src, size); if (status != HSA_STATUS_SUCCESS) return status; if (IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_MEMCOPY)) { hsa_evt_data_t data{}; data.memcopy.dst = dst; data.memcopy.src = src; data.memcopy.size = size; ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_MEMCOPY, &data); } return HSA_STATUS_SUCCESS; } hsa_status_t MemoryPoolAllocateIntercept(hsa_amd_memory_pool_t pool, size_t size, uint32_t flags, void** ptr) { hsa_status_t status = saved_amd_ext_api.hsa_amd_memory_pool_allocate_fn(pool, size, flags, ptr); if (size == 0 || status != HSA_STATUS_SUCCESS) return status; if (IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE)) { hsa_evt_data_t data{}; data.allocate.ptr = *ptr; data.allocate.size = size; if (saved_amd_ext_api.hsa_amd_memory_pool_get_info_fn( pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &data.allocate.segment) != HSA_STATUS_SUCCESS || saved_amd_ext_api.hsa_amd_memory_pool_get_info_fn( pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &data.allocate.global_flag) != HSA_STATUS_SUCCESS) fatal("hsa_region_get_info failed"); ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE, &data); } if (IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE)) { auto callback_data = std::make_pair(pool, ptr); auto agent_callback = [](hsa_agent_t agent, void* iterate_agent_callback_data) { auto [pool, ptr] = *reinterpret_cast(iterate_agent_callback_data); if (hsa_amd_memory_pool_access_t value; saved_amd_ext_api.hsa_amd_agent_memory_pool_get_info_fn( agent, pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &value) != HSA_STATUS_SUCCESS || value != HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT) return HSA_STATUS_SUCCESS; auto it = agent_info_map.find(agent.handle); if (it == agent_info_map.end()) fatal("agent was not found in the agent_info map"); hsa_evt_data_t data{}; data.device.type = it->second.type; data.device.id = it->second.id; data.device.agent = agent; data.device.ptr = ptr; 
ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE, &data); return HSA_STATUS_SUCCESS; }; saved_core_api.hsa_iterate_agents_fn(agent_callback, &callback_data); } return HSA_STATUS_SUCCESS; } hsa_status_t MemoryPoolFreeIntercept(void* ptr) { if (IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE)) { hsa_evt_data_t data{}; data.allocate.ptr = ptr; data.allocate.size = 0; ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE, &data); } return saved_amd_ext_api.hsa_amd_memory_pool_free_fn(ptr); } // Agent allow access callback 'hsa_amd_agents_allow_access' hsa_status_t AgentsAllowAccessIntercept(uint32_t num_agents, const hsa_agent_t* agents, const uint32_t* flags, const void* ptr) { hsa_status_t status = saved_amd_ext_api.hsa_amd_agents_allow_access_fn(num_agents, agents, flags, ptr); if (status != HSA_STATUS_SUCCESS) return status; if (IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE)) { while (num_agents--) { hsa_agent_t agent = *agents++; auto it = agent_info_map.find(agent.handle); if (it == agent_info_map.end()) fatal("agent was not found in the agent_info map"); hsa_evt_data_t data{}; data.device.type = it->second.type; data.device.id = it->second.id; data.device.agent = agent; data.device.ptr = ptr; ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE, &data); } } return HSA_STATUS_SUCCESS; } struct CodeObjectCallbackArg { activity_rtapi_callback_t callback_fun; void* callback_arg; bool unload; }; hsa_status_t CodeObjectCallback(hsa_executable_t executable, hsa_loaded_code_object_t loaded_code_object, void* arg) { hsa_evt_data_t data{}; if (hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info( loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE, &data.codeobj.storage_type) != HSA_STATUS_SUCCESS) fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed"); if (data.codeobj.storage_type == HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE) { if 
(hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info( loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_FILE, &data.codeobj.storage_file) != HSA_STATUS_SUCCESS || data.codeobj.storage_file == -1) fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed"); data.codeobj.memory_base = data.codeobj.memory_size = 0; } else if (data.codeobj.storage_type == HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY) { if (hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info( loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_BASE, &data.codeobj.memory_base) != HSA_STATUS_SUCCESS || hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info( loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_SIZE, &data.codeobj.memory_size) != HSA_STATUS_SUCCESS) fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed"); data.codeobj.storage_file = -1; } else if (data.codeobj.storage_type == HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE) { return HSA_STATUS_SUCCESS; // FIXME: do we really not care about these code objects? 
} else { fatal("unknown code object storage type: %d", data.codeobj.storage_type); } if (hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info( loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE, &data.codeobj.load_base) != HSA_STATUS_SUCCESS || hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info( loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE, &data.codeobj.load_size) != HSA_STATUS_SUCCESS || hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info( loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA, &data.codeobj.load_delta) != HSA_STATUS_SUCCESS) fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed"); if (hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info( loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH, &data.codeobj.uri_length) != HSA_STATUS_SUCCESS) fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed"); std::string uri_str(data.codeobj.uri_length, '\0'); if (hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info( loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI, uri_str.data()) != HSA_STATUS_SUCCESS) fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed"); data.codeobj.uri = uri_str.c_str(); data.codeobj.unload = *static_cast(arg) ? 
1 : 0; ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ, &data); return HSA_STATUS_SUCCESS; } hsa_status_t ExecutableFreezeIntercept(hsa_executable_t executable, const char* options) { hsa_status_t status = saved_core_api.hsa_executable_freeze_fn(executable, options); if (status != HSA_STATUS_SUCCESS) return status; if (IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ)) { bool unload = false; hsa_loader_api.hsa_ven_amd_loader_executable_iterate_loaded_code_objects( executable, CodeObjectCallback, &unload); } return HSA_STATUS_SUCCESS; } hsa_status_t ExecutableDestroyIntercept(hsa_executable_t executable) { if (IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ)) { bool unload = true; hsa_loader_api.hsa_ven_amd_loader_executable_iterate_loaded_code_objects( executable, CodeObjectCallback, &unload); } return saved_core_api.hsa_executable_destroy_fn(executable); } std::atomic profiling_async_copy_enable{false}; hsa_status_t ProfilingAsyncCopyEnableIntercept(bool enable) { hsa_status_t status = saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(enable); if (status == HSA_STATUS_SUCCESS) { profiling_async_copy_enable.exchange(enable, std::memory_order_release); } return status; } void MemoryASyncCopyHandler(const Tracker::entry_t* entry) { activity_record_t record{}; record.domain = ACTIVITY_DOMAIN_HSA_OPS; record.op = HSA_OP_ID_COPY; record.begin_ns = entry->begin; record.end_ns = entry->end; record.device_id = 0; record.correlation_id = entry->correlation_id; ReportActivity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY, &record); } hsa_status_t MemoryASyncCopyOnEngineIntercept( void* dst, hsa_agent_t dst_agent, const void* src, hsa_agent_t src_agent, size_t size, uint32_t num_dep_signals, const hsa_signal_t* dep_signals, hsa_signal_t completion_signal, hsa_amd_sdma_engine_id_t engine_id, bool force_copy_on_sdma) { bool is_enabled = IsEnabled(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY); // FIXME: what happens if the state changes before returning? 
[[maybe_unused]] hsa_status_t status = saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn( profiling_async_copy_enable.load(std::memory_order_relaxed) || is_enabled); assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed"); if (!is_enabled) { return saved_amd_ext_api.hsa_amd_memory_async_copy_on_engine_fn( dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal, engine_id, force_copy_on_sdma); } Tracker::entry_t* entry = new Tracker::entry_t(); entry->handler = MemoryASyncCopyHandler; entry->correlation_id = CorrelationId(); Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); status = saved_amd_ext_api.hsa_amd_memory_async_copy_on_engine_fn( dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, entry->signal, engine_id, force_copy_on_sdma); if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry); return status; } hsa_status_t MemoryASyncCopyIntercept(void* dst, hsa_agent_t dst_agent, const void* src, hsa_agent_t src_agent, size_t size, uint32_t num_dep_signals, const hsa_signal_t* dep_signals, hsa_signal_t completion_signal) { bool is_enabled = IsEnabled(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY); // FIXME: what happens if the state changes before returning? 
[[maybe_unused]] hsa_status_t status = saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn( profiling_async_copy_enable.load(std::memory_order_relaxed) || is_enabled); assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed"); if (!is_enabled) { return saved_amd_ext_api.hsa_amd_memory_async_copy_fn( dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal); } Tracker::entry_t* entry = new Tracker::entry_t(); entry->handler = MemoryASyncCopyHandler; entry->correlation_id = CorrelationId(); Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); status = saved_amd_ext_api.hsa_amd_memory_async_copy_fn( dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, entry->signal); if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry); return status; } hsa_status_t MemoryASyncCopyRectIntercept(const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src, const hsa_dim3_t* src_offset, const hsa_dim3_t* range, hsa_agent_t copy_agent, hsa_amd_copy_direction_t dir, uint32_t num_dep_signals, const hsa_signal_t* dep_signals, hsa_signal_t completion_signal) { bool is_enabled = IsEnabled(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY); // FIXME: what happens if the state changes before returning? 
[[maybe_unused]] hsa_status_t status = saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn( profiling_async_copy_enable.load(std::memory_order_relaxed) || is_enabled); assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed"); if (!is_enabled) { return saved_amd_ext_api.hsa_amd_memory_async_copy_rect_fn( dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals, completion_signal); } Tracker::entry_t* entry = new Tracker::entry_t(); entry->handler = MemoryASyncCopyHandler; entry->correlation_id = CorrelationId(); Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); status = saved_amd_ext_api.hsa_amd_memory_async_copy_rect_fn( dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals, entry->signal); if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry); return status; } } // namespace roctracer_timestamp_t timestamp_ns() { // If the HSA intercept is installed, then use the "original" 'hsa_system_get_info' function to // avoid reporting calls for internal use of the HSA API by the tracer. auto hsa_system_get_info_fn = saved_core_api.hsa_system_get_info_fn; // If the HSA intercept is not installed, use the default 'hsa_system_get_info'. 
if (hsa_system_get_info_fn == nullptr) hsa_system_get_info_fn = hsa_system_get_info; uint64_t sysclock; if (hsa_status_t status = hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock); status == HSA_STATUS_ERROR_NOT_INITIALIZED) return 0; else if (status != HSA_STATUS_SUCCESS) fatal("hsa_system_get_info failed"); static uint64_t sysclock_period = [&]() { uint64_t sysclock_hz = 0; if (hsa_status_t status = hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz); status != HSA_STATUS_SUCCESS) fatal("hsa_system_get_info failed"); return (uint64_t)1000000000 / sysclock_hz; }(); return sysclock * sysclock_period; } void Initialize(HsaApiTable* table) { // Save the HSA core api and amd_ext api. saved_core_api = *table->core_; saved_amd_ext_api = *table->amd_ext_; // Enumerate the agents. if (hsa_support::saved_core_api.hsa_iterate_agents_fn( [](hsa_agent_t agent, void* data) { hsa_support::AgentInfo agent_info; if (hsa_support::saved_core_api.hsa_agent_get_info_fn( agent, HSA_AGENT_INFO_DEVICE, &agent_info.type) != HSA_STATUS_SUCCESS) fatal("hsa_agent_get_info failed"); switch (agent_info.type) { case HSA_DEVICE_TYPE_CPU: static int cpu_agent_count = 0; agent_info.id = cpu_agent_count++; break; case HSA_DEVICE_TYPE_GPU: { uint32_t driver_node_id; if (hsa_support::saved_core_api.hsa_agent_get_info_fn( agent, static_cast(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID), &driver_node_id) != HSA_STATUS_SUCCESS) fatal("hsa_agent_get_info failed"); agent_info.id = driver_node_id; } break; default: static int other_agent_count = 0; agent_info.id = other_agent_count++; break; } hsa_support::agent_info_map.emplace(agent.handle, agent_info); return HSA_STATUS_SUCCESS; }, nullptr) != HSA_STATUS_SUCCESS) fatal("hsa_iterate_agents failed"); // Install the code object intercept. 
hsa_status_t status = table->core_->hsa_system_get_major_extension_table_fn( HSA_EXTENSION_AMD_LOADER, 1, sizeof(hsa_ven_amd_loader_1_01_pfn_t), &hsa_loader_api); if (status != HSA_STATUS_SUCCESS) fatal("hsa_system_get_major_extension_table failed"); // Install the HSA_OPS intercept table->amd_ext_->hsa_amd_memory_async_copy_fn = MemoryASyncCopyIntercept; table->amd_ext_->hsa_amd_memory_async_copy_rect_fn = MemoryASyncCopyRectIntercept; table->amd_ext_->hsa_amd_memory_async_copy_on_engine_fn = MemoryASyncCopyOnEngineIntercept; table->amd_ext_->hsa_amd_profiling_async_copy_enable_fn = ProfilingAsyncCopyEnableIntercept; // Install the HSA_EVT intercept table->core_->hsa_memory_allocate_fn = MemoryAllocateIntercept; table->core_->hsa_memory_assign_agent_fn = MemoryAssignAgentIntercept; table->core_->hsa_memory_copy_fn = MemoryCopyIntercept; table->amd_ext_->hsa_amd_memory_pool_allocate_fn = MemoryPoolAllocateIntercept; table->amd_ext_->hsa_amd_memory_pool_free_fn = MemoryPoolFreeIntercept; table->amd_ext_->hsa_amd_agents_allow_access_fn = AgentsAllowAccessIntercept; table->core_->hsa_executable_freeze_fn = ExecutableFreezeIntercept; table->core_->hsa_executable_destroy_fn = ExecutableDestroyIntercept; // Install the HSA_API wrappers detail::InstallCoreApiWrappers(table->core_); detail::InstallAmdExtWrappers(table->amd_ext_); detail::InstallImageExtWrappers(table->image_ext_); } void Finalize() { if (hsa_status_t status = saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(profiling_async_copy_enable.load(std::memory_order_relaxed)); status != HSA_STATUS_SUCCESS) assert(!"hsa_amd_profiling_async_copy_enable failed"); memset(&saved_core_api, '\0', sizeof(saved_core_api)); memset(&saved_amd_ext_api, '\0', sizeof(saved_amd_ext_api)); memset(&hsa_loader_api, '\0', sizeof(hsa_loader_api)); } const char* GetApiName(uint32_t id) { return detail::GetApiName(id); } const char* GetEvtName(uint32_t id) { switch (id) { case HSA_EVT_ID_ALLOCATE: return "ALLOCATE"; case 
HSA_EVT_ID_DEVICE: return "DEVICE"; case HSA_EVT_ID_MEMCOPY: return "MEMCOPY"; case HSA_EVT_ID_SUBMIT: return "SUBMIT"; case HSA_EVT_ID_KSYMBOL: return "KSYMBOL"; case HSA_EVT_ID_CODEOBJ: return "CODEOBJ"; case HSA_EVT_ID_NUMBER: break; } throw ApiError(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "invalid HSA EVT callback id"); } const char* GetOpsName(uint32_t id) { switch (id) { case HSA_OP_ID_DISPATCH: return "DISPATCH"; case HSA_OP_ID_COPY: return "COPY"; case HSA_OP_ID_BARRIER: return "BARRIER"; case HSA_OP_ID_RESERVED1: return "PCSAMPLE"; } throw ApiError(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "invalid HSA OPS callback id"); } uint32_t GetApiCode(const char* str) { return detail::GetApiCode(str); } void RegisterTracerCallback(int (*function)(activity_domain_t domain, uint32_t operation_id, void* data)) { report_activity.store(function, std::memory_order_relaxed); } } // namespace roctracer::hsa_support ./src/roctracer/exception.h0000664000175100017510000000411215176134562016035 0ustar jenkinsjenkins/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef EXCEPTION_H_ #define EXCEPTION_H_ #include #include #include #include #define EXC_RAISING(error, stream) \ do { \ std::ostringstream oss; \ oss << __FUNCTION__ << "(), " << stream; \ throw roctracer::ApiError(error, oss.str()); \ } while (false) namespace roctracer { class ApiError : public std::runtime_error { public: explicit ApiError(roctracer_status_t status, const std::string& what_arg) : std::runtime_error(what_arg), status_(status) {} roctracer_status_t status() const noexcept { return status_; } private: const roctracer_status_t status_; }; } // namespace roctracer #endif // EXCEPTION_H_ ./src/roctracer/backward_compat.cpp0000664000175100017510000000611615176134562017521 0ustar jenkinsjenkins/* Copyright (c) 2022 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "roctracer.h" extern "C" { // Deprecated functions: ROCTRACER_API int roctracer_load() { return 1; } ROCTRACER_API void roctracer_unload() {} ROCTRACER_API void roctracer_flush_buf() {} ROCTRACER_API void roctracer_mark(const char*) {} ROCTRACER_API roctracer_status_t roctracer_enable_callback(roctracer_rtapi_callback_t callback, void* user_data) { for (uint32_t domain = 0; domain < ACTIVITY_DOMAIN_NUMBER; ++domain) if (auto status = roctracer_enable_domain_callback((roctracer_domain_t)domain, callback, user_data); status != ROCTRACER_STATUS_SUCCESS) return status; return ROCTRACER_STATUS_SUCCESS; } ROCTRACER_API roctracer_status_t roctracer_disable_callback() { for (uint32_t domain = 0; domain < ACTIVITY_DOMAIN_NUMBER; ++domain) if (auto status = roctracer_disable_domain_callback((roctracer_domain_t)domain); status != ROCTRACER_STATUS_SUCCESS) return status; return ROCTRACER_STATUS_SUCCESS; } ROCTRACER_API roctracer_status_t roctracer_enable_activity_expl(roctracer_pool_t* pool) { for (uint32_t domain = 0; domain < ACTIVITY_DOMAIN_NUMBER; ++domain) if (auto status = roctracer_enable_domain_activity_expl((roctracer_domain_t)domain, pool); status != ROCTRACER_STATUS_SUCCESS) return status; return ROCTRACER_STATUS_SUCCESS; } ROCTRACER_API roctracer_status_t roctracer_enable_activity() { for (uint32_t domain = 0; domain < ACTIVITY_DOMAIN_NUMBER; ++domain) if (auto status = roctracer_enable_domain_activity((roctracer_domain_t)domain); status != ROCTRACER_STATUS_SUCCESS) return status; return ROCTRACER_STATUS_SUCCESS; } ROCTRACER_API roctracer_status_t roctracer_disable_activity() { for (uint32_t domain = 0; domain < ACTIVITY_DOMAIN_NUMBER; ++domain) if (auto status = 
roctracer_disable_domain_activity((roctracer_domain_t)domain); status != ROCTRACER_STATUS_SUCCESS) return status; return ROCTRACER_STATUS_SUCCESS; } } // extern "C" ./src/roctracer/registration_table.h0000664000175100017510000001072215176134562017724 0ustar jenkinsjenkins/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef UTIL_CALLBACK_TABLE_H_ #define UTIL_CALLBACK_TABLE_H_ #include "ext/prof_protocol.h" #include #include #include #include #include #include namespace roctracer::util { #if __GNUC__ == 11 || __GNUCC__ == 12 // Starting with gcc-11 (verified with gcc-12 as well), an array out-of-bounds subscript error is // reported for accessing the registration table element at the operation ID index. Validating the // index in the function calling Register/Unregister does not quiet the warning/error in release // builds, so, for gcc-11 and gcc-12, we disable that warning just for this class. 
#define IGNORE_GCC_ARRAY_BOUNDS_ERROR 1 #endif // __GNUC__ == 11 || __GNUCC__ == 12 #if IGNORE_GCC_ARRAY_BOUNDS_ERROR #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Warray-bounds" #endif // IGNORE_GCC_ARRAY_BOUNDS_ERROR namespace detail { struct False { constexpr bool operator()() { return false; } }; } // namespace detail // Generic callbacks table template class RegistrationTable { public: struct table_element_t { std::atomic enabled{false}; mutable std::shared_mutex mutex; T data; }; template void Register(uint32_t operation_id, Args... args) { assert(operation_id < N && "operation_id is out of range"); table_element_t& entry = table_.at(operation_id); std::unique_lock lock(entry.mutex); if (!entry.enabled.exchange(true, std::memory_order_relaxed)) registered_count_.fetch_add(1, std::memory_order_relaxed); entry.data = T{std::forward(args)...}; } void Unregister(uint32_t operation_id) { assert(operation_id < N && "id is out of range"); table_element_t& entry = table_.at(operation_id); std::unique_lock lock(entry.mutex); if (entry.enabled.exchange(false, std::memory_order_relaxed)) registered_count_.fetch_sub(1, std::memory_order_relaxed); } std::optional Get(uint32_t operation_id) const { assert(operation_id < N && "id is out of range"); const table_element_t& entry = table_.at(operation_id); if (!entry.enabled.load(std::memory_order_relaxed) || IsStopped{}()) return std::nullopt; std::shared_lock lock(entry.mutex); return entry.enabled.load(std::memory_order_relaxed) ? std::make_optional(entry.data) : std::nullopt; } // Like Get(), but skips the IsStopped check. Used to drain in-flight records // that were already committed (counter incremented) before stop was signaled. 
std::optional GetForDrain(uint32_t operation_id) const { assert(operation_id < N && "id is out of range"); const table_element_t& entry = table_.at(operation_id); if (!entry.enabled.load(std::memory_order_relaxed)) return std::nullopt; std::shared_lock lock(entry.mutex); return entry.enabled.load(std::memory_order_relaxed) ? std::make_optional(entry.data) : std::nullopt; } bool IsEmpty() const { return registered_count_.load(std::memory_order_relaxed) == 0; } private: std::atomic registered_count_{0}; std::array table_{}; }; #if IGNORE_GCC_ARRAY_BOUNDS_ERROR #pragma GCC diagnostic pop #endif // IGNORE_GCC_ARRAY_BOUNDS_ERROR } // namespace roctracer::util #endif // UTIL_CALLBACK_TABLE_H_ ./src/roctracer/loader.h0000664000175100017510000001525715176134562015321 0ustar jenkinsjenkins/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef ROCTRACER_LOADER_H_ #define ROCTRACER_LOADER_H_ #include "debug.h" #include #include #include #include #include namespace fs = std::experimental::filesystem; namespace roctracer { // Base loader class template class BaseLoader { protected: BaseLoader(const char* pattern) { // Iterate through the process' loaded shared objects and try to dlopen the first entry with a // file name starting with the given 'pattern'. This allows the loader to acquire a handle // to the target library iff it is already loaded. The handle is used to query symbols // exported by that library. auto callback = [this, pattern](dl_phdr_info* info) { if (handle_ == nullptr && fs::path(info->dlpi_name).filename().string().rfind(pattern, 0) == 0) handle_ = ::dlopen(info->dlpi_name, RTLD_LAZY); }; dl_iterate_phdr( [](dl_phdr_info* info, size_t size, void* data) { (*reinterpret_cast(data))(info); return 0; }, &callback); } ~BaseLoader() { if (handle_ != nullptr) ::dlclose(handle_); } BaseLoader(const BaseLoader&) = delete; BaseLoader& operator=(const BaseLoader&) = delete; public: bool IsEnabled() const { return handle_ != nullptr; } template FunctionPtr GetFun(const char* symbol) const { assert(IsEnabled()); auto function_ptr = reinterpret_cast(::dlsym(handle_, symbol)); if (function_ptr == nullptr) fatal("symbol lookup '%s' failed: %s", symbol, ::dlerror()); return function_ptr; } static inline Loader& Instance() { static Loader instance; return instance; } private: void* handle_; }; } // namespace roctracer // HIP runtime library loader class namespace roctracer { #if STATIC_BUILD __attribute__((weak)) const char* hipKernelNameRef(const hipFunction_t f) { return nullptr; } __attribute__((weak)) const char* hipKernelNameRefByPtr(const void* hostFunction, hipStream_t stream) { return nullptr; } __attribute__((weak)) int hipGetStreamDeviceId(hipStream_t stream) { return 0; } __attribute__((weak)) const char* hipGetCmdName(unsigned op) { return nullptr; } __attribute__((weak)) const 
char* hipApiName(uint32_t id) { return nullptr; } __attribute__((weak)) void hipRegisterTracerCallback(int (*function)(activity_domain_t domain, uint32_t operation_id, void* data)) {} class HipLoader { private: HipLoader() {} public: bool IsEnabled() const { return true; } int GetStreamDeviceId(hipStream_t stream) const { return hipGetStreamDeviceId(stream); } const char* KernelNameRef(const hipFunction_t f) const { return hipKernelNameRef(f); } const char* KernelNameRefByPtr(const void* host_function, hipStream_t stream = nullptr) const { return hipKernelNameRefByPtr(host_function, stream); } const char* GetOpName(unsigned op) const { return hipGetCmdName(op); } const char* ApiName(uint32_t id) const { return hipApiName(id); } void RegisterTracerCallback(int (*callback)(activity_domain_t domain, uint32_t operation_id, void* data)) const { return hipRegisterTracerCallback(callback); } static inline HipLoader& Instance() { static HipLoader instance; return instance; } }; #else class HipLoader : public BaseLoader { private: friend HipLoader& BaseLoader::Instance(); HipLoader() : BaseLoader("libamdhip64.so") {} public: int GetStreamDeviceId(hipStream_t stream) const { static auto function = GetFun("hipGetStreamDeviceId"); return function(stream); } const char* KernelNameRef(const hipFunction_t f) const { static auto function = GetFun("hipKernelNameRef"); return function(f); } const char* KernelNameRefByPtr(const void* host_function, hipStream_t stream = nullptr) const { static auto function = GetFun( "hipKernelNameRefByPtr"); return function(host_function, stream); } const char* GetOpName(unsigned op) const { static auto function = GetFun("hipGetCmdName"); return function(op); } const char* ApiName(uint32_t id) const { static auto function = GetFun("hipApiName"); return function(id); } void RegisterTracerCallback(int (*callback)(activity_domain_t domain, uint32_t operation_id, void* data)) const { static auto function = GetFun("hipRegisterTracerCallback"); return 
function(callback); } }; #endif // ROCTX library loader class class RocTxLoader : public BaseLoader { private: friend RocTxLoader& BaseLoader::Instance(); RocTxLoader() : BaseLoader("libroctx64.so") {} public: void RegisterTracerCallback(int (*callback)(activity_domain_t domain, uint32_t operation_id, void* data)) const { static auto function = GetFun("roctxRegisterTracerCallback"); return function(callback); } }; } // namespace roctracer #endif // ROCTRACER_LOADER_H_./src/roctracer/memory_pool.h0000664000175100017510000002132515176134562016405 0ustar jenkinsjenkins/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef MEMORY_POOL_H_ #define MEMORY_POOL_H_ #include "roctracer.h" #include #include #include #include #include #include #include #include namespace roctracer { class MemoryPool { public: MemoryPool(const roctracer_properties_t& properties) : properties_(properties) { // Pool definition: The memory pool is split in 2 buffers of equal size. 
When first initialized, // the write pointer points to the first element of the first buffer. When a buffer is full, or // when Flush() is called, the write pointer moves to the other buffer. // Each buffer should be large enough to hold at least 2 activity records, as record pairs may // be written when external correlation ids are used. const size_t allocation_size = 2 * std::max(2 * sizeof(roctracer_record_t), properties_.buffer_size); pool_begin_ = nullptr; AllocateMemory(&pool_begin_, allocation_size); assert(pool_begin_ != nullptr && "pool allocator failed"); pool_end_ = pool_begin_ + allocation_size; buffer_begin_ = pool_begin_; buffer_end_ = buffer_begin_ + properties_.buffer_size; record_ptr_ = buffer_begin_; data_ptr_ = buffer_end_; // Create a consumer thread and wait for it to be ready to accept work. std::promise ready; std::future future = ready.get_future(); consumer_thread_ = std::thread(&MemoryPool::ConsumerThreadLoop, this, std::move(ready)); future.wait(); } ~MemoryPool() { Flush(); // Wait for the previous flush to complete, then send the exit signal. NotifyConsumerThread(nullptr, nullptr); consumer_thread_.join(); // Free the pool's buffer memory. AllocateMemory(&pool_begin_, 0); } MemoryPool(const MemoryPool&) = delete; MemoryPool& operator=(const MemoryPool&) = delete; template > void Write(Record&& record, const void* data, size_t data_size, Functor&& store_data = {}) { assert(data != nullptr || data_size == 0); // If data is null, then data_size must be 0 std::lock_guard producer_lock(producer_mutex_); // The amount of memory reserved in the buffer to store data. If the data cannot fit because it // is larger than the buffer size minus one record, then the data won't be copied into the // buffer. size_t reserve_data_size = data_size <= (properties_.buffer_size - sizeof(Record)) ? 
data_size : 0; std::byte* next_record = record_ptr_ + sizeof(Record); if (next_record > (data_ptr_ - reserve_data_size)) { NotifyConsumerThread(buffer_begin_, record_ptr_); SwitchBuffers(); next_record = record_ptr_ + sizeof(Record); assert(next_record <= buffer_end_ && "buffer size is less then the record size"); } // Store data in the record. Copy the data first if it fits in the buffer // (reserve_data_size != 0). if (reserve_data_size) { data_ptr_ -= data_size; ::memcpy(data_ptr_, data, data_size); store_data(record, data_ptr_); } else if (data != nullptr) { store_data(record, data); } // Store the record into the buffer, and increment the write pointer. ::memcpy(record_ptr_, &record, sizeof(Record)); record_ptr_ = next_record; // If the data does not fit in the buffer, flush the buffer with the record as is. We don't copy // the data so we make sure that the record and its data are processed by waiting until the // flush is complete. if (data != nullptr && reserve_data_size == 0) { NotifyConsumerThread(buffer_begin_, record_ptr_); SwitchBuffers(); { std::unique_lock consumer_lock(consumer_mutex_); consumer_cond_.wait(consumer_lock, [this]() { return !consumer_arg_.valid; }); } } } template void Write(Record&& record) { using DataPtr = void*; Write(std::forward(record), DataPtr(nullptr), 0, {}); } // Flush the records and block until they are all made visible to the client. void Flush() { { std::lock_guard producer_lock(producer_mutex_); if (record_ptr_ == buffer_begin_) return; NotifyConsumerThread(buffer_begin_, record_ptr_); SwitchBuffers(); } { // Wait for the current operation to complete. std::unique_lock consumer_lock(consumer_mutex_); consumer_cond_.wait(consumer_lock, [this]() { return !consumer_arg_.valid; }); } } private: void SwitchBuffers() { buffer_begin_ = (buffer_end_ == pool_end_) ? 
pool_begin_ : buffer_end_; buffer_end_ = buffer_begin_ + properties_.buffer_size; record_ptr_ = buffer_begin_; data_ptr_ = buffer_end_; } void ConsumerThreadLoop(std::promise ready) { std::unique_lock consumer_lock(consumer_mutex_); // This consumer is now ready to accept work. ready.set_value(); while (true) { consumer_cond_.wait(consumer_lock, [this]() { return consumer_arg_.valid; }); // begin == end == nullptr means the thread needs to exit. if (consumer_arg_.begin == nullptr && consumer_arg_.end == nullptr) break; properties_.buffer_callback_fun(reinterpret_cast(consumer_arg_.begin), reinterpret_cast(consumer_arg_.end), properties_.buffer_callback_arg); // Mark this operation as complete (valid=false) and notify all producers that may be // waiting for this operation to finish, or to start a new operation. See comment below in // NotifyConsumerThread(). consumer_arg_.valid = false; consumer_cond_.notify_all(); } } void NotifyConsumerThread(const std::byte* data_begin, const std::byte* data_end) { std::unique_lock consumer_lock(consumer_mutex_); // If consumer_arg_ is still in use (valid=true), then wait for the consumer thread to finish // processing the current operation. Multiple producers may wait here, one will be allowed to // continue once the consumer thread is idle and valid=false. This prevents a race condition // where operations would be lost if multiple producers could enter this critical section // (sequentially) before the consumer thread could re-acquire the consumer_mutex_ lock. consumer_cond_.wait(consumer_lock, [this]() { return !consumer_arg_.valid; }); consumer_arg_.begin = data_begin; consumer_arg_.end = data_end; consumer_arg_.valid = true; consumer_cond_.notify_all(); } void AllocateMemory(std::byte** ptr, size_t size) const { if (properties_.alloc_fun != nullptr) { // Use the custom allocator provided in the properties. 
properties_.alloc_fun(reinterpret_cast(ptr), size, properties_.alloc_arg); return; } // No custom allocator was provided so use the default malloc/realloc/free allocator. if (*ptr == nullptr) { *ptr = static_cast(malloc(size)); } else if (size != 0) { *ptr = static_cast(realloc(*ptr, size)); } else { free(*ptr); *ptr = nullptr; } } // Properties used to create the memory pool. const roctracer_properties_t properties_; // Pool definition std::byte* pool_begin_; std::byte* pool_end_; std::byte* buffer_begin_; std::byte* buffer_end_; std::byte* record_ptr_; std::byte* data_ptr_; std::mutex producer_mutex_; // Consumer thread std::thread consumer_thread_; struct { const std::byte* begin; const std::byte* end; bool valid = false; } consumer_arg_; std::mutex consumer_mutex_; std::condition_variable consumer_cond_; }; } // namespace roctracer #endif // MEMORY_POOL_H_ ./src/roctx/0000775000175100017510000000000015176134562013043 5ustar jenkinsjenkins./src/roctx/roctx.cpp0000664000175100017510000000676415176134562014723 0ustar jenkinsjenkins/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "roctx.h" #include "roctracer_roctx.h" #include "ext/prof_protocol.h" #include #include namespace { std::atomic report_activity; thread_local int nested_range_level{0}; void ReportActivity(roctx_api_id_t operation_id, const char* message = nullptr, roctx_range_id_t id = {}) { auto function = report_activity.load(std::memory_order_relaxed); if (!function) return; roctx_api_data_t api_data{}; switch (operation_id) { case ROCTX_API_ID_roctxMarkA: api_data.args.roctxMarkA.message = message; break; case ROCTX_API_ID_roctxRangePushA: api_data.args.roctxRangePushA.message = message; break; case ROCTX_API_ID_roctxRangePop: break; case ROCTX_API_ID_roctxRangeStartA: api_data.args.roctxRangeStartA.message = message; api_data.args.roctxRangeStartA.id = id; break; case ROCTX_API_ID_roctxRangeStop: api_data.args.roctxRangeStop.id = id; break; default: assert(!"should not reach here"); } function(ACTIVITY_DOMAIN_ROCTX, operation_id, &api_data); } } // namespace ROCTX_API uint32_t roctx_version_major() { return ROCTX_VERSION_MAJOR; } ROCTX_API uint32_t roctx_version_minor() { return ROCTX_VERSION_MINOR; } ROCTX_API void roctxMarkA(const char* message) { ReportActivity(ROCTX_API_ID_roctxMarkA, message); } ROCTX_API int roctxRangePushA(const char* message) { ReportActivity(ROCTX_API_ID_roctxRangePushA, message); return nested_range_level++; } ROCTX_API int roctxRangePop() { ReportActivity(ROCTX_API_ID_roctxRangePop); if (nested_range_level == 0) return -1; return --nested_range_level; } ROCTX_API roctx_range_id_t roctxRangeStartA(const char* message) { static std::atomic start_stop_range_id(1); auto range_id = start_stop_range_id++; ReportActivity(ROCTX_API_ID_roctxRangeStartA, message, range_id); return range_id; } 
ROCTX_API void roctxRangeStop(roctx_range_id_t range_id) { ReportActivity(ROCTX_API_ID_roctxRangeStop, nullptr, range_id); } extern "C" ROCTX_EXPORT void roctxRegisterTracerCallback(int (*function)(activity_domain_t domain, uint32_t operation_id, void* data)) { report_activity.store(function, std::memory_order_relaxed); } ./src/roctx/exportmap0000664000175100017510000000035715176134562015012 0ustar jenkinsjenkinsROCTX_4.1 { global: roctxMarkA; roctxRangePop; roctxRangePushA; roctxRangeStartA; roctxRangeStop; roctxRegisterTracerCallback; roctx_version_major; roctx_version_minor; local: *; }; ./src/hip_stats/0000775000175100017510000000000015176134562013702 5ustar jenkinsjenkins./src/hip_stats/hip_stats.cpp0000664000175100017510000002453215176134562016412 0ustar jenkinsjenkins/* Copyright (c) 2022 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "roctracer.h" #include "roctracer_hip.h" #include #include #include #include #include #include #include #include #include #include #include #include #define CHECK_ROCTRACER(call) \ do { \ roctracer_status_t status = call; \ if (status != ROCTRACER_STATUS_SUCCESS) { \ std::cerr << roctracer_error_string() << std::endl; \ abort(); \ } \ } while (false) namespace { constexpr uint64_t NextPowerOf2(uint64_t v) { v += (v == 0); v--; v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; v |= v >> 32; return ++v; } constexpr size_t KiB = 1024; constexpr size_t MiB = KiB * KiB; constexpr size_t GiB = KiB * KiB * KiB; std::string HumanReadableSize(size_t size, int precision) { std::stringstream ss; if (size < KiB) ss << size; else if (size < MiB) ss << std::fixed << std::setprecision(precision) << (double)size / KiB << "K"; else if (size < GiB) ss << std::fixed << std::setprecision(precision) << (double)size / MiB << "M"; else ss << std::fixed << std::setprecision(precision) << (double)size / GiB << "G"; return ss.str(); } struct FunctionStats { uint64_t total_time_ns; uint64_t count; void Accumulate(uint64_t time_ns) { total_time_ns += time_ns; ++count; } }; struct MemCopyStats { uint64_t total_time_ns; uint64_t total_byte_size; uint64_t count; void Accumulate(uint64_t time_ns, uint64_t byte_size) { total_time_ns += time_ns; total_byte_size += byte_size; ++count; } }; struct pair_hash { template std::size_t operator()(const std::pair& pair) const { return std::hash()(pair.first) ^ std::hash()(pair.second); } }; std::unordered_map hip_api_stats; std::unordered_map kernel_stats; std::unordered_map, MemCopyStats, pair_hash> memcpy_stats; void CollectStatistics(const char* begin, const char* end, void* /* user_arg */) { const auto* record = reinterpret_cast(begin); while (record < reinterpret_cast(end)) { auto elapsed_time_ns = record->end_ns - record->begin_ns; if (record->domain == ACTIVITY_DOMAIN_HIP_OPS && record->op == HIP_OP_ID_DISPATCH) { const 
char* kernel_name = record->kernel_name; if (kernel_name == nullptr) kernel_name = "Unknown kernels"; kernel_stats[kernel_name].Accumulate(elapsed_time_ns); } else if (record->domain == ACTIVITY_DOMAIN_HIP_OPS && record->op == HIP_OP_ID_COPY) memcpy_stats[std::make_pair(record->kind, NextPowerOf2(record->bytes))].Accumulate( elapsed_time_ns, record->bytes); else if (record->domain == ACTIVITY_DOMAIN_HIP_API) hip_api_stats[record->op].Accumulate(elapsed_time_ns); CHECK_ROCTRACER(roctracer_next_record(record, &record)); } } namespace fs = std::experimental::filesystem; void DumpStatistics() { CHECK_ROCTRACER(roctracer_close_pool()); fs::path output_dir = []() { const char* env_var = getenv("ROCP_OUTPUT_DIR"); return env_var != nullptr ? env_var : ""; }(); std::ofstream out; if (output_dir.empty()) { // If an output directory was not specified, then print the statistics to stdout. out.copyfmt(std::cout); out.clear(std::cout.rdstate()); out.basic_ios::rdbuf(std::cout.rdbuf()); } else { if (auto status = fs::status(output_dir); !fs::exists(status) || !fs::is_directory(status)) { std::cerr << "error: ROCP_OUTPUT_DIR=" << output_dir << " is not a directory" << std::endl; return; } } auto compare = [](const auto& x, const auto& y) { return x.second.total_time_ns > y.second.total_time_ns; }; // Print the HIP API statistics sorted by descending total inclusive time. if (!hip_api_stats.empty()) { const char* filename = "hip_api_stats.csv"; if (!output_dir.empty()) out = std::ofstream(output_dir / filename); if (out.good()) { std::cout << "Dumping HIP API statistics." 
<< std::endl; uint64_t total_hip_api_time_ns = std::accumulate(hip_api_stats.begin(), hip_api_stats.end(), 0, [](uint64_t total_time_ns, const auto& stats) { return total_time_ns + stats.second.total_time_ns; }); out << "\"Name\",\"Calls\",\"TotalDurationNs\",\"AverageNs\",\"Percentage\"" << std::endl; for (auto&& [op, stats] : std::set( hip_api_stats.begin(), hip_api_stats.end(), compare)) out << "\"" << roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op, 0) << "\"," << stats.count << "," << stats.total_time_ns << "," << stats.total_time_ns / stats.count << "," << std::fixed << std::setprecision(4) << (double)stats.total_time_ns / total_hip_api_time_ns * 100 << std::endl; } else { std::cerr << "warning: could not open " << output_dir / filename << std::endl; } } // Print the HIP kernel dispatch statistics sorted by descending execution time. if (!kernel_stats.empty()) { const char* filename = "hip_kernel_stats.csv"; if (!output_dir.empty()) out = std::ofstream(output_dir / filename); if (out.good()) { std::cout << "Dumping HIP kernel dispatch statistics." << std::endl; uint64_t total_kernel_time_ns = std::accumulate(kernel_stats.begin(), kernel_stats.end(), 0, [](uint64_t total_time_ns, const auto& stats) { return total_time_ns + stats.second.total_time_ns; }); out << "\"Name\",\"Calls\",\"TotalDurationNs\",\"AverageNs\",\"Percentage\"" << std::endl; for (auto&& [name, stats] : std::set( kernel_stats.begin(), kernel_stats.end(), compare)) out << "\"" << name << "\"," << stats.count << "," << stats.total_time_ns << "," << stats.total_time_ns / stats.count << "," << std::fixed << std::setprecision(4) << (double)stats.total_time_ns / total_kernel_time_ns * 100 << std::endl; } else { std::cerr << "warning: could not open " << output_dir / filename << std::endl; } } // Print the HIP memory copy statistics sorted by descending transfer time. 
if (!memcpy_stats.empty()) { const char* filename = "hip_copy_stats.csv"; if (!output_dir.empty()) out = std::ofstream(output_dir / filename); if (out.good()) { std::cout << "Dumping HIP memory copy statistics." << std::endl; uint64_t total_memory_copy_time_ns = std::accumulate(memcpy_stats.begin(), memcpy_stats.end(), 0, [](uint64_t total_time_ns, const auto& stats) { return total_time_ns + stats.second.total_time_ns; }); out << "\"Name\",\"Calls\",\"TotalBytes\",\"TotalDurationNs\",\"AverageNs\",\"Percentage\"" << std::endl; for (auto&& [kind, stats] : std::set( memcpy_stats.begin(), memcpy_stats.end(), compare)) out << "\"" << roctracer_op_string(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_COPY, kind.first) << "(" << HumanReadableSize(kind.second >> 1, 0) << "-" << HumanReadableSize(kind.second, 0) << ")" << "\"," << stats.count << "," << stats.total_byte_size << "," << stats.total_time_ns << "," << stats.total_time_ns / stats.count << "," << std::fixed << std::setprecision(4) << (double)stats.total_time_ns / total_memory_copy_time_ns * 100 << std::endl; } else { std::cerr << "warning: could not open " << output_dir / filename << std::endl; } } } } // namespace #include extern "C" ROCTRACER_EXPORT bool OnLoad(HsaApiTable* /* table */, uint64_t /* runtime_version */, uint64_t /* failed_tool_count */, const char* const* /* failed_tool_names */) { roctracer_properties_t properties{}; properties.buffer_size = sizeof(roctracer_record_t) * 10000; properties.buffer_callback_fun = CollectStatistics; properties.buffer_callback_arg = nullptr; CHECK_ROCTRACER(roctracer_open_pool(&properties)); CHECK_ROCTRACER(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); CHECK_ROCTRACER(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_DISPATCH)); CHECK_ROCTRACER(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_COPY)); std::atexit([]() { DumpStatistics(); }); return true; } extern "C" ROCTRACER_EXPORT void OnUnload() {} 
./run_test.sh0000775000175100017510000000264715176134562013330 0ustar jenkinsjenkins#!/bin/bash ################################################################################ # Copyright (c) 2018-2022 Advanced Micro Devices, Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to # deal in the Software without restriction, including without limitation the # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or # sell copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. ################################################################################ ROCM_PATH="${ROCM_PATH:=/opt/rocm}" fatal() { echo "$1" exit 1 } if [ -z "$BUILD_DIR" ] ; then export BUILD_DIR=$PWD; fi cd $BUILD_DIR ./run.sh exit 0 ./build_static.sh0000775000175100017510000000473615176134562014134 0ustar jenkinsjenkins#!/bin/bash -x ################################################################################ # Copyright (c) 2018-2022 Advanced Micro Devices, Inc. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to # deal in the Software without restriction, including without limitation the # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or # sell copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. 
################################################################################ SRC_DIR=`dirname $0` COMPONENT="roctracer" ROCM_PATH="${ROCM_PATH:=/opt/rocm}" LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,$ROCM_PATH/lib:$ROCM_PATH/lib64" DEFAULTS=defaults.sh fatal() { echo "$1" exit 1 } umask 022 if [ -e "$DEFAULTS" ] ; then source "$DEFAULTS"; fi if [ -z "$ROCTRACER_ROOT" ]; then ROCTRACER_ROOT=$SRC_DIR; fi if [ -z "$BUILD_DIR" ] ; then BUILD_DIR=$PWD; fi if [ -z "$BUILD_TYPE" ] ; then BUILD_TYPE="release"; fi if [ -z "$PACKAGE_ROOT" ] ; then PACKAGE_ROOT=$ROCM_PATH; fi if [ -z "$PACKAGE_PREFIX" ] ; then PACKAGE_PREFIX="$ROCM_PATH/$COMPONENT"; fi if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH=$PACKAGE_ROOT; fi if [ -z "$HIP_VDI" ] ; then HIP_VDI=0; fi if [ -n "$ROCM_RPATH" ] ; then LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,${ROCM_RPATH}"; fi ROCTRACER_ROOT=$(cd $ROCTRACER_ROOT && echo $PWD) if [ "$TO_CLEAN" = "yes" ] ; then rm -rf $BUILD_DIR; fi mkdir -p $BUILD_DIR pushd $BUILD_DIR cmake \ -DCMAKE_MODULE_PATH=$ROCM_PATH/hip/cmake \ -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ -DCMAKE_PREFIX_PATH="$PREFIX_PATH" \ -DCMAKE_INSTALL_PREFIX=$PACKAGE_ROOT \ -DCMAKE_SHARED_LINKER_FLAGS="$LD_RUNPATH_FLAG" \ -DLIBRARY_TYPE=STATIC \ $ROCTRACER_ROOT make exit 0 ./CODEOWNERS0000664000175100017510000000002415176134562012504 0ustar jenkinsjenkins* @ammarwa @bgopesh ./LICENSE.md0000664000175100017510000000207015176134562012520 0ustar jenkinsjenkinsMIT License Copyright (C) Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
./.azuredevops/0000775000175100017510000000000015176134566013546 5ustar jenkinsjenkins./.azuredevops/rocm-ci.yml0000664000175100017510000000116515176134566015625 0ustar jenkinsjenkinsresources: repositories: - repository: pipelines_repo type: github endpoint: ROCm name: ROCm/ROCm variables: - group: common - template: /.azuredevops/variables-global.yml@pipelines_repo trigger: batch: true branches: include: - amd-mainline - amd-staging paths: exclude: - .github - doc - LICENSE - README.md pr: autoCancel: true branches: include: - amd-mainline - amd-staging paths: exclude: - .github - doc - LICENSE - README.md drafts: false jobs: - template: ${{ variables.CI_COMPONENT_PATH }}/roctracer.yml@pipelines_repo ./.gitignore0000664000175100017510000000005215176134562013102 0ustar jenkinsjenkins.* !.gitignore *.o *.exe *.swp *.Po build ./doc/0000775000175100017510000000000015176134562011662 5ustar jenkinsjenkins./doc/roctracer_spec.md0000664000175100017510000007012215176134562015204 0ustar jenkinsjenkins# ROC Tracer / ROC-TX Libraries Specification ``` ROC Tracer API version 2 ROC-TX API version 1 - The rocTracer API is agnostic to specific runtime and may trace the runtime API calls and asynchronous GPU activity. - The rocTX API is provided for application code annotation. ``` ## 1. High level overview ``` The goal of the implementation is to provide a runtime independent API for tracing of runtime calls and asynchronous activity, like GPU kernel dispatches and memory moves. The tracing includes callback API for runtime API tracing and activity API for asynchronous activity records logging. Depending on particular runtime intercepting mechanism, the rocTracer library can be dynamically linked, dynamically loaded by the runtime as a plugin or some API wrapper can be loaded using LD_PRELOAD. The library has a C API. The rocTracer library is an API that intercepts runtime API calls and traces asynchronous activity. The activity tracing results are recorded in a ring buffer. 
The rocTX contains application code instrumentation API to support high level correlation of runtime API/activity events. The API includes mark and nested ranges. ``` ## 2. General API ### 2.1. Description ``` The library supports methods for getting the error number and error string of the last failed library API call. To check that the library API header in use conforms to the library binary, the version macros and API methods can be used. Returning the error and error string methods: • roctracer_status_t – error code enumeration • roctracer_error_string – method for returning the error string Library version: • ROCTRACER_VERSION_MAJOR – API major version macro • ROCTRACER_VERSION_MINOR – API minor version macro • roctracer_version_major – library major version • roctracer_version_minor – library minor version ``` ### 2.2. Error codes and error string methods ``` Error code enumeration: typedef enum { ROCTRACER_STATUS_SUCCESS = 0, ROCTRACER_STATUS_ERROR = 1, ROCTRACER_STATUS_UNINIT = 2, ROCTRACER_STATUS_BREAK = 3, ROCTRACER_STATUS_BAD_DOMAIN = 4, ROCTRACER_STATUS_BAD_PARAMETER = 5, ROCTRACER_STATUS_HIP_API_ERR = 6, ROCTRACER_STATUS_HCC_OPS_ERR = 7, ROCTRACER_STATUS_ROCTX_ERR = 8, } roctracer_status_t; Return error string: const char* roctracer_error_string(); ``` ### 2.3. Library version ``` The library provides major and minor versions. Major version is for incompatible API changes and minor version for bug fixes. API version macros defined in the library API header ‘roctracer.h’: ROCTRACER_VERSION_MAJOR ROCTRACER_VERSION_MINOR Methods to check library major and minor version: uint32_t roctracer_version_major(); uint32_t roctracer_version_minor(); ``` ## 3. Frontend API ### 3.1. Description ``` The rocTracer provides support for runtime API callbacks and activity records logging. The APIs of different runtimes at different levels are considered as different API domains with assigned domain IDs. For example, language level and driver level. 
The API callbacks provide the API calls arguments and are called in two phases: on “enter” and on “exit”. The activity records are logged to the ring buffer and can be associated with the respective API calls using the correlation ID. Activity API can be used to enable collecting of the records with timestamping data for API calls and asynchronous activity like the kernel submits, memory copies and barriers. Tracing domains: • roctracer_domain_t – runtime API domains, HIP, HSA, etc… • roctracer_op_string – Return Op string by given domain and activity Op code • roctracer_op_code – Return Op code and kind by given string Callback API: • roctracer_rtapi_callback_t – runtime API callback type • roctracer_enable_op_callback – enable runtime API callback by domain and Op code • roctracer_enable_domain_callback – enable runtime API callback by domain for all Ops • roctracer_enable_callback – enable runtime API callback for all domains, all Ops • roctracer_disable_op_callback – disable runtime API callback by domain and Op code • roctracer_disable_domain_callback – disable runtime API callback by domain for all Ops • roctracer_disable_callback – disable runtime API callback for all domains, all Ops Activity API: • roctracer_record_t – activity record • roctracer_pool_t – records pool type • roctracer_allocator_t – tracer allocator type • roctracer_buffer_callback_t – pool callback type • roctracer_open_pool[_expl] – create records pool • roctracer_close_pool[_expl] – close records pool • roctracer_default_pool[_expl] – get/set default pool • roctracer_properties_t – tracer properties • roctracer_enable_op_activity[_expl] – enable activity records logging • roctracer_enable_domain_activity[_expl] – enable activity records logging • roctracer_enable_activity[_expl] – enable activity records logging • roctracer_disable_op_activity – disable activity records logging • roctracer_disable_domain_activity – disable activity records logging • roctracer_disable_activity – disable activity
records logging • roctracer_flush_activity[_expl] – flush available activity records • roctracer_next_record – return next record • roctracer_get_timestamp – return correlated GPU/CPU system timestamp External correlation ID API: • roctracer_activity_push_external_correlation_id - push an external correlation id for the calling thread • roctracer_activity_pop_external_correlation_id - pop an external correlation id for the calling thread Tracing control API: • roctracer_start – tracing start • roctracer_stop – tracing stop ``` ### 3.2. Tracing Domains ``` Various tracing domains are supported. Each domain is assigned a domain ID. The domains include HSA, HIP runtime levels. Traced API domains: typedef enum { ACTIVITY_DOMAIN_HSA_API = 0, // HSA API domain ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain ACTIVITY_DOMAIN_HIP_API = 2, // HIP API domain ACTIVITY_DOMAIN_HIP_OPS = 3, // HIP async activity domain ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain ACTIVITY_DOMAIN_NUMBER = 7 } activity_domain_t; Return name by given domain and Op code: const char* roctracer_op_string( // NULL returned on error and error number is set uint32_t domain, // tracing domain uint32_t op, // activity op code uint32_t kind); // activity kind Return Op code and kind by given string: roctracer_status_t roctracer_op_code( uint32_t domain, // tracing domain const char* str, // [in] op string uint32_t* op, // [out] op code uint32_t* kind); // [out] op kind code if not NULL ``` ### 3.3. Callback API ``` The tracer provides support for runtime API callbacks and activity records logging. The API callbacks provide the API calls arguments and are called in two phases: on “enter” and on “exit”. 
API phase passed to the callbacks: typedef enum { ROCTRACER_API_PHASE_ENTER, ROCTRACER_API_PHASE_EXIT, } roctracer_api_phase_t; Runtime API callback type: typedef void (*roctracer_rtapi_callback_t)( uint32_t domain, // runtime API domain uint32_t cid, // API call ID const void* data, // [in] callback data with correlation id and the call // arguments void* arg); // [in/out] user passed data Enable runtime API callbacks: roctracer_status_t roctracer_enable_op_callback( activity_domain_t domain, // tracing domain uint32_t op, // API call ID activity_rtapi_callback_t callback, // callback function pointer void* arg); // [in/out] callback arg roctracer_status_t roctracer_enable_domain_callback( activity_domain_t domain, // tracing domain activity_rtapi_callback_t callback, // callback function pointer void* arg); // [in/out] callback arg roctracer_status_t roctracer_enable_callback( activity_rtapi_callback_t callback, // callback function pointer void* arg); // [in/out] callback arg Disable runtime API callbacks: roctracer_status_t roctracer_disable_op_callback( activity_domain_t domain, // tracing domain uint32_t op); // API call ID roctracer_status_t roctracer_disable_domain_callback( activity_domain_t domain); // tracing domain roctracer_status_t roctracer_disable_callback(); ``` ### 3.4 Activity API The activity records are asynchronously logged to the pool and can be associated with the respective API callbacks using the correlation ID. Activity API can be used to enable collecting the records with timestamp data for API calls and GPU activity like kernel submits, memory copies, and barriers. 
``` // Correlation id typedef uint64_t activity_correlation_id_t; Activity record type: // Activity record type struct activity_record_t { uint32_t domain; // activity domain id activity_kind_t kind; // activity kind activity_op_t op; // activity op activity_correlation_id_t correlation_id; // activity ID uint64_t begin_ns; // host begin timestamp uint64_t end_ns; // host end timestamp union { struct { int device_id; // device id uint64_t queue_id; // queue id }; struct { uint32_t process_id; // process id uint32_t thread_id; // thread id }; struct { activity_correlation_id_t external_id; // external correlation id }; }; size_t bytes; // data size bytes }; ``` > [!NOTE] > rocprofiler reported device ids are in sync with node-ids reported by KFD (kernel). This can easily be verified by `rocm-smi` under `Node`. > Please also note that this device id might not be in sync with the ones provided by `hipGetDeviceProperties` which includes CPU agents and starts from 0. ``` Return next record: static inline int roctracer_next_record( const activity_record_t* record, // [in] record ptr const activity_record_t** next); // [out] next record ptr Tracer allocator type: typedef void (*roctracer_allocator_t)( char** ptr, // memory pointer size_t size, // memory size void* arg); // allocator arg Pool callback type: typedef void (*roctracer_buffer_callback_t)( const char* begin, // [in] available buffered trace records const char* end, // [in] end of buffered trace records void* arg); // [in/out] callback arg Tracer properties: typedef struct { uint32_t mode; // roctracer mode size_t buffer_size; // buffer size // power of 2 roctracer_allocator_t alloc_fun; // memory allocator // function pointer void* alloc_arg; // memory allocator // function argument roctracer_buffer_callback_t buffer_callback_fun; // tracer record // callback function void* buffer_callback_arg; // tracer record // callback arg } roctracer_properties_t; Tracer memory pool handle type: typedef void roctracer_pool_t; 
Create tracer memory pool: roctracer_status_t roctracer_open_pool( const roctracer_properties_t* properties); // tracer pool properties roctracer_status_t roctracer_open_pool_expl( const roctracer_properties_t* properties, // tracer pool properties roctracer_pool_t** pool); // [out] returns tracer pool if // not NULL, otherwise sets the // default one if it is not set // yet; otherwise the error is // generated Close tracer memory pool: roctracer_status_t roctracer_close_pool(); roctracer_status_t roctracer_close_pool_expl( roctracer_pool_t* pool); // memory pool, NULL means default pool Return current default pool. Set new default pool if the argument is not NULL: roctracer_pool_t* roctracer_default_pool(); roctracer_pool_t* roctracer_default_pool_expl( roctracer_pool_t* pool); // new default pool if not NULL ``` Enable activity records logging: ``` roctracer_status_t roctracer_enable_op_activity( activity_domain_t domain, // tracing domain uint32_t op); // activity op ID roctracer_status_t roctracer_enable_op_activity_expl( activity_domain_t domain, // tracing domain uint32_t op, // activity op ID roctracer_pool_t* pool); // memory pool, NULL means default pool roctracer_status_t roctracer_enable_domain_activity( activity_domain_t domain); // tracing domain roctracer_status_t roctracer_enable_domain_activity_expl( activity_domain_t domain, // tracing domain roctracer_pool_t* pool); // memory pool, NULL means default pool roctracer_status_t roctracer_enable_activity(); roctracer_status_t roctracer_enable_activity_expl( roctracer_pool_t* pool); // memory pool, NULL means default pool Disable activity records logging: roctracer_status_t roctracer_disable_op_activity( activity_domain_t domain, // tracing domain uint32_t op); // activity op ID roctracer_status_t roctracer_disable_domain_activity( activity_domain_t domain); // tracing domain roctracer_status_t roctracer_disable_activity(); Flush available activity records: roctracer_status_t roctracer_flush_activity(); 
roctracer_status_t roctracer_flush_activity_expl( roctracer_pool_t* pool); // memory pool, NULL means default pool Return correlated GPU/CPU system timestamp: roctracer_status_t roctracer_get_timestamp( uint64_t* timestamp); // [out] return timestamp ``` External correlation ID API ``` The API provides activity records to associate rocTracer correlation IDs with IDs provided by external APIs. The external ID records are identified by ACTIVITY_DOMAIN_EXT_API domain value. Using the ‘push’ method an external ID is pushed to a per CPU thread stack and the ‘pop’ method can be used to remove the last pushed ID. An external ID record is inserted before any generated rocTracer activity record if the same CPU external ID stack is non-empty. Notifies that the calling thread is entering an external API region. Push an external correlation id for the calling thread. roctracer_status_t roctracer_activity_push_external_correlation_id( activity_correlation_id_t id); // external correlation id Notifies that the calling thread is leaving an external API region. Pop an external correlation id for the calling thread. roctracer_status_t roctracer_activity_pop_external_correlation_id( activity_correlation_id_t* last_id); // returns the last external correlation id // if not NULL ``` Tracing control API ``` Tracing start: void roctracer_start(); Tracing stop: void roctracer_stop(); ``` ## 4. rocTracer Usage Code Examples ### 4.1. HIP API ops, GPU Activity Tracing ``` #include // HIP API callback function void hip_api_callback( uint32_t domain, uint32_t cid, const void* callback_data, void* arg) { (void)arg; const hip_api_data_t* data = reinterpret_cast (callback_data); fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ", roctracer_id_string(ACTIVITY_DOMAIN_HIP_API, cid), cid, data->correlation_id, (data->phase == ACTIVITY_API_PHASE_ENTER) ? 
"on-enter" : "on-exit"); } // Activity tracing callback void activity_callback(const char* begin, const char* end, void* arg) { const roctracer_record_t* record = reinterpret_cast(begin); const roctracer_record_t* end_record = reinterpret_cast(end); fprintf(stdout, "\tActivity records:\n"); while (record < end_record) { const char * name = roctracer_op_string(record->domain, record->activity_id, 0); fprintf(stdout, "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu) device_id(%d) stream_id(%lu)\n", name, record->correlation_id, record->begin_ns, record->end_ns, record->device_id, record->stream_id ); ROCTRACER_CALL(roctracer_next_record(record, &record)); } } int main() { // Allocating tracing pool roctracer_properties_t properties{}; properties.buffer_size = 12; properties.buffer_callback_fun = activity_callback; ROCTRACER_CALL(roctracer_open_pool(&properties)); // Enable HIP API callbacks. HIP_API_ID_ANY can be used to trace all HIP // API calls. ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API, HIP_API_ID_hipModuleLaunchKernel, hip_api_callback, NULL)); ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HIP_API, HIP_API_ID_hipModuleLaunchKernel)); // Enable HIP kernel dispatch activity tracing ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_DISPATCH)); // Disable tracing and closing the pool ROCTRACER_CALL(roctracer_disable_callback()); ROCTRACER_CALL(roctracer_disable_activity()); ROCTRACER_CALL(roctracer_close_pool()); } ``` ### 4.2. MatrixTranspose HIP sample with all APIs/activity tracing enabled ``` This shows a MatrixTranspose HIP sample with enabled tracing of all HIP API and all GPU asynchronous activity. /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include <iostream> // hip header file #include <hip/hip_runtime.h> #ifndef ITERATIONS # define ITERATIONS 100 #endif #define WIDTH 1024 #define NUM (WIDTH * WIDTH) #define THREADS_PER_BLOCK_X 4 #define THREADS_PER_BLOCK_Y 4 #define THREADS_PER_BLOCK_Z 1 // Device (Kernel) function, it must be void // hipLaunchParm provides the execution configuration __global__ void matrixTranspose(hipLaunchParm lp, float* out, float* in, const int width) { int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; out[y * width + x] = in[x * width + y]; } // CPU implementation of matrix transpose void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) { for (unsigned int j = 0; j < width; j++) { for (unsigned int i = 0; i < width; i++) { output[i * width + j] = input[j * width + i]; } } } int iterations = ITERATIONS; void start_tracing(); void stop_tracing(); int main() { float* Matrix; float* TransposeMatrix; float* cpuTransposeMatrix; float* gpuMatrix; float* gpuTransposeMatrix; hipDeviceProp_t devProp; hipGetDeviceProperties(&devProp, 0); std::cout << "Device name " << devProp.name << std::endl; int i; int errors; while (iterations-- > 0) { start_tracing(); Matrix = (float*)malloc(NUM * sizeof(float)); TransposeMatrix = (float*)malloc(NUM * sizeof(float)); cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float)); // initialize the input data for (i = 0; i < NUM; i++) { Matrix[i] = (float)i * 10.0f; } // allocate the memory on the device side hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); // Memory transfer from host to device hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); // Launching kernel from host hipLaunchKernel(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH); // Memory transfer from device to host
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); // CPU MatrixTranspose computation matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); // verify the results errors = 0; double eps = 1.0E-6; for (i = 0; i < NUM; i++) { if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { errors++; } } if (errors != 0) { printf("FAILED: %d errors\n", errors); } else { printf("PASSED!\n"); } // free the resources on device side hipFree(gpuMatrix); hipFree(gpuTransposeMatrix); // free the resources on host side free(Matrix); free(TransposeMatrix); free(cpuTransposeMatrix); stop_tracing(); } return errors; } ///////////////////////////////////////////////////////////////////////////// // HIP Callbacks/Activity tracing ///////////////////////////////////////////////////////////////////////////// #include <roctracer_hip.h> // Macro to check ROC-tracer calls status #define ROCTRACER_CALL(call) \ do { \ int err = call; \ if (err != 0) { \ std::cerr << roctracer_error_string() << std::endl << std::flush; \ abort(); \ } \ } while (0) // HIP API callback function void hip_api_callback( uint32_t domain, uint32_t cid, const void* callback_data, void* arg) { (void)arg; const hip_api_data_t* data = reinterpret_cast<const hip_api_data_t*>(callback_data); fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ", roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0), cid, data->correlation_id, (data->phase == ACTIVITY_API_PHASE_ENTER) ?
"on-enter" : "on-exit"); if (data->phase == ACTIVITY_API_PHASE_ENTER) { switch (cid) { case HIP_API_ID_hipMemcpy: fprintf(stdout, "dst(%p) src(%p) size(0x%x) kind(%u)", data->args.hipMemcpy.dst, data->args.hipMemcpy.src, (uint32_t)(data->args.hipMemcpy.sizeBytes), (uint32_t)(data->args.hipMemcpy.kind)); break; case HIP_API_ID_hipMalloc: fprintf(stdout, "ptr(%p) size(0x%x)", data->args.hipMalloc.ptr, (uint32_t)(data->args.hipMalloc.size)); break; case HIP_API_ID_hipFree: fprintf(stdout, "ptr(%p)", data->args.hipFree.ptr); break; case HIP_API_ID_hipModuleLaunchKernel: fprintf(stdout, "kernel(\"%s\") stream(%p)", hipKernelNameRef(data->args.hipModuleLaunchKernel.f), data->args.hipModuleLaunchKernel.stream); break; default: break; } } else { switch (cid) { case HIP_API_ID_hipMalloc: fprintf(stdout, "*ptr(0x%p)", *(data->args.hipMalloc.ptr)); break; default: break; } } fprintf(stdout, "\n"); fflush(stdout); } // Activity tracing callback // hipMalloc id(3) correlation_id(1): // begin_ns(1525888652762640464) end_ns(1525888652762877067) void activity_callback(const char* begin, const char* end, void* arg) { const roctracer_record_t* record = reinterpret_cast<const roctracer_record_t*>(begin); const roctracer_record_t* end_record = reinterpret_cast<const roctracer_record_t*>(end); fprintf(stdout, "\tActivity records:\n"); fflush(stdout); while (record < end_record) { const char * name = roctracer_op_string(record->domain, record->activity_id, 0); fprintf(stdout, "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu) \ device_id(%d) stream_id(%lu)", name, record->correlation_id, record->begin_ns, record->end_ns, record->device_id, record->stream_id ); if (record->kind == hc::HSA_OP_ID_COPY) fprintf(stdout, " bytes(0x%zx)", record->bytes); fprintf(stdout, "\n"); fflush(stdout); ROCTRACER_CALL(roctracer_next_record(record, &record)); } } // Start tracing routine void start_tracing() { std::cout << "# START #############################" << std::endl << std::flush; // Allocating tracing pool roctracer_properties_t properties{};
properties.buffer_size = 0x1000; properties.buffer_callback_fun = activity_callback; ROCTRACER_CALL(roctracer_open_pool(&properties)); // Enable API callbacks, all domains ROCTRACER_CALL(roctracer_enable_callback(hip_api_callback, NULL)); // Enable activity tracing, all domains ROCTRACER_CALL(roctracer_enable_activity()); } // Stop tracing routine void stop_tracing() { ROCTRACER_CALL(roctracer_disable_callback()); ROCTRACER_CALL(roctracer_disable_activity()); ROCTRACER_CALL(roctracer_close_pool()); std::cout << "# STOP #############################" << std::endl << std::flush; } ///////////////////////////////////////////////////////////////////////////// ``` ## 5. rocTX application code annotation API ``` Basic annotation API: markers and nested ranges. // A marker created by given ASCII message void roctxMark(const char* message); // Returns the 0 based level of a nested range being started by given message associated to this range. // A negative value is returned on the error. int roctxRangePush(const char* message); // Marks the end of a nested range. // Returns the 0 based level of the range. // A negative value is returned on the error. int roctxRangePop(); ``` ./doc/Doxyfile.in0000664000175100017510000032204515176134562014003 0ustar jenkinsjenkins## Copyright (c) 2018-2022 Advanced Micro Devices, Inc. ## ## Permission is hereby granted, free of charge, to any person obtaining a copy ## of this software and associated documentation files (the "Software"), to ## deal in the Software without restriction, including without limitation the ## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ## sell copies of the Software, and to permit persons to whom the Software is ## furnished to do so, subject to the following conditions: ## ## The above copyright notice and this permission notice shall be included in ## all copies or substantial portions of the Software.
## ## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ## IN THE SOFTWARE. # Doxyfile 1.8.11 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. # # All text after a double hash (##) is considered a comment and is placed in # front of the TAG it is preceding. # # All text after a single hash (#) is considered a comment and will be ignored. # The format is: # TAG = value [value, ...] # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file # that follow. The default is UTF-8 which is also the encoding used for all text # before the first occurrence of this tag. Doxygen uses libiconv (or the iconv # built into libc) for the transcoding. See http://www.gnu.org/software/libiconv # for the list of possible encodings. # The default value is: UTF-8. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded by # double-quotes, unless you are using Doxywizard) that should identify the # project for which the documentation is generated. This name is used in the # title of most generated pages and in a few other places. # The default value is: My Project. 
PROJECT_NAME = "@PROJECT_NAME@" # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version # control system is used. PROJECT_NUMBER = @PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@ # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = # With the PROJECT_LOGO tag one can specify a logo or an icon that is included # in the documentation. The maximum height of the logo should not exceed 55 # pixels and the maximum width should not exceed 200 pixels. Doxygen will copy # the logo to the output directory. PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. OUTPUT_DIRECTORY = @CMAKE_CURRENT_BINARY_DIR@/doc/ # If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and # will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise causes # performance problems for the file system. # The default value is: NO. CREATE_SUBDIRS = NO # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode # U+3044. # The default value is: NO. 
ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, # Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), # Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, # Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), # Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, # Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, # Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, # Ukrainian and Vietnamese. # The default value is: English. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. # The default value is: YES. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief # description of a member or function before the detailed description # # Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. # The default value is: YES. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator that is # used to form the text in various listings. Each string in this list, if found # as the leading text of the brief description, will be stripped from the text # and the result, after processing the whole list, is used as the annotated # text. Otherwise, the brief description is used as-is. 
If left blank, the # following values are used ($name is automatically replaced with the name of # the entity):The $name class, The $name widget, The $name file, is, provides, # specifies, contains, represents, a, an and the. ABBREVIATE_BRIEF = # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # doxygen will generate a detailed section even if there is only a brief # description. # The default value is: NO. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. # The default value is: NO. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path # before files name in the file list and in the header files. If set to NO the # shortest path that makes the file name unique will be used # The default value is: YES. FULL_PATH_NAMES = YES # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. # Stripping is only done if one of the specified strings matches the left-hand # part of the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the path to # strip. # # Note that you can specify absolute paths here, but also relative paths, which # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. STRIP_FROM_PATH = # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which # header file to include in order to use a class. If left blank only the name of # the header file containing the class definition is used. 
Otherwise one should # specify the list of include paths that are normally passed to the compiler # using the -I flag. STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful is your file systems doesn't # support long names like on DOS, Mac, or CD-ROM. # The default value is: NO. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the # first line (until the first dot) of a Javadoc-style comment as the brief # description. If set to NO, the Javadoc-style will behave just like regular Qt- # style comments (thus requiring an explicit @brief command for a brief # description.) # The default value is: NO. JAVADOC_AUTOBRIEF = YES # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description. If # set to NO, the Qt-style will behave just like regular Qt-style comments (thus # requiring an explicit \brief command for a brief description.) # The default value is: NO. QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a # multi-line C++ special comment block (i.e. a block of //! or /// comments) as # a brief description. This used to be the default behavior. The new default is # to treat a multi-line C++ comment block as a detailed description. Set this # tag to YES if you prefer the old behavior instead. # # Note that setting this tag to YES also means that rational rose comments are # not recognized any more. # The default value is: NO. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new # page for each member. 
If set to NO, the documentation of a member will be part # of the file/class/namespace that contains it. # The default value is: NO. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen # uses this value to replace tabs by spaces in code fragments. # Minimum value: 1, maximum value: 16, default value: 4. TAB_SIZE = 4 # This tag can be used to specify a number of aliases that act as commands in # the documentation. An alias has the form: # name=value # For example adding # "sideeffect=@par Side Effects:\n" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading # "Side Effects:". You can put \n's in the value part of an alias to insert # newlines. ALIASES = # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding "class=itcl::class" # will allow you to use the command class in the itcl::class meaning. TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all # members will be omitted, etc. # The default value is: NO. OPTIMIZE_OUTPUT_FOR_C = YES # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or # Python sources only. Doxygen will then generate output that is more tailored # for that language. For instance, namespaces will be presented as packages, # qualified scopes will look different, etc. # The default value is: NO. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources. Doxygen will then generate output that is tailored for Fortran. # The default value is: NO. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. 
Doxygen will then generate output that is tailored for VHDL. # The default value is: NO. OPTIMIZE_OUTPUT_VHDL = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, Javascript, # C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: # FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: # Fortran. In the later case the parser tries to guess whether the code is fixed # or free formatted code, this is the default for Fortran type files), VHDL. For # instance to make doxygen treat .inc files as Fortran files (default is PHP), # and .f files as C (default is Fortran), use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise # the files are not read by doxygen. EXTENSION_MAPPING = # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable # documentation. See http://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you can # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in # case of backward compatibilities issues. # The default value is: YES. MARKDOWN_SUPPORT = YES # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can # be prevented in individual cases by putting a % sign in front of the word or # globally by setting AUTOLINK_SUPPORT to NO. # The default value is: YES. 
AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should set this # tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); # versus func(std::string) {}). This also make the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. # The default value is: NO. BUILTIN_STL_SUPPORT = NO # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. # The default value is: NO. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: # http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen # will parse them like normal C++ but will assume all classes use public instead # of private inheritance when no explicit protection keyword is present. # The default value is: NO. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES will make # doxygen to replace the get and set methods by a property in the documentation. # This will only work if the methods are indeed getting or setting a simple # type. If this is not the case, or you want to show the methods anyway, you # should set this option to NO. # The default value is: YES. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. # The default value is: NO. DISTRIBUTE_GROUP_DOC = NO # If one adds a struct or class to a group and this option is enabled, then also # any nested class or struct is added to the same group. 
By default this option # is disabled and one has to add nested compounds explicitly via \ingroup. # The default value is: NO. GROUP_NESTED_COMPOUNDS = NO # Set the SUBGROUPING tag to YES to allow class member groups of the same type # (for instance a group of public functions) to be put as a subgroup of that # type (e.g. under the Public Functions section). Set it to NO to prevent # subgrouping. Alternatively, this can be done per class using the # \nosubgrouping command. # The default value is: YES. SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions # are shown inside the group in which they are included (e.g. using \ingroup) # instead of on a separate page (for HTML and Man pages) or section (for LaTeX # and RTF). # # Note that this feature does not work in combination with # SEPARATE_MEMBER_PAGES. # The default value is: NO. INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions # with only public data fields or simple typedef fields will be shown inline in # the documentation of the scope in which they are defined (i.e. file, # namespace, or group documentation), provided this scope is documented. If set # to NO, structs, classes, and unions are shown on a separate page (for HTML and # Man pages) or section (for LaTeX and RTF). # The default value is: NO. INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or # enum is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically be # useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. # The default value is: NO. 
TYPEDEF_HIDES_STRUCT = NO # The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This # cache is used to resolve symbols given their name and scope. Since this can be # an expensive process and often the same symbol appears multiple times in the # code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small # doxygen will become slower. If the cache is too large, memory is wasted. The # cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range # is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 # symbols. At the end of a run doxygen will report the cache usage and suggest # the optimal cache size from a speed point of view. # Minimum value: 0, maximum value: 9, default value: 0. LOOKUP_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in # documentation are documented, even if no documentation was available. Private # class members and static file members will be hidden unless the # EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. # Note: This will also disable the warnings about undocumented members that are # normally produced when WARNINGS is set to YES. # The default value is: NO. EXTRACT_ALL = YES # If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will # be included in the documentation. # The default value is: NO. EXTRACT_PRIVATE = NO # If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal # scope will be included in the documentation. # The default value is: NO. EXTRACT_PACKAGE = NO # If the EXTRACT_STATIC tag is set to YES, all static members of a file will be # included in the documentation. # The default value is: NO. 
EXTRACT_STATIC = NO # If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined # locally in source files will be included in the documentation. If set to NO, # only classes defined in header files are included. Does not have any effect # for Java sources. # The default value is: YES. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. If set to YES, local methods, # which are defined in the implementation section but not in the interface are # included in the documentation. If set to NO, only methods in the interface are # included. # The default value is: NO. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base name of # the file that contains the anonymous namespace. By default anonymous namespace # are hidden. # The default value is: NO. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these # members will be included in the various overviews, but no documentation # section is generated. This option has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO, these classes will be included in the various overviews. This option # has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend # (class|struct|union) declarations. If set to NO, these declarations will be # included in the documentation. # The default value is: NO. 
HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any # documentation blocks found inside the body of a function. If set to NO, these # blocks will be appended to the function's detailed documentation block. # The default value is: NO. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation that is typed after a # \internal command is included. If the tag is set to NO then the documentation # will be excluded. Set it to YES to include the internal documentation. # The default value is: NO. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file # names in lower-case letters. If set to YES, upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. # The default value is: system dependent. CASE_SENSE_NAMES = YES # If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with # their full class and namespace scopes in the documentation. If set to YES, the # scope will be hidden. # The default value is: NO. HIDE_SCOPE_NAMES = NO # If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will # append additional text to a page's title, such as Class Reference. If set to # YES the compound reference will be hidden. # The default value is: NO. HIDE_COMPOUND_REFERENCE= NO # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. SHOW_INCLUDE_FILES = YES # If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each # grouped member an include statement to the documentation, telling the reader # which file to include in order to use the member. # The default value is: NO. 
SHOW_GROUPED_MEMB_INC = NO # If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include # files with double quotes in the documentation rather than with sharp brackets. # The default value is: NO. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the # documentation for inline members. # The default value is: YES. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the # (detailed) documentation of file and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. # The default value is: YES. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief # descriptions of file, namespace and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. Note that # this will also influence the order of the classes in the class list. # The default value is: NO. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the # (brief and detailed) documentation of class members so that constructors and # destructors are listed first. If set to NO the constructors will appear in the # respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. # Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief # member documentation. # Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting # detailed member documentation. # The default value is: NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy # of group names into alphabetical order. If set to NO the group names will # appear in their defined order. # The default value is: NO. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by # fully-qualified names, including namespaces. 
If set to NO, the class list will # be sorted only by class name, not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the alphabetical # list. # The default value is: NO. SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper # type resolution of all parameters of a function it will reject a match between # the prototype and the implementation of a member function even if there is # only one candidate or it is obvious which candidate to choose by doing a # simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still # accept a match between prototype and implementation in such cases. # The default value is: NO. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo # list. This list is created by putting \todo commands in the documentation. # The default value is: YES. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test # list. This list is created by putting \test commands in the documentation. # The default value is: YES. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug # list. This list is created by putting \bug commands in the documentation. # The default value is: YES. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) # the deprecated list. This list is created by putting \deprecated commands in # the documentation. # The default value is: YES. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional documentation # sections, marked by \if <section_label> ... \endif and \cond <section_label> # ... \endcond blocks.
ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the # initial value of a variable or macro / define can have for it to appear in the # documentation. If the initializer consists of more lines than specified here # it will be hidden. Use a value of 0 to hide initializers completely. The # appearance of the value of individual variables and macros / defines can be # controlled using \showinitializer or \hideinitializer command in the # documentation regardless of this setting. # Minimum value: 0, maximum value: 10000, default value: 30. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated at # the bottom of the documentation of classes and structs. If set to YES, the # list will mention the files that were used to generate the documentation. # The default value is: YES. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. This # will remove the Files entry from the Quick Index and from the Folder Tree View # (if specified). # The default value is: YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces # page. This will remove the Namespaces entry from the Quick Index and from the # Folder Tree View (if specified). # The default value is: YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command command input-file, where command is the value of the # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided # by doxygen. Whatever the program writes to standard output is used as the file # version. For an example see the documentation. 
FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml # will be used as the name of the layout file. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE # tag is left empty. LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib # extension is automatically appended if omitted. This requires the bibtex tool # to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. # For LaTeX the style of the bibliography can be controlled using # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. See also \cite for info how to create references. CITE_BIB_FILES = #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated to # standard output by doxygen. If QUIET is set to YES this implies that the # messages are off. # The default value is: NO. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated to standard error (stderr) by doxygen. If WARNINGS is set to YES # this implies that the warnings are on. # # Tip: Turn warnings on while writing the documentation. # The default value is: YES. 
WARNINGS = YES # If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate # warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag # will automatically be disabled. # The default value is: YES. WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some parameters # in a documented function, or documenting parameters that don't exist or using # markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return # value. If set to NO, doxygen will only warn about wrong or incomplete # parameter documentation, but not about the absence of documentation. # The default value is: NO. WARN_NO_PARAMDOC = YES # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when # a warning is encountered. # The default value is: NO. WARN_AS_ERROR = NO # The WARN_FORMAT tag determines the format of the warning messages that doxygen # can produce. The string should contain the $file, $line, and $text tags, which # will be replaced by the file and line number from which the warning originated # and the warning text. Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard # error (stderr). 
WARN_LOGFILE = #--------------------------------------------------------------------------- # Configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag is used to specify the files and/or directories that contain # documented source files. You may enter file names like myfile.cpp or # directories like /usr/src/myproject. Separate the files or directories with # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. INPUT = @CMAKE_CURRENT_SOURCE_DIR@/inc/roctracer.h @CMAKE_CURRENT_SOURCE_DIR@/inc/roctracer_plugin.h # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: http://www.gnu.org/software/libiconv) for the list of # possible encodings. # The default value is: UTF-8. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # read by doxygen. # # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, # *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, # *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f, *.for, *.tcl, # *.vhd, *.vhdl, *.ucf, *.qsf, *.as and *.js. FILE_PATTERNS = # The RECURSIVE tag can be used to specify whether or not subdirectories should # be searched for input files as well. # The default value is: NO. 
RECURSIVE = YES # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. # The default value is: NO. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # ANamespace::AClass, ANamespace::*Test # # Note that these wildcards are matched against symbol names, not file paths; to # exclude all test directories use EXCLUDE_PATTERNS with the pattern */test/* EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include # command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and # *.h) to filter out the source-files in the directories. If left blank all # files are included.
EXAMPLE_PATTERNS = # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude commands # irrespective of the value of the RECURSIVE tag. # The default value is: NO. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or directories # that contain images that are to be included in the documentation (see the # \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command: # # <filter> <input-file> # # where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the # name of an input file. Doxygen will then use the output that the filter # program writes to standard output. If FILTER_PATTERNS is specified, this tag # will be ignored. # # Note that the filter must not add or remove lines; it is applied before the # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: pattern=filter # (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how # filters are used. If the FILTER_PATTERNS tag is empty or if none of the # patterns match the file name, INPUT_FILTER is applied. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen.
FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will also be used to filter the input files that are used for # producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). # The default value is: NO. FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern. A pattern will override the setting for FILTER_PATTERN (if any) and # it is also possible to disable source filtering for a specific pattern using # *.ext= (so without naming a filter). # This tag requires that the tag FILTER_SOURCE_FILES is set to YES. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page # (index.html). This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. USE_MDFILE_AS_MAINPAGE = #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will be # generated. Documented entities will be cross-referenced with these sources. # # Note: To get rid of all source code in the generated output, make sure that # also VERBATIM_HEADERS is set to NO. # The default value is: NO. SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body of functions, # classes and enums directly into the documentation. # The default value is: NO. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any # special comment blocks from generated source code fragments. Normal C, C++ and # Fortran comments will always remain visible. # The default value is: YES. 
STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES then for each documented # function all documented functions referencing it will be listed. # The default value is: NO. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES then for each documented function # all documented entities called/used by that function will be listed. # The default value is: NO. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set # to YES then the hyperlinks from functions in REFERENCES_RELATION and # REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will # link to the documentation. # The default value is: YES. REFERENCES_LINK_SOURCE = YES # If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the # source code will show a tooltip with additional information such as prototype, # brief description and links to the definition and documentation. Since this # will make the HTML file larger and loading of large files a bit slower, you # can opt to disable this feature. # The default value is: YES. # This tag requires that the tag SOURCE_BROWSER is set to YES. SOURCE_TOOLTIPS = YES # If the USE_HTAGS tag is set to YES then the references to source code will # point to the HTML generated by the htags(1) tool instead of doxygen built-in # source browser. The htags tool is part of GNU's global source tagging system # (see http://www.gnu.org/software/global/global.html). You will need version # 4.8.6 or higher. # # To use it do the following: # - Install the latest version of global # - Enable SOURCE_BROWSER and USE_HTAGS in the config file # - Make sure the INPUT points to the root of the source tree # - Run doxygen as normal # # Doxygen will invoke htags (and that will in turn invoke gtags), so these # tools must be available from the command line (i.e. in the search path). 
# # The result: instead of the source browser generated by doxygen, the links to # source code will now point to the output of htags. # The default value is: NO. # This tag requires that the tag SOURCE_BROWSER is set to YES. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a # verbatim copy of the header file for each class for which an include is # specified. Set to NO to disable this. # See also: Section \class. # The default value is: YES. VERBATIM_HEADERS = YES # If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the # clang parser (see: http://clang.llvm.org/) for more accurate parsing at the # cost of reduced performance. This can be particularly helpful with template # rich C++ code for which doxygen's built-in parser lacks the necessary type # information. # Note: The availability of this option depends on whether or not doxygen was # generated with the -Duse-libclang=ON option for CMake. # The default value is: NO. CLANG_ASSISTED_PARSING = NO # If clang assisted parsing is enabled you can provide the compiler with command # line options that you would normally use when invoking the compiler. Note that # the include paths will already be set by doxygen for the files and directories # specified with INPUT and INCLUDE_PATH. # This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. CLANG_OPTIONS = #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all # compounds will be generated. Enable this if the project contains a lot of # classes, structs, unions or interfaces. # The default value is: YES. ALPHABETICAL_INDEX = YES # The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in # which the alphabetical index list will be split.
# Minimum value: 1, maximum value: 20, default value: 5. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag # can be used to specify a prefix (or a list of prefixes) that should be ignored # while generating the index headers. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output # The default value is: YES. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of # it. # The default directory is: html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for each # generated HTML page (for example: .htm, .php, .asp). # The default value is: .html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a user-defined HTML header file for # each generated HTML page. If the tag is left blank doxygen will generate a # standard header. # # To get valid HTML the header file that includes any scripts and style sheets # that doxygen needs, which is dependent on the configuration options used (e.g. # the setting GENERATE_TREEVIEW). It is highly recommended to start with a # default header using # doxygen -w html new_header.html new_footer.html new_stylesheet.css # YourConfigFile # and then modify the file new_header.html. 
See also section "Doxygen usage" # for information on how to generate the default header that doxygen normally # uses. # Note: The header is subject to change so you typically have to regenerate the # default header when upgrading to a newer version of doxygen. For a description # of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each # generated HTML page. If the tag is left blank doxygen will generate a standard # footer. See HTML_HEADER for more information on how to generate a default # footer and what special commands can be used inside the footer. See also # section "Doxygen usage" for information on how to generate the default footer # that doxygen normally uses. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style # sheet that is used by each HTML page. It can be used to fine-tune the look of # the HTML output. If left blank doxygen will generate a default style sheet. # See also section "Doxygen usage" for information on how to generate the style # sheet that doxygen normally uses. # Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as # it is more robust and this tag (HTML_STYLESHEET) will in the future become # obsolete. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined # cascading style sheets that are included after the standard style sheets # created by doxygen. Using this option one can overrule certain style aspects. # This is preferred over using HTML_STYLESHEET since it does not replace the # standard style sheet and is therefore more robust against future updates. # Doxygen will copy the style sheet files to the output directory. 
# Note: The order of the extra style sheet files is of importance (e.g. the last # style sheet in the list overrules the setting of the previous ones in the # list). For an example see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. Use the # $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that the # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to # this color. Hue is specified as an angle on a colorwheel, see # http://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. # Minimum value: 0, maximum value: 359, default value: 220. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors # in the HTML output. For a value of 0 the output will use grayscales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the # luminance component of the colors in the HTML output. Values below 100 # gradually make the output lighter, whereas values above 100 make the output # darker. 
The value divided by 100 is the actual gamma applied, so 80 represents # a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not # change the gamma. # Minimum value: 40, maximum value: 240, default value: 80. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting this # to YES can help to show when doxygen was last run and thus if the # documentation is up to date. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_TIMESTAMP = NO # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_DYNAMIC_SECTIONS = NO # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries # shown in the various tree structured indices initially; the user can expand # and collapse entries dynamically later on. Doxygen will expand the tree to # such a level that at most the specified number of entries are visible (unless # a fully collapsed tree already exceeds this amount). So setting the number of # entries 1 will produce a full collapsed tree by default. 0 is a special value # representing an infinite number of entries and will result in a full expanded # tree by default. # Minimum value: 0, maximum value: 9999, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development # environment (see: http://developer.apple.com/tools/xcode/), introduced with # OSX 10.5 (Leopard). 
To create a documentation set, doxygen will generate a # Makefile in the HTML output directory. Running make will produce the docset in # that directory and running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at # startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html # for more information. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_DOCSET = NO # This tag determines the name of the docset feed. A documentation feed provides # an umbrella under which multiple documentation sets from a single provider # (such as a company or product suite) can be grouped. # The default value is: Doxygen generated docs. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_FEEDNAME = "Doxygen generated docs" # This tag specifies a string that should uniquely identify the documentation # set bundle. This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_BUNDLE_ID = org.doxygen.Project # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify # the documentation publisher. This should be a reverse domain-name style # string, e.g. com.mycompany.MyDocSet.documentation. # The default value is: org.doxygen.Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. # The default value is: Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. 
The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop # (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on # Windows. # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML # files are now used as the Windows 98 help format, and will replace the old # Windows help format (.hlp) on all Windows platforms in the future. Compressed # HTML files also contain an index, a table of contents, and you can search for # words in the documentation. The HTML workshop also contains a viewer for # compressed HTML files. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_HTMLHELP = NO # The CHM_FILE tag can be used to specify the file name of the resulting .chm # file. You can add a path in front of the file if the result should not be # written to the html output directory. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_FILE = # The HHC_LOCATION tag can be used to specify the location (absolute path # including file name) of the HTML help compiler (hhc.exe). If non-empty, # doxygen will try to run the HTML help compiler on the generated index.hhp. # The file has to be specified with full path. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated # (YES) or that it should be included in the master .chm file (NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. GENERATE_CHI = NO # The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) # and project file content. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_INDEX_ENCODING = # The BINARY_TOC flag controls whether a binary table of contents is generated # (YES) or a normal table of contents (NO) in the .chm file. 
Furthermore it # enables the Previous and Next buttons. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members to # the table of contents of the HTML help documentation and to the tree view. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that # can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help # (.qch) of the generated HTML documentation. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify # the file name of the resulting .qch file. The path specified is relative to # the HTML output folder. # This tag requires that the tag GENERATE_QHP is set to YES. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help # Project output. For more information please see Qt Help Project / Namespace # (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt # Help Project output. For more information please see Qt Help Project / Virtual # Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- # folders). # The default value is: doc. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_VIRTUAL_FOLDER = doc # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom # filter to add. 
For more information please see Qt Help Project / Custom # Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- # filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see Qt Help Project / Custom # Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- # filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's filter section matches. Qt Help Project / Filter Attributes (see: # http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_SECT_FILTER_ATTRS = # The QHG_LOCATION tag can be used to specify the location of Qt's # qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the # generated .qhp file. # This tag requires that the tag GENERATE_QHP is set to YES. QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be # generated, together with the HTML files, they form an Eclipse help plugin. To # install this plugin and make it available under the help contents menu in # Eclipse, the contents of the directory containing the HTML and XML files needs # to be copied into the plugins directory of eclipse. The name of the directory # within the plugins directory should be the same as the ECLIPSE_DOC_ID value. # After copying Eclipse needs to be restarted before the help appears. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_ECLIPSEHELP = NO # A unique identifier for the Eclipse help plugin. When installing the plugin # the directory name containing the HTML and XML files should also have this # name. Each documentation set should have its own identifier. 
# The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. ECLIPSE_DOC_ID = org.doxygen.Project # If you want full control over the layout of the generated HTML pages it might # be necessary to disable the index and replace it with your own. The # DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top # of each HTML page. A value of NO enables the index and the value YES disables # it. Since the tabs in the index contain the same information as the navigation # tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. DISABLE_INDEX = NO # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. If the tag # value is set to YES, a side panel will be generated containing a tree-like # index structure (just like the one that is generated for HTML Help). For this # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can # further fine-tune the look of the index. As an example, the default style # sheet generated by doxygen has an example that shows how to put an image at # the root of the tree instead of the PROJECT_NAME. Since the tree basically has # the same information as the tab index, you could consider setting # DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_TREEVIEW = NO # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. 
# # Note that a value of 0 will completely suppress the enum values from appearing # in the overview section. # Minimum value: 0, maximum value: 20, default value: 4. # This tag requires that the tag GENERATE_HTML is set to YES. ENUM_VALUES_PER_LINE = 4 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used # to set the initial width (in pixels) of the frame in which the tree is shown. # Minimum value: 0, maximum value: 1500, default value: 250. # This tag requires that the tag GENERATE_HTML is set to YES. TREEVIEW_WIDTH = 250 # If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to # external symbols imported via tag files in a separate window. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. EXT_LINKS_IN_WINDOW = NO # Use this tag to change the font size of LaTeX formulas included as images in # the HTML documentation. When you change the font size after a successful # doxygen run you need to manually remove any form_*.png images from the HTML # output directory to force them to be regenerated. # Minimum value: 8, maximum value: 50, default value: 10. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_FONTSIZE = 10 # Use the FORMULA_TRANSPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are not # supported properly for IE 6.0, but are supported on all modern browsers. # # Note that when changing this option you need to delete any form_*.png files in # the HTML output directory before the changes have effect. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_TRANSPARENT = YES # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see # http://www.mathjax.org) which uses client side Javascript for the rendering # instead of using pre-rendered bitmaps.
Use this if you do not have LaTeX # installed or if you want formulas to look prettier in the HTML output. When # enabled you may also need to install MathJax separately and configure the path # to it using the MATHJAX_RELPATH option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. USE_MATHJAX = NO # When MathJax is enabled you can set the default output format to be used for # the MathJax output. See the MathJax site (see: # http://docs.mathjax.org/en/latest/output.html) for more details. # Possible values are: HTML-CSS (which is slower, but has the best # compatibility), NativeMML (i.e. MathML) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_FORMAT = HTML-CSS # When MathJax is enabled you need to specify the location relative to the HTML # output directory using the MATHJAX_RELPATH option. The destination directory # should contain the MathJax.js script. For instance, if the mathjax directory # is located at the same level as the HTML output directory, then # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of # MathJax from http://www.mathjax.org before deployment. # The default value is: http://cdn.mathjax.org/mathjax/latest. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering. For example # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code.
See the MathJax site # (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an # example see the documentation. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_CODEFILE = # When the SEARCHENGINE tag is enabled doxygen will generate a search box for # the HTML output. The underlying search engine uses javascript and DHTML and # should work on any modern browser. Note that when using HTML help # (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) # there is already a search function so this one should typically be disabled. # For large projects the javascript based search engine can be slow, then # enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to # search using the keyboard; to jump to the search box use the access key + S # (what the access key is depends on the OS and browser, but it is typically # Ctrl, Alt/Option