#!/usr/bin/python3

""" Copyright (c) 2021-2025 Advanced Micro Devices, Inc. All rights reserved.
Manage build and installation"""

import re
import sys
import os
import subprocess
import argparse
import ctypes
import pathlib
from fnmatch import fnmatchcase

# Parsed command-line options; main() rebinds this to the argparse Namespace.
args = {}
param = {}
# Host facts collected by os_detect(): the os-release keys plus "NUM_PROC".
OS_info = {}


def parse_args(argv=None):
    """Parse command-line arguments.

    Args:
        argv: Optional list of argument strings. When None (the default)
            argparse reads sys.argv[1:], which is what main() relies on.

    Returns:
        The populated argparse.Namespace.
    """
    parser = argparse.ArgumentParser(description="""
    Checks build arguments
    """)

    default_gpus = 'gfx906:xnack-,gfx1030,gfx1100,gfx1101,gfx1102,gfx1151,gfx1200,gfx1201'

    parser.add_argument('-g', '--debug', required=False, default=False, action='store_true',
                        help='Generate Debug build (default: False)')
    parser.add_argument('--build_dir', type=str, required=False, default="build",
                        help='Build directory path (default: build)')
    parser.add_argument('--deps_dir', type=str, required=False, default=None,
                        help='Dependencies directory path (default: build/deps)')
    # nargs='?' keeps the historical "--skip_ld_conf_entry VALUE" spelling
    # working while also allowing the option to be used as a bare flag.
    parser.add_argument('--skip_ld_conf_entry', required=False, default=False,
                        nargs='?', const=True,
                        help='Skip the ldconfig entry, passes -DROCM_DISABLE_LDCONFIG=ON (default: False)')
    parser.add_argument('--static', required=False, default=False, dest='static_lib', action='store_true',
                        help='Generate static library build (default: False)')
    parser.add_argument('-c', '--clients', required=False, default=False, dest='build_clients', action='store_true',
                        help='Generate all client builds (default: False)')
    parser.add_argument('-t', '--tests', required=False, default=False, dest='build_tests', action='store_true',
                        help='Generate unit tests only (default: False)')
    parser.add_argument('-i', '--install', required=False, default=False, dest='install', action='store_true',
                        help='Install after build (default: False)')
    parser.add_argument('--cmake-darg', required=False, dest='cmake_dargs', action='append', default=[],
                        help='List of additional cmake defines for builds (e.g. CMAKE_CXX_COMPILER_LAUNCHER=ccache)')
    parser.add_argument('-a', '--architecture', dest='gpu_architecture', required=False, default=default_gpus,
                        help='Set GPU architectures, e.g. all, gfx000, gfx906:xnack-;gfx1030;gfx1100 '
                             f'(optional, default: {default_gpus})')
    parser.add_argument('-v', '--verbose', required=False, default=False, action='store_true',
                        help='Verbose build (default: False)')
    parser.add_argument('--no-offload-compress', required=False, default=False, action='store_true',
                        help='Do not apply offload compression (default: False)')

    return parser.parse_args(argv)


def os_detect():
    """Fill OS_info from /etc/os-release, or mark the host as Windows.

    Every KEY=VALUE line of /etc/os-release is stored with double quotes
    removed. When the file does not exist the host is recorded as
    ID='windows', VERSION_ID=10. "NUM_PROC" is always set.
    """
    inf_file = "/etc/os-release"
    if os.path.exists(inf_file):
        with open(inf_file) as f:
            for line in f:
                entry = line.strip()
                if not entry or entry.startswith("#"):
                    continue
                # Split on the first '=' only: values such as URLs may
                # themselves contain '='.
                key, sep, value = entry.partition("=")
                if sep:
                    OS_info[key] = value.replace('"', '')
    else:
        OS_info["ID"] = 'windows'
        OS_info["VERSION_ID"] = 10
    # os.cpu_count() may return None; fall back to one job so that
    # "make -j {nproc}" stays a valid command line.
    OS_info["NUM_PROC"] = os.cpu_count() or 1
    print(OS_info)


def create_dir(dir_path):
    """Create dir_path (and missing parents); relative paths resolve against the cwd."""
    if os.path.isabs(dir_path):
        full_path = dir_path
    else:
        full_path = os.path.join(os.getcwd(), dir_path)
    pathlib.Path(full_path).mkdir(parents=True, exist_ok=True)


def delete_dir(dir_path):
    """Recursively remove dir_path with the platform's shell command, if it exists."""
    if not os.path.exists(dir_path):
        return
    if OS_info["ID"] == 'windows':
        run_cmd("RMDIR", f"/S /Q {dir_path}")
    else:
        linux_path = pathlib.Path(dir_path).absolute()
        run_cmd("rm", f"-rf {linux_path}")


def cmake_path(os_path):
    """Return os_path in a form suitable for a cmake command line.

    On Windows backslashes become forward slashes; elsewhere the real
    (symlink-resolved) path is returned.
    """
    if OS_info["ID"] == "windows":
        return os_path.replace("\\", "/")
    return os.path.realpath(os_path)


def config_cmd():
    """Build the cmake configure command and prepare the build directory.

    Side effects: deletes and recreates the release/debug build directory
    and changes the current working directory into it.

    Returns:
        (cmake_executable, cmd_opts): executable name and the joined options.
    """
    cwd_path = os.getcwd()
    src_path = cwd_path.replace("\\", "/")

    print(f"***************** {src_path}")
    cmake_executable = ""
    cmake_options = []
    cmake_platform_opts = []
    if OS_info["ID"] == 'windows':
        # we don't have ROCM on windows but have hip, ROCM can be downloaded if required
        # CMAKE_PREFIX_PATH set to rocm_path and HIP_PATH set BY SDK Installer
        raw_rocm_path = cmake_path(os.getenv('HIP_PATH', "C:/hip"))
        rocm_path = f'"{raw_rocm_path}"'  # guard against spaces in path
        cmake_executable = "cmake.exe"
        toolchain = os.path.join(src_path, "toolchain-windows.cmake")
        # set CPACK_PACKAGING_INSTALL_PREFIX= defined as blank as it is appended to end of path for archive creation
        cmake_platform_opts.append("-DWIN32=ON -DCPACK_PACKAGING_INSTALL_PREFIX=")
        cmake_platform_opts.append("-DCMAKE_INSTALL_PREFIX=\"C:/hipSDK\"")

        # MSVC requires acknowledgement of using extended aligned storage.
        # Before VS 2017 15.8, has non-conforming alignment. VS 2017 15.8 fixes this, but inherently changes layouts of
        # aligned storage with extended alignment, and thus binary compatibility with such types.
        cmake_platform_opts.append("-DCMAKE_CXX_FLAGS=\"-D_ENABLE_EXTENDED_ALIGNED_STORAGE\"")

        rocm_cmake_path = '"' + cmake_path(os.getenv("ROCM_CMAKE_PATH", "C:/hipSDK")) + '"'
        generator = "-G Ninja"
        # "-G \"Visual Studio 16 2019\" -A x64"  #  -G NMake ")  #
        cmake_options.append(generator)
    else:
        rocm_path = os.getenv('ROCM_PATH', "/opt/rocm")
        rocm_cmake_path = '"' + rocm_path + '"'
        if OS_info["ID"] in ['centos', 'rhel']:
            cmake_executable = "cmake3"
        else:
            cmake_executable = "cmake"
        toolchain = "toolchain-linux.cmake"
        cmake_platform_opts = [f"-DROCM_DIR:PATH={rocm_path}", f"-DCPACK_PACKAGING_INSTALL_PREFIX={rocm_path}"]

    tools = f"-DCMAKE_TOOLCHAIN_FILE={toolchain}"
    cmake_options.append(tools)

    cmake_options.extend(cmake_platform_opts)

    # build type
    build_dir = args.build_dir
    if not args.debug:
        build_path = os.path.join(build_dir, "release")
        cmake_config = "Release"
    else:
        build_path = os.path.join(build_dir, "debug")
        cmake_config = "Debug"

    cmake_options.append(f"-DCMAKE_BUILD_TYPE={cmake_config}")

    # NOTE(review): deps_dir is computed but never passed to cmake, so
    # --deps_dir currently has no effect; kept to show the intended default.
    if args.deps_dir is None:
        deps_dir = os.path.abspath(os.path.join(build_dir, 'deps')).replace('\\', '/')
    else:
        deps_dir = args.deps_dir

    if OS_info["ID"] == 'windows':
        # Both paths carry surrounding quotes here; the slices drop the inner
        # pair so the whole "a;b" list ends up inside one quoted argument.
        cmake_base_options = f"-DROCM_PATH={rocm_path} -DCMAKE_PREFIX_PATH:PATH={rocm_path[:-1]};{rocm_cmake_path[1:]}"
    else:
        # rocm_path is NOT quoted on Linux, so it must not be sliced (the old
        # code produced "/opt/roc,/opt/rocm"). The list is quoted because the
        # command runs through a shell, where a bare ';' ends the command.
        cmake_base_options = f'-DROCM_PATH={rocm_path} -DCMAKE_PREFIX_PATH:PATH="{rocm_path};{rocm_cmake_path[1:-1]}"'
    cmake_options.append(cmake_base_options)

    print(cmake_options)

    # clean
    delete_dir(build_path)

    create_dir(os.path.join(build_path, "clients"))
    os.chdir(build_path)

    # packaging options
    cmake_pack_options = "-DCPACK_SET_DESTDIR=OFF -DCPACK_INCLUDE_TOPLEVEL_DIRECTORY=OFF"
    cmake_options.append(cmake_pack_options)

    if args.static_lib:
        cmake_options.append("-DBUILD_SHARED_LIBS=OFF")

    if args.skip_ld_conf_entry:
        cmake_options.append("-DROCM_DISABLE_LDCONFIG=ON")

    if args.build_tests:
        cmake_options.append(f"-DBUILD_TEST=ON -DBUILD_DIR={build_dir}")

    if args.build_clients:
        cmake_options.append(f"-DBUILD_TEST=ON -DBUILD_BENCHMARK=ON -DBUILD_EXAMPLE=ON -DBUILD_DIR={build_dir}")

    if args.no_offload_compress:
        cmake_options.append("-DBUILD_OFFLOAD_COMPRESS=OFF")

    cmake_options.append(f"-DAMDGPU_TARGETS={args.gpu_architecture}")

    if args.cmake_dargs:
        for i in args.cmake_dargs:
            cmake_options.append(f"-D{i}")

    cmake_options.append(f"{src_path}")

    # case "${ID}" in
    #   centos|rhel)
    #     cmake_options="${cmake_options} -DCMAKE_FIND_ROOT_PATH=/usr/lib64/llvm7.0/lib/cmake/"
    #     ;;
    #   windows)
    #     cmake_options="${cmake_options} -DWIN32=ON -DROCM_PATH=${rocm_path} -DROCM_DIR:PATH=${rocm_path} -DCMAKE_PREFIX_PATH:PATH=${rocm_path}"
    #     cmake_options="${cmake_options} --debug-trycompile -DCMAKE_MAKE_PROGRAM=nmake.exe -DCMAKE_TOOLCHAIN_FILE=toolchain-windows.cmake"
    #     # -G '"NMake Makefiles JOM"'"
    #     ;;
    # esac
    cmd_opts = " ".join(cmake_options)

    return cmake_executable, cmd_opts


def make_cmd():
    """Build the command that compiles (and optionally installs) the project.

    Returns:
        (make_executable, cmd_opts): the build tool invocation and its options.
    """
    make_options = []

    if OS_info["ID"] == 'windows':
        make_executable = "cmake.exe --build ."  # ninja"
        if args.verbose:
            make_options.append("--verbose")
        make_options.append("--target all")
        if args.install:
            make_options.append("--target package --target install")
    else:
        # Guard against a missing/None processor count.
        nproc = OS_info["NUM_PROC"] or 1
        make_executable = f"make -j {nproc}"
        if args.verbose:
            make_options.append("VERBOSE=1")
        if args.install:
            make_options.append("install")
    cmd_opts = " ".join(make_options)

    return make_executable, cmd_opts


def run_cmd(exe, opts):
    """Echo and run "exe opts" through the shell.

    Returns:
        The process return code (0, because check=True raises otherwise).

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.
    """
    program = f"{exe} {opts}"
    print(program)
    # The command is a single pre-joined string on every platform, so it is
    # always handed to the shell.
    proc = subprocess.run(program, check=True, stderr=subprocess.STDOUT, shell=True)
    return proc.returncode


def main():
    """Detect the host, parse arguments, then configure and build."""
    global args
    os_detect()
    args = parse_args()

    # configure
    exe, opts = config_cmd()
    run_cmd(exe, opts)

    # make/build/install
    exe, opts = make_cmd()
    run_cmd(exe, opts)


if __name__ == '__main__':
    main()

# --- archive member: ./cmake/ ---
# --- archive member: ./cmake/ConfigAutotune.cmake ---
# MIT License
#
# Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Function to add a configured source file to a target.
# It parses arguments, prepares the output file name, and configures the file.
#
# Arguments:
#   TARGET <target>          target that receives the generated source
#   INPUT <file>             template passed to configure_file(... @ONLY)
#   OUTPUT_PATTERN <string>  @VAR@ pattern expanded into the output file stem
#   NAMES <n>...             variable names made available to the templates
#   VALUES <v>...            one value per name, in the same order
function(add_configured_source)
    # Parse arguments and ensure proper usage
    cmake_parse_arguments(PARSE_ARGV 0 ARG "" "INPUT;TARGET;OUTPUT_PATTERN" "NAMES;VALUES")
    list(LENGTH ARG_NAMES NAMES_LEN)
    list(LENGTH ARG_VALUES VALS_LEN)
    if(NOT NAMES_LEN EQUAL VALS_LEN)
        message(FATAL_ERROR "add_configured_source: The same number of names (${NAMES_LEN}) and values (${VALS_LEN}) must be provided!")
    endif()

    # Define one local variable per NAME holding the matching VALUE, so that
    # string(CONFIGURE) and configure_file() below can substitute @NAME@.
    # The loop is skipped for empty lists: foreach(RANGE -1) is not a valid range.
    if(NAMES_LEN GREATER 0)
        math(EXPR max "${VALS_LEN} - 1")
        foreach(i RANGE ${max})
            list(GET ARG_NAMES ${i} curr_name)
            list(GET ARG_VALUES ${i} "${curr_name}")
        endforeach()
    endif()

    # Configure the output file and add it to the target
    string(CONFIGURE "${ARG_OUTPUT_PATTERN}" output @ONLY)
    if("${output}" STREQUAL "")
        message(FATAL_ERROR "add_configured_source: OUTPUT_PATTERN expanded to an empty file name!")
    endif()
    # Quoted so a pattern containing ';' is still treated as one string.
    string(MAKE_C_IDENTIFIER "${output}" output)
    set(output_path "${ARG_TARGET}.parallel/${output}.cpp")
    configure_file("${ARG_INPUT}" "${output_path}" @ONLY)
    set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_CLEAN_FILES "${ARG_TARGET}.parallel")
    target_sources("${ARG_TARGET}" PRIVATE "${output_path}")
    target_include_directories("${ARG_TARGET}" PRIVATE "../benchmark")

    # Ensure reconfiguration if necessary
    set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${ARG_INPUT}" "${output_path}")
endfunction()

# Function to divide two numbers and round up.
# Computes ceil(dividend / divisor) with integer arithmetic and stores the
# result in the caller's variable named by result_var.
function(div_round_up dividend divisor result_var)
    math(EXPR result "(${dividend} + ${divisor} - 1) / ${divisor}")
    set("${result_var}" "${result}" PARENT_SCOPE)
endfunction()

# Function to add a matrix of configured sources.
# It handles permutations of input parameters and calls add_configured_source accordingly.
#
# Arguments:
#   TARGET, INPUT, OUTPUT_PATTERN  forwarded to add_configured_source
#   NAMES <n>...                   one variable name per list
#   LISTS <l>...                   one space-separated value list per name
#   SHARDS <count>                 number of shards (default 1)
#   CURRENT_SHARD <index>          zero-based shard to generate (default 0)
function(add_matrix)
    set(single_value_args "TARGET" "INPUT" "OUTPUT_PATTERN" "SHARDS" "CURRENT_SHARD")
    cmake_parse_arguments(PARSE_ARGV 0 ARG "" "${single_value_args}" "NAMES;LISTS")

    # Validate argument lengths
    list(LENGTH ARG_NAMES NAMES_LEN)
    list(LENGTH ARG_LISTS LISTS_LEN)
    if(NOT NAMES_LEN EQUAL LISTS_LEN)
        message(FATAL_ERROR "add_matrix: The same number of names (${NAMES_LEN}) and lists (${LISTS_LEN}) must be provided!")
    endif()

    # Calculate the total number of permutations
    set(total_len 1)
    foreach(LIST IN LISTS ARG_LISTS)
        string(REPLACE " " ";" list_items "${LIST}")
        list(LENGTH list_items LIST_LEN)
        math(EXPR total_len "${total_len} * ${LIST_LEN}")
    endforeach()

    # Handle sharding
    if(NOT DEFINED ARG_SHARDS)
        set(ARG_SHARDS 1)
    endif()
    # Without a default the math() below would see an empty operand.
    if(NOT DEFINED ARG_CURRENT_SHARD)
        set(ARG_CURRENT_SHARD 0)
    endif()
    div_round_up("${total_len}" "${ARG_SHARDS}" per_shard)

    # Determine the range of permutations for the current shard
    math(EXPR start "${ARG_CURRENT_SHARD} * ${per_shard}")
    math(EXPR stop "${start} + ${per_shard} - 1")

    # per_shard is rounded up, so the last shard(s) may reach past the final
    # permutation. Indices beyond total_len would wrap around in the modulo
    # decoding below and re-generate permutations owned by earlier shards.
    math(EXPR last_index "${total_len} - 1")
    if(stop GREATER last_index)
        set(stop ${last_index})
    endif()
    if(start GREATER stop)
        # Nothing left for this shard.
        return()
    endif()

    # Process each permutation
    foreach(i RANGE ${start} ${stop})
        # Decode i as a mixed-radix number: one digit per input list.
        set(index ${i})
        set(values "")
        foreach(input_list IN LISTS ARG_LISTS)
            string(REPLACE " " ";" curr_list "${input_list}")
            list(LENGTH curr_list curr_length)
            math(EXPR curr_index "${index} % ${curr_length}")
            list(GET curr_list ${curr_index} curr_item)
            list(APPEND values "${curr_item}")
            math(EXPR index "${index} / ${curr_length}")
        endforeach()

        # Add the configured source for each permutation
        add_configured_source(TARGET "${ARG_TARGET}"
                              INPUT "${ARG_INPUT}"
                              OUTPUT_PATTERN "${ARG_OUTPUT_PATTERN}"
                              NAMES ${ARG_NAMES}
                              VALUES ${values})
    endforeach()
endfunction()

# Function to filter out odd block sizes.
# It sets a variable in the parent scope based on the condition.
# Reports whether BlockSize is acceptable: the caller's variable named by
# RESULT is set to ON for even sizes and to OFF for odd ones.
function(reject_odd_blocksize RESULT BlockSize)
    math(EXPR remainder "${BlockSize} % 2")
    set(is_accepted OFF)
    if(remainder EQUAL 0)
        set(is_accepted ON)
    endif()
    set("${RESULT}" ${is_accepted} PARENT_SCOPE)
endfunction()

# --- archive member: ./cmake/VerifyCompiler.cmake ---
# MIT License
#
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Make the ROCm installation (user-provided ROCM_PATH first, then the default
# /opt/rocm locations) visible to find_package().
list(APPEND CMAKE_PREFIX_PATH
    ${ROCM_PATH}
    ${ROCM_PATH}/hip
    ${ROCM_PATH}/llvm
    /opt/rocm/llvm
    /opt/rocm
    /opt/rocm/hip
)

find_package(hip REQUIRED CONFIG PATHS ${HIP_DIR} ${ROCM_PATH} /opt/rocm)

# Unless the HIP language support is used (USE_HIPCXX), the C++ compiler itself
# has to be HIP-aware: only the clang platform with hipcc or clang++ passes.
if(NOT USE_HIPCXX)
    if(NOT HIP_COMPILER STREQUAL "clang")
        message(FATAL_ERROR "HIP_COMPILER must be 'clang' (AMD ROCm platform)")
    elseif(NOT HIP_CXX_COMPILER MATCHES ".*hipcc" AND NOT HIP_CXX_COMPILER MATCHES ".*clang\\+\\+")
        message(FATAL_ERROR "On ROCm platform 'hipcc' or HIP-aware Clang must be used as C++ compiler.")
    endif()
endif()

# --- archive member: ./cmake/Summary.cmake ---
# MIT License
#
# Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Prints a human-readable summary of the build configuration via
# message(STATUS ...): toolchain, build type, install prefix, device targets,
# the project's feature switches, and (when the tools are found) the current
# git commit and `uname -a` output. Takes no arguments and sets nothing in the
# caller's scope.
function(print_configuration_summary)
  # Commit hash and subject line of the checkout containing this file.
  find_package(Git)
  if(GIT_FOUND)
    execute_process(
      COMMAND ${GIT_EXECUTABLE} show --format=%H --no-patch
      WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
      OUTPUT_VARIABLE COMMIT_HASH
      OUTPUT_STRIP_TRAILING_WHITESPACE
    )
    execute_process(
      COMMAND ${GIT_EXECUTABLE} show --format=%s --no-patch
      WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
      OUTPUT_VARIABLE COMMIT_SUBJECT
      OUTPUT_STRIP_TRAILING_WHITESPACE
    )
  endif()

  # Full `--version` banner of the C++ compiler.
  execute_process(
    COMMAND ${CMAKE_CXX_COMPILER} --version
    WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
    OUTPUT_VARIABLE CMAKE_CXX_COMPILER_VERBOSE_DETAILS
    OUTPUT_STRIP_TRAILING_WHITESPACE
  )

  # Kernel/system description; skipped when `uname` is not available.
  find_program(UNAME_EXECUTABLE uname)
  if(UNAME_EXECUTABLE)
    execute_process(
      COMMAND ${UNAME_EXECUTABLE} -a
      WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
      OUTPUT_VARIABLE LINUX_KERNEL_DETAILS
      OUTPUT_STRIP_TRAILING_WHITESPACE
    )
  endif()

  # Prefix every line of the compiler banner with "-- ": split on newlines into
  # a list, transform each element, then join back with newlines.
  string(REPLACE "\n" ";" CMAKE_CXX_COMPILER_VERBOSE_DETAILS "${CMAKE_CXX_COMPILER_VERBOSE_DETAILS}")
  list(TRANSFORM CMAKE_CXX_COMPILER_VERBOSE_DETAILS PREPEND "-- ")
  string(REPLACE ";" "\n" CMAKE_CXX_COMPILER_VERBOSE_DETAILS "${CMAKE_CXX_COMPILER_VERBOSE_DETAILS}")

  message(STATUS "")
  message(STATUS "******** Summary ********")
  message(STATUS "General:")
  message(STATUS " System : ${CMAKE_SYSTEM_NAME}")
  # Report the HIP toolchain when the HIP language is used, else the C++ one.
  if(USE_HIPCXX)
    message(STATUS " HIP compiler : ${CMAKE_HIP_COMPILER}")
    message(STATUS " HIP compiler version : ${CMAKE_HIP_COMPILER_VERSION}")
    string(STRIP "${CMAKE_HIP_FLAGS}" CMAKE_HIP_FLAGS_STRIP)
    message(STATUS " HIP flags : ${CMAKE_HIP_FLAGS_STRIP}")
  else()
    message(STATUS " C++ compiler : ${CMAKE_CXX_COMPILER}")
    message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}")
    string(STRIP "${CMAKE_CXX_FLAGS}" CMAKE_CXX_FLAGS_STRIP)
    message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS_STRIP}")
  endif()
  # Multi-config generators have a list of configurations instead of one build type.
  get_property(GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
  if(GENERATOR_IS_MULTI_CONFIG)
    message(STATUS " Build types : ${CMAKE_CONFIGURATION_TYPES}")
  else()
    message(STATUS " Build type : ${CMAKE_BUILD_TYPE}")
  endif()
  message(STATUS " Install prefix : ${CMAKE_INSTALL_PREFIX}")
  if(USE_HIPCXX)
    message(STATUS " Device targets : ${CMAKE_HIP_ARCHITECTURES}")
  else()
    message(STATUS " Device targets : ${GPU_TARGETS}")
  endif()
  message(STATUS "")
  # Project feature switches, printed with whatever value is currently set.
  message(STATUS " ONLY_INSTALL : ${ONLY_INSTALL}")
  message(STATUS " BUILD_TEST : ${BUILD_TEST}")
  message(STATUS " WITH_ROCRAND : ${WITH_ROCRAND}")
  message(STATUS " BUILD_BENCHMARK : ${BUILD_BENCHMARK}")
  message(STATUS " BUILD_NAIVE_BENCHMARK : ${BUILD_NAIVE_BENCHMARK}")
  message(STATUS " BUILD_EXAMPLE : ${BUILD_EXAMPLE}")
  message(STATUS " BUILD_DOCS : ${BUILD_DOCS}")
  message(STATUS " BUILD_OFFLOAD_COMPRESS : ${BUILD_OFFLOAD_COMPRESS}")
  message(STATUS " USE_SYSTEM_LIB : ${USE_SYSTEM_LIB}")
  message(STATUS "")
  message(STATUS "Detailed:")
  message(STATUS " C++ compiler details : \n${CMAKE_CXX_COMPILER_VERBOSE_DETAILS}")
  if(GIT_FOUND)
    message(STATUS " Commit : ${COMMIT_HASH}")
    message(STATUS " ${COMMIT_SUBJECT}")
  endif()
  if(UNAME_EXECUTABLE)
    message(STATUS " Unix name : ${LINUX_KERNEL_DETAILS}")
  endif()
endfunction()
./cmake/GenerateResourceSpec.cmake0000775000175100017510000000671415176134454017267 0ustar jenkinsjenkins#!/usr/bin/cmake -P

# Script-mode entry: queries `rocminfo` for the installed devices and writes a
# resources.json that groups the device ids by gfx IP name.

# rocminfo is mandatory; without it there is nothing to enumerate.
find_program(ROCMINFO_EXECUTABLE rocminfo )
if(NOT ROCMINFO_EXECUTABLE)
  message(FATAL_ERROR "rocminfo not found")
endif()

# Run rocminfo, capturing exit code, stdout and stderr separately.
execute_process(
  COMMAND ${ROCMINFO_EXECUTABLE}
  RESULT_VARIABLE ROCMINFO_EXIT_CODE
  OUTPUT_VARIABLE ROCMINFO_STDOUT
  ERROR_VARIABLE ROCMINFO_STDERR
)
# On failure echo both streams before aborting so the cause is visible.
if(ROCMINFO_EXIT_CODE)
  message(SEND_ERROR "rocminfo exited with ${ROCMINFO_EXIT_CODE}")
  message(SEND_ERROR ${ROCMINFO_STDOUT})
  message(FATAL_ERROR ${ROCMINFO_STDERR})
endif()

# Collect every "--gfxNNN" occurrence in the output, in order of appearance.
string(REGEX MATCHALL [[--(gfx[0-9a-f]+)]]
  ROCMINFO_MATCHES ${ROCMINFO_STDOUT}
)

# NOTE: Unfortunately we don't have structs in CMake,
# neither do we have std::partition only list(SORT)
#
# Transform raw regex matches to pairs of gfx IP and device id
# This will be our struct emulation.
# In C++ it would be
#
# struct device
# {
#   std::string ip;
#   int id;
# };
#
# std::vector GFXIP_AND_ID{ {"gfx900",0},{"gfx803",1},{"gfx900",2} };
# std::sort(GFXIP_AND_ID.begin(), GFXIP_AND_ID.end(),
#   [](const device& lhs, const device& rhs)
#   {
#     return std::lexicographical_compare(lhs.ip.begin(), lhs.ip.end(),
#                                         rhs.ip.begin(), rhs.ip.end());
#   });
#
# Each list entry is encoded as "<gfx ip>:<id>", where <id> is the zero-based
# position of the match in the rocminfo output.
set(GFXIP_AND_ID)
set(ID 0)
foreach(ROCMINFO_MATCH IN LISTS ROCMINFO_MATCHES)
  # Drop the leading "--" captured together with the gfx name.
  string(REGEX REPLACE
    "--" ""
    ROCMINFO_MATCH
    ${ROCMINFO_MATCH}
  )
  list(APPEND GFXIP_AND_ID "${ROCMINFO_MATCH}:${ID}")
  math(EXPR ID "${ID} + 1")
endforeach()
# Sorting makes entries with the same gfx IP adjacent, which the grouping loop
# below depends on.
list(SORT GFXIP_AND_ID)

# Now comes the tricky part: implementing the following C++ logic
#
# std::stringstream JSON_PAYLOAD;
# auto it = GFXIP_AND_ID.begin();
# while (it != GFXIP_AND_ID.end())
# {
#   auto IT = std::find_if(it, GFXIP_AND_ID.end(),
#     [=](const device& ip_id){ return ip_id.ip.compare(it->ip) != 0; });
#   JSON_PAYLOAD << "\n \"" << it->ip << "\": [";
#   std::for_each(it, IT, [&](const device& ip_id)
#   {
#     JSON_PAYLOAD <<
#       "\n {\n" <<
#       " \"id\": \"" << ip_id.id << "\"\n" <<
#       " },";
#   });
#   JSON_PAYLOAD.seekp(-1, std::ios_base::end); // discard trailing comma
#   JSON_PAYLOAD << "\n ],";
#   it = IT;
# }
# JSON_PAYLOAD.seekp(-1, std::ios_base::end); // discard trailing comma
#
# IT1 indexes the first entry of the current gfx group and IP1 is that group's
# gfx name; IT2/IP2/ID2 walk forward through the entries of the group.
set(JSON_PAYLOAD)
set(IT1 0)
# NOTE(review): when rocminfo reported no gfx entries GFXIP_AND_ID is empty and
# this list(GET) cannot succeed -- confirm whether that case needs a clearer
# diagnostic.
list(GET GFXIP_AND_ID ${IT1} I1)
string(REGEX REPLACE ":[0-9a-f]+" "" IP1 ${I1})
list(LENGTH GFXIP_AND_ID COUNT)
while(IT1 LESS COUNT)
  # Open the JSON array for this gfx IP.
  string(APPEND JSON_PAYLOAD "\n \"${IP1}\": [")
  set(IT2 ${IT1})
  list(GET GFXIP_AND_ID ${IT2} I2)
  # Split "<ip>:<id>" into its two halves.
  string(REGEX REPLACE [[:[0-9a-f]+$]] "" IP2 ${I2})
  string(REGEX REPLACE [[^gfx[0-9a-f]+:]] "" ID2 ${I2})
  # Emit one {"id": ...} object per entry while the gfx IP stays the same.
  while(${IP2} STREQUAL ${IP1} AND IT2 LESS COUNT)
    string(APPEND JSON_PAYLOAD
      "\n {\n"
      " \"id\": \"${ID2}\"\n"
      " },"
    )
    math(EXPR IT2 "${IT2} + 1")
    # Only decode the next entry when one exists; past the end the loop
    # condition's IT2 LESS COUNT term terminates the scan.
    if(IT2 LESS COUNT)
      list(GET GFXIP_AND_ID ${IT2} I2)
      string(REGEX REPLACE [[:[0-9a-f]+$]] "" IP2 ${I2})
      string(REGEX REPLACE [[^gfx[0-9a-f]+:]] "" ID2 ${I2})
    endif()
  endwhile()
  # Discard the trailing comma after the last object, then close the array.
  string(REGEX REPLACE [[,$]] "" JSON_PAYLOAD ${JSON_PAYLOAD})
  string(APPEND JSON_PAYLOAD "\n ],")
  # The entry that ended the inner loop starts the next group.
  set(IT1 ${IT2})
  set(IP1 ${IP2})
endwhile()
# Discard the trailing comma after the last array.
string(REGEX REPLACE [[,$]] "" JSON_PAYLOAD ${JSON_PAYLOAD})

# Fixed JSON text wrapped around the generated payload.
set(JSON_HEAD [[{ "version": { "major": 1, "minor": 0 }, "local": [ {]]
)
set(JSON_TAIL [[ } ] }]]
)

# Write head + payload + tail into resources.json in the current binary directory.
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/resources.json
  ${JSON_HEAD}
  ${JSON_PAYLOAD}
  ${JSON_TAIL}
)
./cmake/Dependencies.cmake0000664000175100017510000002335515176134512015570 0ustar jenkinsjenkins# MIT License
#
# Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# ###########################
# rocPRIM dependencies
# ###########################

# NOTE1: the reason we don't scope global state meddling using add_subdirectory
#        is because CMake < 3.24 lacks CMAKE_FIND_PACKAGE_TARGETS_GLOBAL which
#        would promote IMPORTED targets of find_package(CONFIG) to be visible
#        by other parts of the build. So we save and restore global state.
#
# NOTE2: We disable the ROCMChecks.cmake warning noting that we meddle with
#        global state. This is consequence of abusing the CMake CXX language
#        which HIP piggybacks on top of. This kind of HIP support has one chance
#        at observing the global flags, at the find_package(HIP) invocation.
#        The device compiler won't be able to pick up changes after that, hence
#        the warning.

# Save the user's global state; it is restored near the end of this file.
set(USER_CXX_FLAGS ${CMAKE_CXX_FLAGS})
if(DEFINED BUILD_SHARED_LIBS)
  set(USER_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
endif()
set(USER_ROCM_WARN_TOOLCHAIN_VAR ${ROCM_WARN_TOOLCHAIN_VAR})

set(ROCM_WARN_TOOLCHAIN_VAR OFF CACHE BOOL "")
# Turn off warnings and errors for all warnings in dependencies
separate_arguments(CXX_FLAGS_LIST NATIVE_COMMAND ${CMAKE_CXX_FLAGS})
# "-Werror=pedantic" was previously misspelled "-Werror=pendantic", so the real
# flag was never removed from the dependency builds.
list(REMOVE_ITEM CXX_FLAGS_LIST /WX -Werror -Werror=pedantic -pedantic-errors)
if(MSVC)
  list(FILTER CXX_FLAGS_LIST EXCLUDE REGEX "/[Ww]([0-4]?)(all)?") # Remove MSVC warning flags
  list(APPEND CXX_FLAGS_LIST /w)
else()
  list(FILTER CXX_FLAGS_LIST EXCLUDE REGEX "-W(all|extra|everything)") # Remove GCC/LLVM flags
  list(APPEND CXX_FLAGS_LIST -w)
endif()
list(JOIN CXX_FLAGS_LIST " " CMAKE_CXX_FLAGS)
# Don't build client dependencies as shared
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Global flag to cause add_library() to create shared libraries if on." FORCE)

# HIP dependency is handled earlier in the project cmake file
# when VerifyCompiler.cmake is included.

include(FetchContent)

# For downloading, building, and installing required dependencies
include(cmake/DownloadProject.cmake)

# Test dependencies
if(BUILD_TEST)
  # NOTE1: Google Test has created a mess with legacy FindGTest.cmake and newer GTestConfig.cmake
  #
  #        FindGTest.cmake defines:   GTest::GTest, GTest::Main, GTEST_FOUND
  #
  #        GTestConfig.cmake defines: GTest::gtest, GTest::gtest_main, GTest::gmock, GTest::gmock_main
  #
  # NOTE2: Finding GTest in MODULE mode, one cannot invoke find_package in CONFIG mode, because targets
  #        will be duplicately defined.
  #
  # NOTE3: The following snippet first tries to find Google Test binary either in MODULE or CONFIG modes.
  #        If neither succeeds it goes on to import Google Test into this build either from a system
  #        source package (apt install googletest on Ubuntu 18.04 only) or GitHub and defines the MODULE
  #        mode targets. Otherwise if MODULE or CONFIG succeeded, then it prints the result to the
  #        console via a non-QUIET find_package call and if CONFIG succeeded, creates ALIAS targets
  #        with the MODULE IMPORTED names.
  if(NOT DEPENDENCIES_FORCE_DOWNLOAD)
    if(WIN32)
      # Older versions of gtest on Windows do not support printing of 128-bit values,
      # causing compilation errors.
      find_package(GTest 1.11.0 REQUIRED)
    else()
      find_package(GTest QUIET)
    endif()
  endif()

  if(NOT TARGET GTest::GTest AND NOT TARGET GTest::gtest)
    option(BUILD_GTEST "Builds the googletest subproject" ON)
    option(BUILD_GMOCK "Builds the googlemock subproject" OFF)
    option(INSTALL_GTEST "Enable installation of googletest." OFF)
    if(EXISTS /usr/src/googletest AND NOT DEPENDENCIES_FORCE_DOWNLOAD)
      FetchContent_Declare(
        googletest
        SOURCE_DIR /usr/src/googletest
      )
    else()
      message(STATUS "Google Test not found. Fetching...")
      FetchContent_Declare(
        googletest
        GIT_REPOSITORY https://github.com/google/googletest.git
        GIT_TAG        e2239ee6043f73722e7aa812a459f54a28552929 # release-1.11.0
      )
    endif()
    FetchContent_MakeAvailable(googletest)
    add_library(GTest::GTest ALIAS gtest)
    add_library(GTest::Main ALIAS gtest_main)
  else()
    find_package(GTest REQUIRED)
    if(TARGET GTest::gtest_main AND NOT TARGET GTest::Main)
      add_library(GTest::GTest ALIAS GTest::gtest)
      add_library(GTest::Main ALIAS GTest::gtest_main)
    endif()
  endif()
endif(BUILD_TEST)

if(BUILD_BENCHMARK)
  set(BENCHMARK_VERSION 1.8.0)
  if(NOT DEPENDENCIES_FORCE_DOWNLOAD)
    find_package(benchmark ${BENCHMARK_VERSION} CONFIG QUIET)
  endif()

  if(NOT TARGET benchmark::benchmark)
    message(STATUS "Google Benchmark not found. Fetching...")
    option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." OFF)
    option(BENCHMARK_ENABLE_INSTALL "Enable installation of benchmark." OFF)
    FetchContent_Declare(
      googlebench
      GIT_REPOSITORY https://github.com/google/benchmark.git
      GIT_TAG        v${BENCHMARK_VERSION}
    )
    set(HAVE_STD_REGEX ON)
    set(RUN_HAVE_STD_REGEX 1)
    FetchContent_MakeAvailable(googlebench)
    if(NOT TARGET benchmark::benchmark)
      add_library(benchmark::benchmark ALIAS benchmark)
    endif()
  else()
    find_package(benchmark CONFIG REQUIRED)
  endif()
endif(BUILD_BENCHMARK)

if(NOT DEPENDENCIES_FORCE_DOWNLOAD)
  find_package(ROCmCMakeBuildTools 0.11.0 CONFIG QUIET PATHS "${ROCM_ROOT}") # rocm-cmake
endif()
if(NOT ROCmCMakeBuildTools_FOUND)
  message(STATUS "ROCm CMake not found. Fetching...")
  # We don't really want to consume the build and test targets of ROCm CMake.
  # CMake 3.18 allows omitting them, even though there's a CMakeLists.txt in source root.
  if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
    set(SOURCE_SUBDIR_ARG SOURCE_SUBDIR "DISABLE ADDING TO BUILD")
  else()
    set(SOURCE_SUBDIR_ARG)
  endif()
  # NOTE(review): rocm_cmake_tag is declared as a cache option but GIT_TAG below
  # is hard-coded, so changing the option has no effect -- confirm which one is
  # intended before wiring them together.
  set(rocm_cmake_tag "master" CACHE STRING "rocm-cmake tag to download")
  FetchContent_Declare(
    rocm-cmake
    GIT_REPOSITORY https://github.com/ROCm/rocm-cmake.git
    GIT_TAG        rocm-6.4.4
    ${SOURCE_SUBDIR_ARG}
  )
  FetchContent_GetProperties(rocm-cmake)
  if(NOT rocm-cmake_POPULATED)
    # rocm-cmake 0.12.0 and higher needs to be built from source
    FetchContent_Populate(rocm-cmake)
    message("Populated: ${rocm-cmake_SOURCE_DIR}")
    # Configure and install in-tree so the package config ends up below
    # ${rocm-cmake_SOURCE_DIR}, where the find_package() below looks.
    execute_process(
      WORKING_DIRECTORY ${rocm-cmake_SOURCE_DIR}
      COMMAND ${CMAKE_COMMAND} ${rocm-cmake_SOURCE_DIR} -DCMAKE_INSTALL_PREFIX=.
    )
    execute_process(
      WORKING_DIRECTORY ${rocm-cmake_SOURCE_DIR}
      COMMAND ${CMAKE_COMMAND} --build ${rocm-cmake_SOURCE_DIR} --target install
    )
  endif()
  FetchContent_MakeAvailable(rocm-cmake)
  find_package(ROCmCMakeBuildTools CONFIG REQUIRED NO_DEFAULT_PATH PATHS "${rocm-cmake_SOURCE_DIR}")
else()
  find_package(ROCmCMakeBuildTools 0.11.0 CONFIG REQUIRED PATHS "${ROCM_ROOT}")
endif()

# rocRAND (https://github.com/ROCmSoftwarePlatform/rocRAND)
if(WITH_ROCRAND)
  find_package(rocrand QUIET)
endif()
if(WITH_ROCRAND AND NOT rocrand_FOUND)
  message(STATUS "Downloading and building rocrand.")
  set(ROCRAND_ROOT ${CMAKE_CURRENT_BINARY_DIR}/deps/rocrand CACHE PATH "")

  set(EXTRA_CMAKE_ARGS "-DGPU_TARGETS=${GPU_TARGETS}")
  # CMAKE_ARGS of download_project (or ExternalProject_Add) can't contain ; so another separator
  # is needed and LIST_SEPARATOR is passed to download_project()
  string(REPLACE ";" "|" EXTRA_CMAKE_ARGS "${EXTRA_CMAKE_ARGS}")
  # Pass launcher so sccache can be used to speed up building rocRAND
  if(CMAKE_CXX_COMPILER_LAUNCHER)
    set(EXTRA_CMAKE_ARGS "${EXTRA_CMAKE_ARGS} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER}")
  endif()
  # The install prefix must match INSTALL_DIR: find_package() below searches
  # ${ROCRAND_ROOT}, so an empty prefix would install rocRAND where it is
  # never found.
  download_project(
    PROJ                  rocrand
    GIT_REPOSITORY        https://github.com/ROCmSoftwarePlatform/rocRAND.git
    GIT_TAG               develop
    GIT_SHALLOW           TRUE
    INSTALL_DIR           ${ROCRAND_ROOT}
    LIST_SEPARATOR        |
    CMAKE_ARGS            -DCMAKE_CXX_COMPILER=hipcc -DBUILD_TEST=OFF -DCMAKE_INSTALL_PREFIX=${ROCRAND_ROOT} -DCMAKE_PREFIX_PATH=/opt/rocm ${EXTRA_CMAKE_ARGS}
    LOG_DOWNLOAD          TRUE
    LOG_CONFIGURE         TRUE
    LOG_BUILD             TRUE
    LOG_INSTALL           TRUE
    LOG_OUTPUT_ON_FAILURE TRUE
    BUILD_PROJECT         TRUE
    UPDATE_DISCONNECTED   TRUE
  )
  find_package(rocrand REQUIRED CONFIG PATHS ${ROCRAND_ROOT})
endif()

# Restore user global state
set(CMAKE_CXX_FLAGS ${USER_CXX_FLAGS})
if(DEFINED USER_BUILD_SHARED_LIBS)
  set(BUILD_SHARED_LIBS ${USER_BUILD_SHARED_LIBS})
else()
  unset(BUILD_SHARED_LIBS CACHE )
endif()
set(ROCM_WARN_TOOLCHAIN_VAR ${USER_ROCM_WARN_TOOLCHAIN_VAR} CACHE BOOL "")

# rocm-cmake modules used by the rest of the build
include(ROCMSetupVersion)
include(ROCMCreatePackage)
include(ROCMInstallTargets) include(ROCMPackageConfigHelpers) include(ROCMInstallSymlinks) include(ROCMCheckTargetIds) include(ROCMClients) if(BUILD_DOCS) include(ROCMSphinxDoc) endif() ./cmake/DownloadProject.CMakeLists.cmake.in0000664000175100017510000000200115176134454020671 0ustar jenkinsjenkins# Distributed under the OSI-approved MIT License. See accompanying # file LICENSE or https://github.com/Crascit/DownloadProject for details. cmake_minimum_required(VERSION 2.8.2) project(${DL_ARGS_PROJ}-download NONE) include(ExternalProject) if(${DL_ARGS_BUILD_PROJECT}) ExternalProject_Add(${DL_ARGS_PROJ}-download ${DL_ARGS_UNPARSED_ARGUMENTS} SOURCE_DIR "${DL_ARGS_SOURCE_DIR}" BUILD_IN_SOURCE TRUE TEST_COMMAND "" ) else() ExternalProject_Add(${DL_ARGS_PROJ}-download ${DL_ARGS_UNPARSED_ARGUMENTS} SOURCE_DIR "${DL_ARGS_SOURCE_DIR}" BUILD_IN_SOURCE TRUE TEST_COMMAND "" UPDATE_COMMAND "" CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND "" ) endif() ./cmake/DownloadProject.cmake0000664000175100017510000001674515176134454016312 0ustar jenkinsjenkins# Distributed under the OSI-approved MIT License. See accompanying # file LICENSE or https://github.com/Crascit/DownloadProject for details. # # MODULE: DownloadProject # # PROVIDES: # download_project( PROJ projectName # [PREFIX prefixDir] # [DOWNLOAD_DIR downloadDir] # [SOURCE_DIR srcDir] # [BINARY_DIR binDir] # [QUIET] # ... # ) # # Provides the ability to download and unpack a tarball, zip file, git repository, # etc. at configure time (i.e. when the cmake command is run). How the downloaded # and unpacked contents are used is up to the caller, but the motivating case is # to download source code which can then be included directly in the build with # add_subdirectory() after the call to download_project(). Source and build # directories are set up with this in mind. # # The PROJ argument is required. 
The projectName value will be used to construct # the following variables upon exit (obviously replace projectName with its actual # value): # # projectName_SOURCE_DIR # projectName_BINARY_DIR # # The SOURCE_DIR and BINARY_DIR arguments are optional and would not typically # need to be provided. They can be specified if you want the downloaded source # and build directories to be located in a specific place. The contents of # projectName_SOURCE_DIR and projectName_BINARY_DIR will be populated with the # locations used whether you provide SOURCE_DIR/BINARY_DIR or not. # # The DOWNLOAD_DIR argument does not normally need to be set. It controls the # location of the temporary CMake build used to perform the download. # # The PREFIX argument can be provided to change the base location of the default # values of DOWNLOAD_DIR, SOURCE_DIR and BINARY_DIR. If all of those three arguments # are provided, then PREFIX will have no effect. The default value for PREFIX is # CMAKE_BINARY_DIR. # # The QUIET option can be given if you do not want to show the output associated # with downloading the specified project. # # In addition to the above, any other options are passed through unmodified to # ExternalProject_Add() to perform the actual download, patch and update steps. # # Only those ExternalProject_Add() arguments which relate to downloading, patching # and updating of the project sources are intended to be used. Also note that at # least one set of download-related arguments are required. # # If using CMake 3.2 or later, the UPDATE_DISCONNECTED option can be used to # prevent a check at the remote end for changes every time CMake is run # after the first successful download. See the documentation of the ExternalProject # module for more information. It is likely you will want to use this option if it # is available to you. 
Note, however, that the ExternalProject implementation contains # bugs which result in incorrect handling of the UPDATE_DISCONNECTED option when # using the URL download method or when specifying a SOURCE_DIR with no download # method. Fixes for these have been created, the last of which is scheduled for # inclusion in CMake 3.8.0. Details can be found here: # # https://gitlab.kitware.com/cmake/cmake/commit/bdca68388bd57f8302d3c1d83d691034b7ffa70c # https://gitlab.kitware.com/cmake/cmake/issues/16428 # # If you experience build errors related to the update step, consider avoiding # the use of UPDATE_DISCONNECTED. # # EXAMPLE USAGE: # # include(DownloadProject) # download_project(PROJ googletest # GIT_REPOSITORY https://github.com/google/googletest.git # GIT_TAG master # UPDATE_DISCONNECTED 1 # QUIET # ) # # add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR}) # #======================================================================================== set(_DownloadProjectDir "${CMAKE_CURRENT_LIST_DIR}") include(CMakeParseArguments) function(download_project) set(options QUIET) set(oneValueArgs PROJ PREFIX DOWNLOAD_DIR SOURCE_DIR BINARY_DIR BUILD_PROJECT ) set(multiValueArgs "") cmake_parse_arguments(DL_ARGS "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) # Hide output if requested if (DL_ARGS_QUIET) set(OUTPUT_QUIET "OUTPUT_QUIET") else() unset(OUTPUT_QUIET) message(STATUS "Downloading/updating ${DL_ARGS_PROJ}") endif() # Set up where we will put our temporary CMakeLists.txt file and also # the base point below which the default source and binary dirs will be. # The prefix must always be an absolute path. 
if (NOT DL_ARGS_PREFIX) set(DL_ARGS_PREFIX "${CMAKE_BINARY_DIR}") else() get_filename_component(DL_ARGS_PREFIX "${DL_ARGS_PREFIX}" ABSOLUTE BASE_DIR "${CMAKE_CURRENT_BINARY_DIR}") endif() if (NOT DL_ARGS_DOWNLOAD_DIR) set(DL_ARGS_DOWNLOAD_DIR "${DL_ARGS_PREFIX}/${DL_ARGS_PROJ}-download") endif() # Ensure the caller can know where to find the source and build directories if (NOT DL_ARGS_SOURCE_DIR) set(DL_ARGS_SOURCE_DIR "${DL_ARGS_PREFIX}/${DL_ARGS_PROJ}-src") endif() if (NOT DL_ARGS_BINARY_DIR) set(DL_ARGS_BINARY_DIR "${DL_ARGS_PREFIX}/${DL_ARGS_PROJ}-build") endif() set(${DL_ARGS_PROJ}_SOURCE_DIR "${DL_ARGS_SOURCE_DIR}" PARENT_SCOPE) set(${DL_ARGS_PROJ}_BINARY_DIR "${DL_ARGS_BINARY_DIR}" PARENT_SCOPE) # The way that CLion manages multiple configurations, it causes a copy of # the CMakeCache.txt to be copied across due to it not expecting there to # be a project within a project. This causes the hard-coded paths in the # cache to be copied and builds to fail. To mitigate this, we simply # remove the cache if it exists before we configure the new project. It # is safe to do so because it will be re-generated. Since this is only # executed at the configure step, it should not cause additional builds or # downloads. file(REMOVE "${DL_ARGS_DOWNLOAD_DIR}/CMakeCache.txt") # Create and build a separate CMake project to carry out the download. # If we've already previously done these steps, they will not cause # anything to be updated, so extra rebuilds of the project won't occur. # Make sure to pass through CMAKE_MAKE_PROGRAM in case the main project # has this set to something not findable on the PATH. configure_file("${_DownloadProjectDir}/DownloadProject.CMakeLists.cmake.in" "${DL_ARGS_DOWNLOAD_DIR}/CMakeLists.txt") execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" -D "CMAKE_MAKE_PROGRAM:FILE=${CMAKE_MAKE_PROGRAM}" . 
RESULT_VARIABLE result ${OUTPUT_QUIET} WORKING_DIRECTORY "${DL_ARGS_DOWNLOAD_DIR}" ) if(result) message(FATAL_ERROR "CMake step for ${DL_ARGS_PROJ} failed: ${result}") endif() execute_process(COMMAND ${CMAKE_COMMAND} --build . RESULT_VARIABLE result ${OUTPUT_QUIET} WORKING_DIRECTORY "${DL_ARGS_DOWNLOAD_DIR}" ) if(result) message(FATAL_ERROR "Build step for ${DL_ARGS_PROJ} failed: ${result}") endif() endfunction() ./NOTICES.txt0000664000175100017510000000661615176134454012773 0ustar jenkinsjenkinsNotices and Licenses file ______________________________________________________________________________ AMD copyrighted code (MIT) Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ROCmSoftwarePlatform-rocPRIM v2.5.0 (MIT) Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. florianrappl-cmdparser v-u (MIT) Copyright (c) 2015 - 2016 Florian Rappl Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ./toolchain-windows.cmake0000664000175100017510000000226615176134512015570 0ustar jenkinsjenkins#set(CMAKE_MAKE_PROGRAM "nmake.exe") #set(CMAKE_GENERATOR "Ninja") # Ninja doesn't support platform #set(CMAKE_GENERATOR_PLATFORM x64) if (DEFINED ENV{HIP_PATH}) file(TO_CMAKE_PATH "$ENV{HIP_PATH}" HIP_DIR) set(rocm_bin "${HIP_DIR}/bin") elseif (DEFINED ENV{HIP_DIR}) file(TO_CMAKE_PATH "$ENV{HIP_DIR}" HIP_DIR) set(rocm_bin "${HIP_DIR}/bin") else() set(HIP_DIR "C:/hip") set(rocm_bin "C:/hip/bin") endif() set(CMAKE_CXX_COMPILER "${rocm_bin}/clang++.exe") if (NOT python) set(python "python3") # take default for windows endif() # our usage flags set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DWIN32 -D_CRT_SECURE_NO_WARNINGS") # flags for clang direct use # -Wno-ignored-attributes to avoid warning: __declspec attribute 'dllexport' is not supported [-Wignored-attributes] which is used by msvc compiler set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -fms-extensions -fms-compatibility -Wno-ignored-attributes") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__ -D__HIP_ROCclr__") if (DEFINED ENV{VCPKG_PATH}) file(TO_CMAKE_PATH "$ENV{VCPKG_PATH}" VCPKG_PATH) else() set(VCPKG_PATH "C:/github/vcpkg") endif() include("${VCPKG_PATH}/scripts/buildsystems/vcpkg.cmake") ./common/0000775000175100017510000000000015176134512012400 5ustar jenkinsjenkins./common/utils_device_ptr.hpp0000664000175100017510000005366015176134454016474 0ustar jenkinsjenkins// Copyright (c) 2021-2025 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. #ifndef ROCPRIM_UTILS_DEVICE_PTR_HPP #define ROCPRIM_UTILS_DEVICE_PTR_HPP #include "utils.hpp" #include #include #include #include namespace common { /// \brief An RAII friendly class to manage the memory allocated on device. /// /// \tparam A Template type used by the class. template class device_ptr { public: using decay_type = std::decay_t; using size_type = std::size_t; using value_type = ValueType; private: // If value_type is void we want to emulate allocating bytes (uchar). using value_type_proxy = std::conditional_t::value, unsigned char, ValueType>; public: static constexpr size_t value_size = sizeof(value_type_proxy); device_ptr() : device_raw_ptr_(nullptr), number_of_ele_(0){}; /// \brief Construct with a pre-allocated memory space. 
device_ptr(size_type pre_alloc_number_of_ele) : device_raw_ptr_(nullptr), number_of_ele_(pre_alloc_number_of_ele) { size_type storage_size = number_of_ele_ * value_size; HIP_CHECK(common::hipMallocHelper(&device_raw_ptr_, storage_size)); }; device_ptr(device_ptr const&) = delete; device_ptr(device_ptr&& other) noexcept : device_raw_ptr_(other.device_raw_ptr_), number_of_ele_(other.number_of_ele_) { other.leak(); }; /// \brief Construct by host vectors with the same sized value_type template explicit device_ptr(std::vector const& data) : device_raw_ptr_(nullptr), number_of_ele_(data.size()) { static_assert(sizeof(InValueType) == value_size, "value_type of input must have the same size with device_ptr::value_type"); size_type storage_size = number_of_ele_ * value_size; HIP_CHECK(common::hipMallocHelper(&device_raw_ptr_, storage_size)); HIP_CHECK(hipMemcpy(device_raw_ptr_, data.data(), storage_size, hipMemcpyHostToDevice)); } template explicit device_ptr(std::vector const& data, hipStream_t stream) : device_raw_ptr_(nullptr), number_of_ele_(data.size()) { static_assert(sizeof(InValueType) == value_size, "value_type of input must have the same size with device_ptr::value_type"); size_type storage_size = number_of_ele_ * value_size; HIP_CHECK(common::hipMallocHelper(&device_raw_ptr_, storage_size)); HIP_CHECK(hipMemcpyAsync(device_raw_ptr_, data.data(), storage_size, hipMemcpyHostToDevice, stream)); } template explicit device_ptr(std::array const& data) : device_raw_ptr_(nullptr), number_of_ele_(Size) { static_assert(sizeof(InValueType) == value_size, "value_type of input must have the same size with device_ptr::value_type"); size_type storage_size = Size * value_size; HIP_CHECK(common::hipMallocHelper(&device_raw_ptr_, storage_size)); HIP_CHECK(hipMemcpy(device_raw_ptr_, data.data(), storage_size, hipMemcpyHostToDevice)); } template explicit device_ptr(std::array const& data, hipStream_t stream) : device_raw_ptr_(nullptr), number_of_ele_(Size) { 
static_assert(sizeof(InValueType) == value_size, "value_type of input must have the same size with device_ptr::value_type"); size_type storage_size = Size * value_size; HIP_CHECK(common::hipMallocHelper(&device_raw_ptr_, storage_size)); HIP_CHECK(hipMemcpyAsync(device_raw_ptr_, data.data(), storage_size, hipMemcpyHostToDevice, stream)); } template explicit device_ptr(std::unique_ptr const& uptr, size_type size) : device_raw_ptr_(nullptr), number_of_ele_(size) { static_assert(sizeof(InValueType) == value_size, "value_type of input must have the same size with device_ptr::value_type"); size_type storage_size = size * value_size; HIP_CHECK(common::hipMallocHelper(&device_raw_ptr_, storage_size)); HIP_CHECK(hipMemcpy(device_raw_ptr_, uptr.get(), storage_size, hipMemcpyHostToDevice)); } template explicit device_ptr(std::unique_ptr const& uptr, size_type size, hipStream_t stream) : device_raw_ptr_(nullptr), number_of_ele_(size) { static_assert(sizeof(InValueType) == value_size, "value_type of input must have the same size with device_ptr::value_type"); size_type storage_size = size * value_size; HIP_CHECK(common::hipMallocHelper(&device_raw_ptr_, storage_size)); HIP_CHECK(hipMemcpyAsync(device_raw_ptr_, uptr.get(), storage_size, hipMemcpyHostToDevice, stream)); } ~device_ptr() { free_manually(); }; device_ptr& operator=(device_ptr const&) = delete; device_ptr& operator=(device_ptr&& other) noexcept { free_manually(); device_raw_ptr_ = other.device_raw_ptr_; number_of_ele_ = other.number_of_ele_; other.leak(); return *this; }; /// \brief Do copy on the device. /// /// \return A new `device_ptr` rvalue. 
device_ptr duplicate() const { device_ptr ret; ret.number_of_ele_ = number_of_ele_; size_type storage_size = number_of_ele_ * value_size; HIP_CHECK(common::hipMallocHelper(&ret.device_raw_ptr_, storage_size)); HIP_CHECK( hipMemcpy(ret.device_raw_ptr_, device_raw_ptr_, storage_size, hipMemcpyDeviceToDevice)); return ret; } device_ptr duplicate_async(hipStream_t stream) const { device_ptr ret; ret.number_of_ele_ = number_of_ele_; size_type storage_size = number_of_ele_ * value_size; HIP_CHECK(common::hipMallocHelper(&ret.device_raw_ptr_, storage_size)); HIP_CHECK(hipMemcpyAsync(ret.device_raw_ptr_, device_raw_ptr_, storage_size, hipMemcpyDeviceToDevice, stream)); return ret; } /// \brief Do type cast and move the ownership to the new `device_ptr`. /// /// \return A new `device_ptr` rvalue. template device_ptr move_cast() noexcept { using target_value_t = typename device_ptr::value_type; auto ret_deivce_raw_ptr_ = static_cast(static_cast(device_raw_ptr_)); auto ret_number_of_ele_ = value_size * number_of_ele_ / sizeof(target_value_t); leak(); return {ret_deivce_raw_ptr_, ret_number_of_ele_}; } /// \brief Get the device raw pointer value_type* get() const noexcept { return device_raw_ptr_; } /// \brief Clean every thing on this instance, which could lead to memory leak. 
Should call `get()` and free the raw pointer manually void leak() noexcept { device_raw_ptr_ = nullptr; number_of_ele_ = 0; } /// \brief Call this function to garbage the memory in advance void free_manually() { if(device_raw_ptr_ != nullptr) { HIP_CHECK(hipFree(device_raw_ptr_)); } leak(); } void resize(size_type new_number_of_ele) { if(new_number_of_ele == 0) { free_manually(); } else { value_type* device_temp_ptr = nullptr; HIP_CHECK(common::hipMallocHelper(&device_temp_ptr, new_number_of_ele * value_size)); HIP_CHECK(hipMemcpy(device_temp_ptr, device_raw_ptr_, std::min(new_number_of_ele, number_of_ele_) * value_size, hipMemcpyDeviceToDevice)); free_manually(); device_raw_ptr_ = device_temp_ptr; number_of_ele_ = new_number_of_ele; } } void resize_async(size_type new_number_of_ele, hipStream_t stream) { if(new_number_of_ele == 0) { free_manually(); } else { value_type* device_temp_ptr = nullptr; HIP_CHECK(common::hipMallocHelper(&device_temp_ptr, new_number_of_ele * value_size)); HIP_CHECK(hipMemcpyAsync(device_temp_ptr, device_raw_ptr_, std::min(new_number_of_ele, number_of_ele_) * value_size, hipMemcpyDeviceToDevice, stream)); free_manually(); device_raw_ptr_ = device_temp_ptr; number_of_ele_ = new_number_of_ele; } } // if got error hipErrorOutOfMemory` return false, else return `true` bool resize_with_memory_check(size_type new_number_of_ele) { if(new_number_of_ele == 0) { free_manually(); } else { value_type* device_temp_ptr = nullptr; const auto err = common::hipMallocHelper(&device_temp_ptr, new_number_of_ele * value_size); if(err == hipErrorOutOfMemory) { (void) hipGetLastError(); // reset internally recorded HIP error return false; } HIP_CHECK(err); HIP_CHECK(hipMemcpy(device_temp_ptr, device_raw_ptr_, std::min(new_number_of_ele, number_of_ele_) * value_size, hipMemcpyDeviceToDevice)); free_manually(); device_raw_ptr_ = device_temp_ptr; number_of_ele_ = new_number_of_ele; } return true; } bool resize_with_memory_check_async(size_type new_number_of_ele, 
hipStream_t stream) { if(new_number_of_ele == 0) { free_manually(); } else { value_type* device_temp_ptr = nullptr; const auto err = common::hipMallocHelper(&device_temp_ptr, new_number_of_ele * value_size); if(err == hipErrorOutOfMemory) { return false; } HIP_CHECK(err); HIP_CHECK(hipMemcpyAsync(device_temp_ptr, device_raw_ptr_, std::min(new_number_of_ele, number_of_ele_) * value_size, hipMemcpyDeviceToDevice, stream)); free_manually(); device_raw_ptr_ = device_temp_ptr; number_of_ele_ = new_number_of_ele; } return true; } /// \brief Get the size of this memory space size_type msize() const noexcept { return number_of_ele_ * value_size; } /// \brief Get the number of elements size_type size() const noexcept { return number_of_ele_; } /// \brief Copy from host to device template void store(std::vector const& host_vec, size_type offset = 0) { static_assert(sizeof(InValueType) == value_size, "value_type of input must have the same size with device_ptr::value_type"); if(host_vec.size() + offset > number_of_ele_) { resize(host_vec.size() + offset); } HIP_CHECK(hipMemcpy(device_raw_ptr_ + offset, host_vec.data(), host_vec.size() * value_size, hipMemcpyHostToDevice)); } template void store(std::array const& host_arr) { static_assert(sizeof(InValueType) == value_size, "value_type of input must have the same size with device_ptr::value_type"); if(Size > number_of_ele_) { resize(Size); } HIP_CHECK( hipMemcpy(device_raw_ptr_, host_arr.data(), Size * value_size, hipMemcpyHostToDevice)); } template void store(std::unique_ptr const& uptr, size_type offset, size_type number_of_ele) { static_assert( sizeof(InValueType) == value_size, "value_type of input unique_ptr must have the same size with device_ptr::value_type"); if(offset + number_of_ele > number_of_ele_) { resize(offset + number_of_ele); } HIP_CHECK(hipMemcpy(device_raw_ptr_ + offset, uptr.get(), number_of_ele * value_size, hipMemcpyHostToDevice)); } template void store_async(std::vector const& host_vec, hipStream_t 
stream) { static_assert( sizeof(InValueType) == value_size, "value_type of input vector must have the same size with device_ptr::value_type"); if(host_vec.size() > number_of_ele_) { resize(host_vec.size()); } HIP_CHECK(hipMemcpyAsync(device_raw_ptr_, host_vec.data(), host_vec.size() * value_size, hipMemcpyHostToDevice, stream)); } template void store_async(std::array const& host_arr, hipStream_t stream) { static_assert(sizeof(InValueType) == value_size, "value_type of input must have the same size with device_ptr::value_type"); if(Size > number_of_ele_) { resize(Size); } HIP_CHECK(hipMemcpyAsync(device_raw_ptr_, host_arr.data(), Size * value_size, hipMemcpyHostToDevice, stream)); } template void store_async(std::unique_ptr const& uptr, size_type offset, size_type number_of_ele, hipStream_t stream) { static_assert( sizeof(InValueType) == value_size, "value_type of input unique_ptr must have the same size with device_ptr::value_type"); if(offset + number_of_ele > number_of_ele_) { resize(offset + number_of_ele); } HIP_CHECK(hipMemcpyAsync(device_raw_ptr_ + offset, uptr.get(), number_of_ele * value_size, hipMemcpyHostToDevice, stream)); } // will not check the boundary void store_value_at(size_type pos, value_type_proxy const& value) { HIP_CHECK(hipMemcpy(device_raw_ptr_ + pos, &value, value_size, hipMemcpyHostToDevice)); } // will not check the boundary template void store_value_at_async(size_type pos, value_type_proxy const& value, hipStream_t stream) { HIP_CHECK( hipMemcpy(device_raw_ptr_ + pos, &value, value_size, hipMemcpyHostToDevice, stream)); } /// \brief Copy from device to device template void replace(device_ptr const& device_ptr) { static_assert(sizeof(InPtrValueType) == value_size, "sizeof(InPtrValueType) must equal to value_size"); if(device_ptr.number_of_ele_ > number_of_ele_) { resize(device_ptr.number_of_ele_); } HIP_CHECK(hipMemcpy(device_raw_ptr_, device_ptr.device_raw_ptr_, device_ptr.number_of_ele_ * value_size, hipMemcpyDeviceToDevice)); } 
template void replace_async(device_ptr const& device_ptr, hipStream_t stream) { static_assert(sizeof(InPtrValueType) == value_size, "sizeof(InPtrValueType) must equal to value_size"); if(device_ptr.number_of_ele_ > number_of_ele_) { resize(device_ptr.number_of_ele_); } HIP_CHECK(hipMemcpyAsync(device_raw_ptr_, device_ptr.device_raw_ptr_, device_ptr.number_of_ele_ * value_size, hipMemcpyDeviceToDevice, stream)); } void memset(size_type offset, int value, size_type size_bytes) { HIP_CHECK(hipMemset(reinterpret_cast(device_raw_ptr_) + offset, value, static_cast(size_bytes))); } void memset_async(size_type offset, int value, size_type size_bytes, hipStream_t stream) { HIP_CHECK(hipMemsetAsync(reinterpret_cast(device_raw_ptr_) + offset, value, static_cast(size_bytes), stream)); } /// \brief Copy from device to host /// This function will store loaded values into std::vector auto load() const { std::vector ret(number_of_ele_); HIP_CHECK(hipMemcpy(ret.data(), device_raw_ptr_, number_of_ele_ * value_size, hipMemcpyDeviceToHost)); return ret; } auto load_async(hipStream_t stream) const { std::vector ret(number_of_ele_); HIP_CHECK(hipMemcpyAsync(ret.data(), device_raw_ptr_, number_of_ele_ * value_size, hipMemcpyDeviceToHost, stream)); return ret; } template auto load_to_array() const { std::array ret; HIP_CHECK(hipMemcpy(ret.data(), device_raw_ptr_, std::min(number_of_ele_, Size) * value_size, hipMemcpyDeviceToHost)); return ret; } template auto load_to_array_async(hipStream_t stream) const { std::array ret; HIP_CHECK(hipMemcpyAsync(ret.data(), device_raw_ptr_, std::min(number_of_ele_, Size) * value_size, hipMemcpyDeviceToHost, stream)); return ret; } auto load_to_unique_ptr() const { std::unique_ptr ret(new value_type[number_of_ele_]); HIP_CHECK(hipMemcpy(ret.get(), device_raw_ptr_, number_of_ele_ * value_size, hipMemcpyDeviceToHost)); return ret; } auto load_to_unique_ptr_async(hipStream_t stream) const { std::unique_ptr ret(new value_type[number_of_ele_]); 
HIP_CHECK(hipMemcpyAsync(ret.get(), device_raw_ptr_, number_of_ele_ * value_size, hipMemcpyDeviceToHost, stream)); return ret; } auto load_value_at(size_type pos) const { value_type ret; HIP_CHECK(hipMemcpy(&ret, device_raw_ptr_ + pos, value_size, hipMemcpyDeviceToHost)); return ret; } auto load_value_at_async(size_type pos, hipStream_t stream) const { value_type ret; HIP_CHECK( hipMemcpyAsync(&ret, device_raw_ptr_ + pos, value_size, hipMemcpyDeviceToHost, stream)); return ret; } private: value_type* device_raw_ptr_; size_type number_of_ele_; }; } // namespace common #endif ./common/predicate_iterator.hpp0000664000175100017510000000275615176134454017001 0ustar jenkinsjenkins// MIT License // // Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
#ifndef COMMON_PREDICATE_ITERATOR_HPP_ #define COMMON_PREDICATE_ITERATOR_HPP_ namespace common { template struct increment_by { template __host__ __device__ T constexpr operator()(const T& value) const { return value + T{V}; } }; } // namespace common #endif // COMMON_PREDICATE_ITERATOR_HPP_ ./common/utils.hpp0000664000175100017510000001045215176134512014253 0ustar jenkinsjenkins// MIT License // // Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. #ifndef COMMON_UTILS_HPP_ #define COMMON_UTILS_HPP_ #include #ifdef USE_GTEST // GoogleTest-compatible HIP_CHECK macro. FAIL is called to log the Google Test trace. // The lambda is invoked immediately as assertions that generate a fatal failure can // only be used in void-returning functions. 
#define HIP_CHECK(condition) \ { \ hipError_t error = condition; \ if(error != hipSuccess) \ { \ [error]() \ { FAIL() << "HIP error " << error << ": " << hipGetErrorString(error); }(); \ exit(error); \ } \ } #else #define HIP_CHECK(condition) \ { \ hipError_t error = condition; \ if(error != hipSuccess) \ { \ std::cout << "HIP error: " << hipGetErrorString(error) << " file: " << __FILE__ \ << " line: " << __LINE__ << std::endl; \ exit(error); \ } \ } #endif namespace common { template __device__ constexpr bool device_test_enabled_for_warp_size_v = ::rocprim::arch::wavefront::max_size() >= LogicalWarpSize; inline char* __get_env(const char* name) { char* env; #ifdef _MSC_VER errno_t err = _dupenv_s(&env, nullptr, name); if(err) { return nullptr; } #else env = std::getenv(name); #endif return env; } inline void clean_env(char* env) { #ifdef _MSC_VER free(env); #endif (void)env; } inline bool use_hmm() { char* env = __get_env("ROCPRIM_USE_HMM"); const bool hmm = (env != nullptr) && (strcmp(env, "1") == 0); clean_env(env); return hmm; } // Helper for HMM allocations: HMM is requested through ROCPRIM_USE_HMM=1 environment variable template hipError_t hipMallocHelper(T** devPtr, size_t size) { if(use_hmm()) { return hipMallocManaged(reinterpret_cast(devPtr), size); } else { return hipMalloc(reinterpret_cast(devPtr), size); } return hipSuccess; } } // namespace common #endif // COMMON_UTILS_HPP_ ./common/warp_exchange.hpp0000664000175100017510000001135715176134454015740 0ustar jenkinsjenkins// MIT License // // Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
#ifndef COMMON_WARP_EXCHANGE_HPP_ #define COMMON_WARP_EXCHANGE_HPP_ #include namespace common { /* Each functor below forwards to one member function of the warp_exchange object it is given. Every functor has two overloads: one taking separate input and output arrays, and one taking a single array that is passed as both input and output. */ struct BlockedToStripedOp { template ROCPRIM_DEVICE ROCPRIM_INLINE void operator()(warp_exchange_type warp_exchange, T (&input_data)[ItemsPerThread], T (&output_data)[ItemsPerThread], typename warp_exchange_type::storage_type& storage) const { warp_exchange.blocked_to_striped(input_data, output_data, storage); } template ROCPRIM_DEVICE ROCPRIM_INLINE void operator()(warp_exchange_type warp_exchange, T (&thread_data)[ItemsPerThread], typename warp_exchange_type::storage_type& storage) const { warp_exchange.blocked_to_striped(thread_data, thread_data, storage); } }; /* Shuffle variant: the storage parameter is accepted but not used. */ struct BlockedToStripedShuffleOp { template ROCPRIM_DEVICE ROCPRIM_INLINE void operator()(warp_exchange_type warp_exchange, T (&input_data)[ItemsPerThread], T (&output_data)[ItemsPerThread], typename warp_exchange_type::storage_type& /*storage*/) const { warp_exchange.blocked_to_striped_shuffle(input_data, output_data); } template ROCPRIM_DEVICE ROCPRIM_INLINE void operator()(warp_exchange_type warp_exchange, T (&thread_data)[ItemsPerThread], typename warp_exchange_type::storage_type& /*storage*/) const { warp_exchange.blocked_to_striped_shuffle(thread_data, thread_data); } }; struct StripedToBlockedOp { template ROCPRIM_DEVICE ROCPRIM_INLINE void operator()(warp_exchange_type warp_exchange, T (&input_data)[ItemsPerThread], T (&output_data)[ItemsPerThread], typename warp_exchange_type::storage_type& storage) const { warp_exchange.striped_to_blocked(input_data, output_data, storage); } template ROCPRIM_DEVICE ROCPRIM_INLINE void operator()(warp_exchange_type warp_exchange, T (&thread_data)[ItemsPerThread], typename warp_exchange_type::storage_type& storage) const { warp_exchange.striped_to_blocked(thread_data, thread_data, storage); } }; /* Shuffle variant: the storage parameter is accepted but not used. */ struct StripedToBlockedShuffleOp { template ROCPRIM_DEVICE ROCPRIM_INLINE void operator()(warp_exchange_type warp_exchange, T (&input_data)[ItemsPerThread], T
(&output_data)[ItemsPerThread], typename warp_exchange_type::storage_type& /*storage*/) const { warp_exchange.striped_to_blocked_shuffle(input_data, output_data); } template ROCPRIM_DEVICE ROCPRIM_INLINE void operator()(warp_exchange_type warp_exchange, T (&thread_data)[ItemsPerThread], typename warp_exchange_type::storage_type& /*storage*/) const { warp_exchange.striped_to_blocked_shuffle(thread_data, thread_data); } }; } // namespace common #endif // COMMON_WARP_EXCHANGE_HPP_ ./common/utils_custom_type.hpp0000664000175100017510000002052115176134454016711 0ustar jenkinsjenkins// MIT License // // Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE.
#ifndef COMMON_UTILS_CUSTOM_TYPE_HPP_ #define COMMON_UTILS_CUSTOM_TYPE_HPP_ #include #include #include #include namespace common { /* Two-member value type (x, y). Provides member-wise +, - and +=, an ordering that compares x first and y only when the x values are equal, equality on both members, and stream output as "[x; y]". */ template struct custom_type { using first_type = T; using second_type = U; // value_type is valid if T == U using value_type = std::conditional_t::value, T, void>; T x; U y; // Non-zero values in default constructor for checking reduce and scan: // ensure that scan_op(custom_type(), value) != value ROCPRIM_HOST_DEVICE constexpr inline custom_type() : x(NonZero ? 12 : 0), y(NonZero ? 34 : 0) {} ROCPRIM_HOST_DEVICE inline custom_type(T x, U y) : x(x), y(y) {} ROCPRIM_HOST_DEVICE inline custom_type(T xy) : x(xy), y(xy) {} template ROCPRIM_HOST_DEVICE inline custom_type(const custom_type& other) : x(static_cast(other.x)), y(static_cast(other.y)) {} ROCPRIM_HOST_DEVICE inline ~custom_type() = default; ROCPRIM_HOST_DEVICE inline custom_type operator+(const custom_type& other) const { rocprim::plus plus_T; rocprim::plus plus_U; return custom_type{plus_T(x, other.x), plus_U(y, other.y)}; } ROCPRIM_HOST_DEVICE inline custom_type operator-(const custom_type& other) const { rocprim::minus minus_T; rocprim::minus minus_U; return custom_type(minus_T(x, other.x), minus_U(y, other.y)); } ROCPRIM_HOST_DEVICE inline custom_type& operator=(const custom_type& other) { x = other.x; y = other.y; return *this; } ROCPRIM_HOST_DEVICE inline custom_type& operator+=(const custom_type& other) { x += other.x; y += other.y; return *this; } ROCPRIM_HOST_DEVICE inline bool operator<(const custom_type& other) const { rocprim::less less_T; rocprim::equal_to equal_to_T; rocprim::less less_U; return (less_T(x, other.x) || (equal_to_T(x, other.x) && less_U(y, other.y))); } ROCPRIM_HOST_DEVICE inline bool operator>(const custom_type& other) const { rocprim::greater greater_T; rocprim::equal_to equal_to_T; rocprim::greater greater_U; return (greater_T(x, other.x) || (equal_to_T(x, other.x) && greater_U(y, other.y))); } ROCPRIM_HOST_DEVICE inline bool operator==(const
custom_type& other) const { rocprim::equal_to equal_to_T; rocprim::equal_to equal_to_U; return (equal_to_T(x, other.x) && equal_to_U(y, other.y)); } ROCPRIM_HOST_DEVICE inline bool operator!=(const custom_type& other) const { return !(*this == other); } friend inline std::ostream& operator<<(std::ostream& stream, const custom_type& value) { stream << "[" << value.x << "; " << value.y << "]"; return stream; } }; /* Same members, constructors, comparisons and stream output as custom_type, but with no user-declared destructor or copy assignment and no arithmetic operators. */ template struct custom_type_copyable { using first_type = T; using second_type = U; using value_type = std::conditional_t::value, T, void>; T x; U y; ROCPRIM_HOST_DEVICE constexpr inline custom_type_copyable() : x(NonZero ? 12 : 0), y(NonZero ? 34 : 0) {} ROCPRIM_HOST_DEVICE inline custom_type_copyable(T x, U y) : x(x), y(y) {} ROCPRIM_HOST_DEVICE inline custom_type_copyable(T xy) : x(xy), y(xy) {} template ROCPRIM_HOST_DEVICE inline custom_type_copyable( const custom_type_copyable& other) : x(static_cast(other.x)), y(static_cast(other.y)) {} ROCPRIM_HOST_DEVICE inline bool operator<(const custom_type_copyable& other) const { rocprim::less less_T; rocprim::equal_to equal_to_T; rocprim::less less_U; return (less_T(x, other.x) || (equal_to_T(x, other.x) && less_U(y, other.y))); } ROCPRIM_HOST_DEVICE inline bool operator>(const custom_type_copyable& other) const { rocprim::greater greater_T; rocprim::equal_to equal_to_T; rocprim::greater greater_U; return (greater_T(x, other.x) || (equal_to_T(x, other.x) && greater_U(y, other.y))); } ROCPRIM_HOST_DEVICE inline bool operator==(const custom_type_copyable& other) const { rocprim::equal_to equal_to_T; rocprim::equal_to equal_to_U; return (equal_to_T(x, other.x) && equal_to_U(y, other.y)); } ROCPRIM_HOST_DEVICE inline bool operator!=(const custom_type_copyable& other) const { return !(*this == other); } friend inline std::ostream& operator<<(std::ostream& stream, const custom_type_copyable& value) { stream << "[" << value.x << "; " << value.y << "]"; return stream; } };
static_assert(std::is_trivially_copyable>::value, "custom_type_copyable is not trivially copyable"); template struct is_custom_type_copyable : std::false_type {}; template struct is_custom_type_copyable> : std::true_type {}; /* custom_type extended with a std::uint8_t array of extra_bytes = Size - sizeof(T) - sizeof(U) elements; every constructor forwards to the custom_type base. NOTE(review): extra_bytes is computed by plain subtraction, so Size is presumably expected to exceed sizeof(T) + sizeof(U) - confirm against callers. */ template struct custom_huge_type : custom_type { static constexpr auto extra_bytes = Size - sizeof(T) - sizeof(U); std::uint8_t data[extra_bytes]; // Non-zero values in default constructor for checking reduce and scan: // ensure that scan_op(custom_type(), value) != value ROCPRIM_HOST_DEVICE constexpr inline custom_huge_type() : custom_type() {} ROCPRIM_HOST_DEVICE inline custom_huge_type(T x, U y) : custom_type(x, y) {} ROCPRIM_HOST_DEVICE inline custom_huge_type(T xy) : custom_type(xy) {} template ROCPRIM_HOST_DEVICE inline custom_huge_type(const custom_type& other) : custom_type(other) {} template ROCPRIM_HOST_DEVICE inline custom_huge_type( const custom_huge_type& other) : custom_type(static_cast(other.x), static_cast(other.y)) {} friend inline std::ostream& operator<<(std::ostream& stream, const custom_huge_type& value) { stream << "[" << value.x << "; " << value.y << "]"; return stream; } }; template struct is_custom_type : std::false_type {}; template struct is_custom_type> : std::true_type {}; template struct is_custom_type> : std::true_type {}; template struct is_custom_type> : std::true_type {}; } // namespace common #endif // COMMON_UTILS_CUSTOM_TYPE_HPP_ ./common/device_adjacent_difference.hpp0000664000175100017510000001407615176134454020370 0ustar jenkinsjenkins// MIT License // // Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. #ifndef COMMON_DEVICE_ADJACENT_DIFFERENCE_HPP_ #define COMMON_DEVICE_ADJACENT_DIFFERENCE_HPP_ #include #include #include #include #include namespace common { enum class api_variant { no_alias, alias, in_place }; /* Tag-dispatched wrappers around the rocprim adjacent_difference entry points. The first parameter selects left (std::true_type) or right (std::false_type); the second is an integral_constant tag for the aliasing variant. The six overloads forward, in order, to adjacent_difference, adjacent_difference_right, adjacent_difference_inplace and adjacent_difference_right_inplace without the output iterator, and then the two *_inplace functions again with the output iterator. NOTE(review): which api_variant value selects which pair is not visible in this text - presumably no_alias, in_place, alias in that order; confirm against the original header. */ template auto dispatch_adjacent_difference( std::true_type /*left*/, std::integral_constant /*aliasing*/, void* const temporary_storage, std::size_t& storage_size, const InputIt input, const OutputIt output, Args&&... args) { return ::rocprim::adjacent_difference(temporary_storage, storage_size, input, output, std::forward(args)...); } template auto dispatch_adjacent_difference( std::false_type /*left*/, std::integral_constant /*aliasing*/, void* const temporary_storage, std::size_t& storage_size, const InputIt input, const OutputIt output, Args&&...
args) { return ::rocprim::adjacent_difference_right(temporary_storage, storage_size, input, output, std::forward(args)...); } template auto dispatch_adjacent_difference( std::true_type /*left*/, std::integral_constant /*aliasing*/, void* const temporary_storage, std::size_t& storage_size, const InputIt input, const OutputIt /*output*/, Args&&... args) { return ::rocprim::adjacent_difference_inplace(temporary_storage, storage_size, input, std::forward(args)...); } template auto dispatch_adjacent_difference( std::false_type /*left*/, std::integral_constant /*aliasing*/, void* const temporary_storage, std::size_t& storage_size, const InputIt input, const OutputIt /*output*/, Args&&... args) { return ::rocprim::adjacent_difference_right_inplace(temporary_storage, storage_size, input, std::forward(args)...); } template auto dispatch_adjacent_difference( std::true_type /*left*/, std::integral_constant /*aliasing*/, void* const temporary_storage, std::size_t& storage_size, const InputIt input, const OutputIt output, Args&&... args) { return ::rocprim::adjacent_difference_inplace(temporary_storage, storage_size, input, output, std::forward(args)...); } template auto dispatch_adjacent_difference( std::false_type /*left*/, std::integral_constant /*aliasing*/, void* const temporary_storage, std::size_t& storage_size, const InputIt input, const OutputIt output, Args&&... args) { return ::rocprim::adjacent_difference_right_inplace(temporary_storage, storage_size, input, output, std::forward(args)...); } } // namespace common #endif // COMMON_DEVICE_ADJACENT_DIFFERENCE_HPP_ ./common/utils_half.hpp0000664000175100017510000000367015176134454015256 0ustar jenkinsjenkins// MIT License // // Copyright (c) 2021-2025 Advanced Micro Devices, Inc. All rights reserved.
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. #ifndef COMMON_UTILS_HALF_HPP_ #define COMMON_UTILS_HALF_HPP_ #include #include #include #include namespace common { // Support half operators on host side /* Both helpers reinterpret the address of x as a pointer to the other half type and dereference it; no arithmetic conversion is performed. */ ROCPRIM_HOST inline rocprim::native_half half_to_native(const rocprim::half& x) { return *reinterpret_cast(&x); } ROCPRIM_HOST inline rocprim::half native_to_half(const rocprim::native_half& x) { return *reinterpret_cast(&x); } } // namespace common // For better Google Test reporting and debug output of half values /* Streams the value after a static_cast; declared at global scope, outside namespace common. */ inline std::ostream& operator<<(std::ostream& stream, const rocprim::half& value) { stream << static_cast(value); return stream; } #endif // COMMON_UTILS_HALF_HPP_ ./common/utils_data_generation.hpp0000664000175100017510000001404415176134454017465 0ustar jenkinsjenkins// MIT License // // Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
#ifndef COMMON_UTILS_DATA_GENERATION_HPP_ #define COMMON_UTILS_DATA_GENERATION_HPP_ #include #include #include #include #include #include namespace common { // uniform_int_distribution is undefined for anything other than: // short, int, long, long long, rocprim::int128_t, unsigned short, unsigned int, unsigned long, unsigned long long, or rocprim::uint128_t template struct is_valid_for_int_distribution : std::integral_constant< bool, std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value> {}; // uniform_int_distribution is defined for supporting rocprim::int128_t and rocprim::uint128_t template class uniform_int_distribution { public: typedef IntType result_type; uniform_int_distribution() : uniform_int_distribution(0) {} explicit uniform_int_distribution(IntType _a, IntType _b = rocprim::numeric_limits::max()) : lower_bound{_a}, upper_bound{_b} {} void reset() {} result_type a() const { return lower_bound; } result_type b() const { return upper_bound; } result_type min() const { return a(); } result_type max() const { return b(); } /* Draws an offset from helper() for range = upper_bound - lower_bound + 1 and adds lower_bound. When that expression yields 0, helper() returns the raw 128-bit random number unchanged. */ template result_type operator()(Generator& urng) { rocprim::uint128_t range = upper_bound - lower_bound + 1; auto offset = helper(urng, range); return offset + lower_bound; } friend bool operator==(const uniform_int_distribution& d1, const uniform_int_distribution& d2) { return d1.lower_bound == d2.lower_bound && d1.upper_bound == d2.upper_bound; } friend bool operator!=(const uniform_int_distribution& d1, const uniform_int_distribution& d2) { return !(d1 == d2); } // third constructor, param(), operator<< and operator>> are not defined private: // Java approach in the reference below.
// Returns an unbiased random number from urng downscaled to [0, range) template static rocprim::uint128_t helper(Generator& urng, const rocprim::uint128_t& range) { // reference: Fast Random Integer Generation in an Interval // ACM Transactions on Modeling and Computer Simulation 29 (1), 2019 // https://arxiv.org/abs/1805.10941 /* A 128-bit value is assembled from two draws (high word shifted left by 64, OR-ed with the low word); the loop redraws while random_number - result exceeds threshold. */ static std::uniform_int_distribution dists[2]; auto random_number = rocprim::uint128_t{dists[0](urng)} << 64 | dists[1](urng); if(!range) { return random_number; } auto result = random_number % range; auto threshold = rocprim::numeric_limits::max() - range + 1; while(random_number - result > threshold) { random_number = rocprim::uint128_t{dists[0](urng)} << 64 | dists[1](urng); result = random_number % range; } return result; } IntType lower_bound; IntType upper_bound; }; /* Specialization that only inherits std::uniform_int_distribution together with its constructors. */ template class uniform_int_distribution< IntType, std::enable_if_t<(!(std::is_same::value || std::is_same::value))>> : public std::uniform_int_distribution { public: using std::uniform_int_distribution::uniform_int_distribution; }; /* generate_limits: the primary template and the integral specialization return rocprim::numeric_limits min()/max(); the floating-point specialization returns the fixed bounds T(-1000) and T(1000). */ template struct generate_limits { static inline T min() { return rocprim::numeric_limits::min(); } static inline T max() { return rocprim::numeric_limits::max(); } }; template struct generate_limits< T, std::enable_if_t().is_build_in() && rocprim::is_integral::value>> { static inline T min() { return rocprim::numeric_limits::min(); } static inline T max() { return rocprim::numeric_limits::max(); } }; template struct generate_limits().is_build_in() && rocprim::is_floating_point::value>> { static inline T min() { return T(-1000); } static inline T max() { return T(1000); } }; template using it_value_t = typename std::iterator_traits::value_type; } // namespace common #endif // COMMON_UTILS_DATA_GENERATION_HPP_ ./common/device_batch_memcpy.hpp0000664000175100017510000001335415176134454017076 0ustar jenkinsjenkins// MIT License // // Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. #ifndef COMMON_DEVICE_BATCH_MEMCPY_HPP_ #define COMMON_DEVICE_BATCH_MEMCPY_HPP_ #include #include #include #include #include #include #include #include #include namespace common { // Used for generating offsets. We generate a permutation map and then derive // offsets via a sum scan over the sizes in the order of the permutation. This // allows us to keep the order of buffers we pass to batch_memcpy, but still // have source and destinations mappings not be the identity function: // // batch_memcpy( // [&a0 , &b0 , &c0 , &d0 ], // from (note the order is still just a, b, c, d!) // [&a0', &b0', &c0', &d0'], // to (order is the same as above too!) // [3 , 2 , 1 , 2 ]) // size // // ┌───┬───┬───┬───┬───┬───┬───┬───┐ // │b0 │b1 │a0 │a1 │a2 │d0 │d1 │c0 │ buffer x contains buffers a, b, c, d // └───┴───┴───┴───┴───┴───┴───┴───┘ note that the order of buffers is shuffled! 
// ───┬─── ─────┬───── ───┬─── ─── // └─────────┼─────────┼───┐ // ┌───┘ ┌───┘ │ what batch_memcpy does // ▼ ▼ ▼ // ─── ─────────── ─────── ─────── // ┌───┬───┬───┬───┬───┬───┬───┬───┐ // │c0'│a0'│a1'│a2'│d0'│d1'│b0'│b1'│ buffer y contains buffers a', b', c', d' // └───┴───┴───┴───┴───┴───┴───┴───┘ /* Builds a random permutation of the indices 0..n-1 with std::shuffle, then walks it in order: result[permute[i]] is set to the sum of input[permute[j]] for all j < i. The input must be non-empty (checked with assert). */ template std::vector shuffled_exclusive_scan(const std::vector& input, RandomGenerator& rng) { const auto n = input.size(); assert(n > 0); std::vector result(n); std::vector permute(n); std::iota(permute.begin(), permute.end(), 0); std::shuffle(permute.begin(), permute.end(), rng); T sum = 0; for(size_t i = 0; i < n; ++i) { result[permute[i]] = sum; sum += input[permute[i]]; } return result; } /* init_input overload that fills only h_input_for_memcpy: the container is sized to num_ints * sizeof(uint64_t), where num_ints is total_num_bytes divided by sizeof(uint64_t) and rounded up, and each 64-bit word is then written from bits_engine. The other container parameter is unused. */ template::type = 0> void init_input(ContainerMemCpy& h_input_for_memcpy, ContainerCopy& /*h_input_for_copy*/, std::mt19937_64& rng, byte_offset_type total_num_bytes) { std::independent_bits_engine bits_engine{rng}; const size_t num_ints = rocprim::detail::ceiling_div(total_num_bytes, sizeof(uint64_t)); h_input_for_memcpy = std::vector(num_ints * sizeof(uint64_t)); // generate_n for uninitialized memory, pragmatically use placement-new, since there are no // uint64_t objects alive yet in the storage.
std::for_each( reinterpret_cast(h_input_for_memcpy.data()), reinterpret_cast(h_input_for_memcpy.data() + num_ints * sizeof(uint64_t)), [&bits_engine](uint64_t& elem) { ::new(&elem) uint64_t{bits_engine()}; }); } template::type = 0> void init_input(ContainerMemCpy& /*h_input_for_memcpy*/, ContainerCopy& h_input_for_copy, std::mt19937_64& rng, byte_offset_type total_num_bytes) { using value_type = typename ContainerCopy::value_type; std::independent_bits_engine bits_engine{rng}; const size_t num_ints = rocprim::detail::ceiling_div(total_num_bytes, sizeof(uint64_t)); const size_t num_of_elements = rocprim::detail::ceiling_div(num_ints * sizeof(uint64_t), sizeof(value_type)); h_input_for_copy = std::vector(num_of_elements); // generate_n for uninitialized memory, pragmatically use placement-new, since there are no // uint64_t objects alive yet in the storage. std::for_each(reinterpret_cast(h_input_for_copy.data()), reinterpret_cast(h_input_for_copy.data()) + num_ints, [&bits_engine](uint64_t& elem) { ::new(&elem) uint64_t{bits_engine()}; }); } } // namespace common #endif // COMMON_DEVICE_BATCH_MEMCPY_HPP_ ./common/README.md0000664000175100017510000000165515176134454013673 0ustar jenkinsjenkins# Common utilities rocPRIM's tests and benchmarks employ numerous utilities that are common in implementation. This folder hosts these for easier and less error-prone maintenance. ## When to add a common utility When adding a new test or benchmark that depends on a utility, the following cases must be considered: 1. If the utility is already implemented in some `common` header, then there's nothing to do except perhaps extending its functionality. 2. If the utility does not exist yet in any `common` header, then it must first be checked whether some `benchmark` or `test`[^1] utility header implements this functionality. If so, then it must be moved to the appropriate common header. 3.
If the utility does not exist yet in any `common` nor `test` nor `benchmark` utility header, then it must be added to the appropriate `test` or `benchmark` header. [^1]: When adding a new test, check the `benchmark` utilities, and vice versa. ./CMakeLists.txt0000664000175100017510000002620615176134512013656 0ustar jenkinsjenkins# MIT License # # Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE.
#[[ Project bootstrap: CMake version and policy range, install prefix, the C++17 requirement, top-level detection, ROCM_ROOT, and the user-facing build options. ]] cmake_minimum_required(VERSION 3.16 FATAL_ERROR) cmake_policy(VERSION 3.16...3.25) # Install prefix set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix, prepended onto install directories") # rocPRIM project project(rocprim LANGUAGES CXX) # Set CXX flags if (NOT DEFINED CMAKE_CXX_STANDARD) set(CMAKE_CXX_STANDARD 17) endif() set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) # Set HIP flags set(CMAKE_HIP_STANDARD 14) #[[ NOTE(review): CMAKE_HIP_STANDARD is 14 while only C++17 is accepted below - confirm this is intended when USE_HIPCXX is ON. ]] set(CMAKE_HIP_STANDARD_REQUIRED ON) set(CMAKE_HIP_EXTENSIONS OFF) if(NOT CMAKE_CXX_STANDARD EQUAL 17) message(FATAL_ERROR "Only C++17 is supported") endif() if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) set(ROCPRIM_PROJECT_IS_TOP_LEVEL TRUE) else() set(ROCPRIM_PROJECT_IS_TOP_LEVEL FALSE) endif() #Adding CMAKE_PREFIX_PATH if(WIN32) set(ROCM_ROOT "$ENV{HIP_PATH}" CACHE PATH "Root directory of the ROCm installation") else() set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation") endif() include(CheckLanguage) include(CMakeDependentOption) # Build options # Disables building tests, benchmarks, examples option(ONLY_INSTALL "Only install" OFF) cmake_dependent_option(BUILD_TEST "Build tests (requires googletest)" OFF "NOT ONLY_INSTALL" OFF) option(WITH_ROCRAND "Build tests with device-side data generation(requires rocRAND)" OFF) cmake_dependent_option(BUILD_BENCHMARK "Build benchmarks" OFF "NOT ONLY_INSTALL" OFF) cmake_dependent_option(BUILD_EXAMPLE "Build examples" OFF "NOT ONLY_INSTALL" OFF) option(BUILD_NAIVE_BENCHMARK "Build naive benchmarks" OFF) cmake_dependent_option(BUILD_DOCS "Build documentation (requires sphinx)" OFF "NOT ONLY_INSTALL" OFF) option(BUILD_CODE_COVERAGE "Build with code coverage enabled" OFF) option(ROCPRIM_INSTALL "Enable installation of rocPRIM (projects embedding rocPRIM may want to turn this OFF)" ON) option(ROCPRIM_ENABLE_ASSERTS "Enable asserts in release build" OFF) option(BUILD_OFFLOAD_COMPRESS "Build rocPRIM with offload compression" ON)
cmake_dependent_option(USE_SYSTEM_LIB "Use installed ROCm libs when building tests" OFF BUILD_TEST OFF) check_language(HIP) cmake_dependent_option(USE_HIPCXX "Use CMake HIP language support" OFF CMAKE_HIP_COMPILER OFF) include(CheckCXXCompilerFlag) if(BUILD_OFFLOAD_COMPRESS) check_cxx_compiler_flag("--offload-compress -x hip" CXX_COMPILER_SUPPORTS_OFFLOAD_COMPRESS) if(CXX_COMPILER_SUPPORTS_OFFLOAD_COMPRESS) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --offload-compress") else() message(STATUS "Warning: BUILD_OFFLOAD_COMPRESS=ON but flag not supported by compiler. Ignoring option.") endif() endif() #[[ ROCPRIM_PROJECT_IS_TOP_LEVEL and the ROCM_ROOT cache entry are already set earlier in this file, right after the C++ standard check, so they are not set a second time here. ]] # CMake modules list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${ROCM_PATH}/lib/cmake/hip ${HIP_PATH}/cmake ${ROCM_ROOT}/lib/cmake/hip ${ROCM_ROOT}/hip/cmake # FindHIP.cmake ) # Set a default build type if none was specified if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) message(STATUS "Setting build type to 'Release' as none was specified.") set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build."
FORCE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "" "Debug" "Release" "MinSizeRel" "RelWithDebInfo") endif() #[[ When ROCPRIM_ENABLE_ASSERTS is ON, remove -DNDEBUG from CMAKE_CXX_FLAGS and CMAKE_C_FLAGS and, if a build type is set, from the matching per-build-type flag variables. ]] if(ROCPRIM_ENABLE_ASSERTS) if(NOT "${CMAKE_BUILD_TYPE}" STREQUAL "") string(TOUPPER ${CMAKE_BUILD_TYPE} BUILD_TYPE) set(BUILD_TYPE_CXX_FLAGS "CMAKE_CXX_FLAGS_${BUILD_TYPE}") set(BUILD_TYPE_C_FLAGS "CMAKE_C_FLAGS_${BUILD_TYPE}") endif() string(REGEX REPLACE "-DNDEBUG( |$)" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") string(REGEX REPLACE "-DNDEBUG( |$)" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") if(NOT "${CMAKE_BUILD_TYPE}" STREQUAL "") string(REGEX REPLACE "-DNDEBUG( |$)" "" ${BUILD_TYPE_CXX_FLAGS} "${${BUILD_TYPE_CXX_FLAGS}}") string(REGEX REPLACE "-DNDEBUG( |$)" "" ${BUILD_TYPE_C_FLAGS} "${${BUILD_TYPE_C_FLAGS}}") endif() endif() set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE CACHE BOOL "Add paths to linker search and installed rpath") #[[ Save the caller's BUILD_SHARED_LIBS (ON when undefined) in PKG_BUILD_SHARED_LIBS before it is forced OFF below. ]] if(DEFINED BUILD_SHARED_LIBS) set(PKG_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) else() set(PKG_BUILD_SHARED_LIBS ON) endif() set(BUILD_SHARED_LIBS OFF) # don't build client dependencies as shared # Get dependencies (required here to get rocm-cmake) include(cmake/Dependencies.cmake) # Use target ID syntax if supported for GPU_TARGETS #[[ Without USE_HIPCXX, GPU_TARGETS defaults to AMDGPU_TARGETS if that is defined, else to "all"; the value "all" is then replaced with the list accepted by rocm_check_target_ids (an xnack+ list when BUILD_ADDRESS_SANITIZER is set). ]] if(USE_HIPCXX) enable_language(HIP) else() if (NOT DEFINED AMDGPU_TARGETS) set(GPU_TARGETS "all" CACHE STRING "GPU architectures to compile for") else() set(GPU_TARGETS "${AMDGPU_TARGETS}" CACHE STRING "GPU architectures to compile for") endif() set_property(CACHE GPU_TARGETS PROPERTY STRINGS "all") if(GPU_TARGETS STREQUAL "all") if(BUILD_ADDRESS_SANITIZER) # ASAN builds require xnack rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS TARGETS "gfx908:xnack+;gfx90a:xnack+;gfx942:xnack+;gfx950:xnack+" ) else() rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS TARGETS "gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201" ) endif() set(GPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "GPU architectures to compile for" FORCE) endif()
endif() # Compressed offload binaries are currently not working with the SPIR-V target if("amdgcnspirv" IN_LIST GPU_TARGETS) if(BUILD_OFFLOAD_COMPRESS) message(FATAL_ERROR "Cannot combine SPIR-V and BUILD_OFFLOAD_COMPRESS") endif() endif() # TODO: Fix VerifyCompiler for HIP on Windows if (NOT WIN32) include(cmake/VerifyCompiler.cmake) endif() list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH} ${ROCM_PATH}/hip ${ROCM_PATH}/llvm ${ROCM_ROOT}/llvm ${ROCM_ROOT} ${ROCM_ROOT}/hip) find_package(hip REQUIRED CONFIG PATHS ${HIP_DIR} ${ROCM_PATH} /opt/rocm) # Setup VERSION set(VERSION_STRING "4.2.0") rocm_setup_version(VERSION ${VERSION_STRING}) math(EXPR rocprim_VERSION_NUMBER "${rocprim_VERSION_MAJOR} * 100000 + ${rocprim_VERSION_MINOR} * 100 + ${rocprim_VERSION_PATCH}") # Print configuration summary include(cmake/Summary.cmake) print_configuration_summary() # rocPRIM library add_subdirectory(rocprim) if(ROCPRIM_PROJECT_IS_TOP_LEVEL AND (BUILD_TEST OR BUILD_BENCHMARK)) rocm_package_setup_component(clients) endif() # Tests if(BUILD_TEST) if(USE_SYSTEM_LIB) find_package(rocprim REQUIRED CONFIG PATHS "/opt/rocm/rocprim") if (NOT ${rocprim_VERSION} VERSION_EQUAL ${VERSION_STRING}) message(WARNING "The installed rocprim version, ${rocprim_VERSION}, does not match project version ${VERSION_STRING}. 
Building tests with USE_SYSTEM_LIB=ON may not work properly.") endif() endif() if (ROCPRIM_PROJECT_IS_TOP_LEVEL) rocm_package_setup_client_component(tests) endif() enable_testing() add_subdirectory(test) endif() # Benchmarks if(BUILD_BENCHMARK) if (ROCPRIM_PROJECT_IS_TOP_LEVEL) rocm_package_setup_client_component(benchmarks) endif() add_subdirectory(benchmark) endif() # Examples if(BUILD_EXAMPLE) add_subdirectory(example) endif() # Docs if(BUILD_DOCS) add_subdirectory(docs) endif() # set BUILD_SHARED_LIBS for packaging set(BUILD_SHARED_LIBS ${PKG_BUILD_SHARED_LIBS}) # Package if (ROCPRIM_PROJECT_IS_TOP_LEVEL) # add dependency on HIP runtime set(HIP_RUNTIME_MINIMUM 4.5.0) if(BUILD_ADDRESS_SANITIZER) set(DEPENDS_HIP_RUNTIME "hip-runtime-amd-asan" ) else() set(DEPENDS_HIP_RUNTIME "hip-runtime-amd" ) endif() rocm_package_add_dependencies(SHARED_DEPENDS "${DEPENDS_HIP_RUNTIME} >= ${HIP_RUNTIME_MINIMUM}") rocm_package_add_deb_dependencies(STATIC_DEPENDS "hip-static-dev >= ${HIP_RUNTIME_MINIMUM}") rocm_package_add_rpm_dependencies(STATIC_DEPENDS "hip-static-devel >= ${HIP_RUNTIME_MINIMUM}") set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md") set(CPACK_RPM_PACKAGE_LICENSE "MIT") set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "\${CPACK_PACKAGING_INSTALL_PREFIX}" ) rocm_create_package( NAME rocprim DESCRIPTION "rocPRIM is a header-only library that provides HIP parallel primitives." 
MAINTAINER "rocPRIM Maintainer " HEADER_ONLY ) endif() if(BUILD_CODE_COVERAGE) target_compile_options(rocprim INTERFACE -w -fprofile-instr-generate -fcoverage-mapping ) target_link_options(rocprim INTERFACE -fprofile-instr-generate ) endif() # # ADDITIONAL TARGETS FOR CODE COVERAGE # if(BUILD_CODE_COVERAGE) add_custom_target( coverage_analysis COMMAND echo Coverage GTEST_FILTER=\${GTEST_FILTER} COMMAND ${CMAKE_COMMAND} -E rm -rf ./coverage-report COMMAND ${CMAKE_COMMAND} -E make_directory ./coverage-report/profraw COMMAND ctest --output-on-failure -E "rocprim.linking*" --gtest_filter=\"\${GTEST_FILTER}\" WORKING_DIRECTORY ${CMAKE_BINARY_DIR} ) find_program( LLVM_PROFDATA llvm-profdata REQUIRED HINTS ${ROCM_PATH}/llvm/bin PATHS /opt/rocm/llvm/bin ) find_program( LLVM_COV llvm-cov REQUIRED HINTS ${ROCM_PATH}/llvm/bin PATHS /opt/rocm/llvm/bin ) get_property(LLVM_COV_OBJECT_ARGS GLOBAL PROPERTY LLVM_COV_OBJECT_ARGS) add_custom_target( coverage DEPENDS coverage_analysis COMMAND ${LLVM_PROFDATA} merge -sparse ./coverage-report/profraw/rocprim-coverage_*.profraw -o ./coverage-report/rocprim.profdata COMMAND ${LLVM_COV} report ${LLVM_COV_OBJECT_ARGS} --ignore-filename-regex="test_*" -instr-profile=./coverage-report/rocprim.profdata COMMAND ${LLVM_COV} show ${LLVM_COV_OBJECT_ARGS} --ignore-filename-regex="test_*" -instr-profile=./coverage-report/rocprim.profdata -format=html -output-dir=coverage-report COMMAND ${LLVM_COV} export ${LLVM_COV_OBJECT_ARGS} --ignore-filename-regex="test_*" -instr-profile=./coverage-report/rocprim.profdata -format=lcov > ./coverage-report/coverage.info WORKING_DIRECTORY ${CMAKE_BINARY_DIR} ) endif() ./.clang-format0000664000175100017510000001143515176134454013474 0ustar jenkinsjenkins# Style file for MLSE Libraries based on the modified rocBLAS style # Common settings BasedOnStyle: WebKit TabWidth: 4 IndentWidth: 4 UseTab: Never ColumnLimit: 100 UseCRLF: false # Other languages JavaScript, Proto --- Language: Cpp # 
http://releases.llvm.org/6.0.1/tools/clang/docs/ClangFormatStyleOptions.html#disabling-formatting-on-a-piece-of-code # int formatted_code; # // clang-format off # void unformatted_code ; # // clang-format on # void formatted_code_again; DisableFormat: false Standard: Cpp11 AccessModifierOffset: -4 AlignAfterOpenBracket: true AlignArrayOfStructures: Right AlignConsecutiveAssignments: true AlignConsecutiveDeclarations: true AlignEscapedNewlines: Left AlignOperands: true AlignTrailingComments: false AllowAllArgumentsOnNextLine: false AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: Never AllowShortCaseLabelsOnASingleLine: true AllowShortFunctionsOnASingleLine: Empty AllowShortIfStatementsOnASingleLine: false AllowShortLoopsOnASingleLine: false AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false AlwaysBreakTemplateDeclarations: Yes BinPackArguments: false BinPackParameters: false BitFieldColonSpacing: Both # Configure each individual brace in BraceWrapping BreakBeforeBraces: Custom # Control of individual brace wrapping cases BraceWrapping: AfterCaseLabel: true AfterClass: true AfterControlStatement: Always AfterEnum: true AfterFunction: true AfterNamespace: true AfterStruct: true AfterUnion: true AfterExternBlock: false BeforeCatch: true BeforeElse: true BeforeLambdaBody: true BeforeWhile: true IndentBraces: false SplitEmptyFunction: false SplitEmptyRecord: false SplitEmptyNamespace: false BreakBeforeBinaryOperators: All BreakBeforeTernaryOperators: true BreakConstructorInitializers: BeforeComma BreakInheritanceList: BeforeComma BreakStringLiterals: true CommentPragmas: '^ IWYU pragma:' CompactNamespaces: false ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true DeriveLineEnding: false DerivePointerAlignment: false EmptyLineAfterAccessModifier: Never EmptyLineBeforeAccessModifier: Always ExperimentalAutoDetectBinPacking: false FixNamespaceComments: true ForEachMacros: [ foreach, 
Q_FOREACH, BOOST_FOREACH ] IfMacros: [] IncludeBlocks: Preserve IndentAccessModifiers: false IndentCaseBlocks: true IndentCaseLabels: true IndentExternBlock: NoIndent IndentPPDirectives: BeforeHash IndentWrappedFunctionNames: true KeepEmptyLinesAtTheStartOfBlocks: true LambdaBodyIndentation: Signature MacroBlockBegin: '' MacroBlockEnd: '' MaxEmptyLinesToKeep: 1 NamespaceIndentation: None PPIndentWidth: -1 PackConstructorInitializers: NextLine PenaltyBreakBeforeFirstCallParameter: 19 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 60 PointerAlignment: Left QualifierAlignment: Leave ReferenceAlignment: Pointer ReflowComments: false ShortNamespaceLines: 0 SortIncludes: CaseSensitive SortUsingDeclarations: true SpaceAfterCStyleCast: false SpaceAfterLogicalNot: false SpaceAfterTemplateKeyword: false SpaceAroundPointerQualifiers: Default SpaceBeforeAssignmentOperators: true SpaceBeforeCaseColon: false SpaceBeforeCpp11BracedList: false SpaceBeforeCtorInitializerColon: true SpaceBeforeInheritanceColon: true SpaceBeforeParens: Never SpaceBeforeRangeBasedForLoopColon: true SpaceBeforeSquareBrackets: false SpaceInEmptyBlock: false SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 SpacesInAngles: Never SpacesInCStyleCastParentheses: false SpacesInConditionalStatement: false SpacesInContainerLiterals: true SpacesInParentheses: false SpacesInSquareBrackets: false AttributeMacros: - __host__ - __device__ - __global__ - __forceinline__ - __shared__ - __launch_bounds__ - ROCPRIM_DEVICE - ROCPRIM_HOST - ROCPRIM_HOST_DEVICE - ROCPRIM_SHARED_MEMORY - ROCPRIM_KERNEL - ROCPRIM_INLINE - ROCPRIM_FORCE_INLINE - ROCPRIM_LAUNCH_BOUNDS # Trick clang into thinking that our C-style attributes are C++-style attributes # Make sure that the sizes line up for linebreaks etc Macros: - __host__=[[host]] - __device__=[[device]] - __global__=[[global]] - __forceinline__=[[forceinline]] - 
__shared__=[[shared]] - __launch_bounds__(x)=[[launch_bounds(x)]] - __attribute__(x)=[[attribute(x)]] - ROCPRIM_DEVICE=[[DEVICE____]] - ROCPRIM_HOST=[[HOST____]] - ROCPRIM_HOST_DEVICE=[[HOST_DEVICE____]] - ROCPRIM_SHARED_MEMORY=[[SHARED_MEMORY____]] - ROCPRIM_KERNEL=[[KERNEL____]] - ROCPRIM_INLINE=[[INLINE____]] - ROCPRIM_FORCE_INLINE=[[FORCE_INLINE____]] - ROCPRIM_LAUNCH_BOUNDS(x)=[[launch_bounds(x)____]] BreakAfterAttributes: Always --- ./docs/0000775000175100017510000000000015176134512012040 5ustar jenkinsjenkins./docs/reference/0000775000175100017510000000000015176134512013776 5ustar jenkinsjenkins./docs/reference/developer.rst0000664000175100017510000004746615176134512016536 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _developer: ******************************************************************** rocPRIM Developer guidelines ******************************************************************** Overview ======== As explained in :ref:`rocprim-intro`, rocPRIM's operations are part of one of four different hierarchical scopes: *Device/Grid*, :term:`Block`, :term:`Warp`, or *Thread*. This division facilitates re-use in the codebase and provides flexibility for users. Additional developer considerations are: * *Device/Grid*: algorithms called from host code, executed on the entire device. The input size is variable and passed as an argument to the function. * :term:`Block`: algorithms that are called from device code, executed by one thread block. All threads in a thread block should participate in the function call, and the threads together perform the algorithm. They are defined as structures, to group similar overloads and provide associated types such as the ``storage_type`` defining shared memory storage requirements. The maximum input size is defined by template arguments. Optionally, an actual size can be defined by ``valid_items`` overloads.
* :term:`Warp`: algorithms called from device code, executed by one warp. In many ways these algorithms are similar to the block-level algorithms, the key difference is that all threads in a warp collectively perform the algorithm. Through template arguments, a logical warp size can be specified. * *Thread*: algorithms called from the device and perform work in a single thread without communicating with other threads. See the contributing guide `CONTRIBUTING.md `_ for information on file structure and how test and benchmarks should be implemented. General rules ============= Code should be modular, and when possible broader scoped to facilitate reuse. If there is no adverse effect on performance, extract common functionality. The different hierarchies of the API are not only for the user, algorithm implementations use these endpoints as well. For instance, device-level algorithms typically use the block-level algorithms for loading and storing data. It should be clear from function template parameters whether they are tuning options that do not affect behavior, or are algorithmic parameters that change behavior. For instance, tuning options may be block size, items per thread, or the block-level scan method (``block_scan_algorithm``). An algorithmic parameter could be whether a scan has an initial value, or whether a reduction is inclusive or exclusive. An example of an enumeration that violates this rule is ``block_load_method``, where the different members make different orders of the elements. Between minor ROCm versions, breaking changes in the public API MUST NOT be introduced. Everything in the namespace ``rocprim`` is considered public API, based on the assumption that a user may in theory depend on it. Pay special attention not to break backward-compatibility, as it can be done in subtle ways. For example, many functions allow user-defined types, which behave differently in many ways from fundamental types. 
Be defensive in what is placed in the public API as sustaining backward-compatibility is a burden on maintenance. If it is not necessary to be exposed, place it in ``rocprim::detail`` (or lower) instead. A common additional check is to make sure downstream libraries still compile and execute tests successfully (hipCUB, rocThrust, Tensorflow, and PyTorch). HIP Graphs are a way to capture all stream-ordered HIP API calls into a graph without executing, and then replaying the graph many times afterwards. Supporting graph capture makes rocPRIM more flexible to use, and all device-level algorithms should strive to allow it. Among other things, one general requirement is that the number of kernel calls and the launch parameters of kernel calls should not depend on input data. If support is not possible for a specific algorithm, the documentation should state this clearly. Configurations and architecture dispatch ======================================== One of the most complex parts of rocPRIM is the mechanism that allows for the user-provided configuration and defaulted automatic configuration of device-level algorithms. Default and user-specified configuration ---------------------------------------- As explained in :ref:`tuning`, device-level algorithms may be configured by accepting a tuning config. It may be provided by the caller, or defaulted to ``default_config``, which selects a suitable default configuration. The number of threads in a block (the "block size") is a quintessential configuration parameter for kernels. It needs to be known at the host side to launch the kernel and at the device side at compile-time for the generation of algorithmic functions. HIP code is compiled in multiple passes, one pass for the host and one pass for each targeted device architecture. When a kernel is launched on the host, the HIP runtime selects the binary based on the device associated with the HIP stream. 
Since the configuration, and thus the block size, depends on this device architecture, rocPRIM must have a similar mechanism to infer the architecture of the device based on the HIP stream. To facilitate a dispatching mechanism supporting the above requirements, several standardized structures need to be defined for each algorithm, which are outlined in this section. These structures depend on a generalized dispatching mechanism. The algorithm's configuration struct is defined in ``rocprim/device/detail/device_config_helper.hpp``. The reason for putting all configurations in one file is to make the configuration templates simpler (generating configurations is explained in :ref:`tuning`). The tuning config has the name ``ALGO_config``, and no members (unless for backward-compatibility reasons), only template parameters. The config struct derives from a non-public parameter struct holding the actual parameters. This separation between structs is done to facilitate change without breaking public API. .. code:: cpp namespace detail { struct ALGO_config_params { unsigned int BlockSize; unsigned int ItemsPerThread; }; } // namespace detail template struct ALGO_config : public detail::ALGO_config_params { constexpr ALGO_config() : detail::ALGO_config_params{BlockSize, ItemsPerThread} {} } In order to accept either ``default_config`` or ``ALGO_config`` as the device-level configuration template type and convert it to a parameter instance, a non-public config wrapper is defined in ``rocprim/device/device_ALGO_config.hpp``. ..
code:: cpp namespace detail { // generic struct that instantiates custom configurations template struct wrapped_ALGO_config { template struct architecture_config { static constexpr ALGO_config_params params = ALGOConfig(); }; }; // specialized for rocprim::default_config, which instantiates the default_ALGO_config template struct wrapped_ALGO_config { template struct architecture_config { static constexpr ALGO_config_params params = default_ALGO_config(Arch), Type>(); }; }; } // namespace detail Selecting the default configuration is done based on the target architecture ``target_arch`` and typically also on the input types of the algorithm (in the example above, a single type ``Type`` is used). The ``default_ALGO_config`` is defined in ``rocprim/include/device/detail/config/device_ALGO.hpp``. This file will be generated by the autotuning process, as explained in :ref:`tuning`. The files look like this: .. code:: cpp namespace detail { // base configuration in case no specific configuration exists template struct default_ALGO_config : default_ALGO_config_base::type {}; // generated configuration for architecture gfx1030, based on float template struct default_ALGO_config< static_cast(target_arch::gfx1030), Type, std::enable_if_t().is_floating_point()) && (sizeof(value_type) <= 4) && (sizeof(value_type) > 2)>> : ALGO_config<256, 16> {}; // many generated configurations.. } // namespace detail It is up to the implementer to specify a suitable and generic base configuration. This base configuration is not placed in the template to make the template simpler. Instead, it is defined in ``rocprim/device/detail/device_config_helper.hpp``: .. code:: cpp namespace detail { template struct default_ALGO_config_base { using type = ALGO_config<256, 4>; }; } // namespace detail Finally, the kernel is templatized with the ``wrapped_ALGO_config`` and not the actual configuration parameters. 
It is done so that the architecture enumeration value (or any dependent configuration parameters) does not appear in the function signature. This prevents a host-side switch statement over the architecture values to select the right kernel to launch. Instead, this selection is done at compile time in device code. Config dispatch --------------- The default configuration depends on the types of the input values of the algorithm, as well as the device architecture. The device architecture is determined at runtime, based on the HIP stream. At the host side, the configuration parameters are selected at runtime using the following pattern: .. code:: cpp using config = wrapped_ALGO_config; detail::target_arch target_arch; hipError_t result = host_target_arch(stream, target_arch); if(result != hipSuccess) { return result; } const ALGO_config_params params = dispatch_target_arch(target_arch); In device code, when targeting generic AMDGPU architectures, the device architecture is known at compile time, during the host pass. With SPIR-V targets, however, the architecture is only known at runtime, during the device pass. To bridge this, we compile one kernel for each supported architecture during the host pass. That way, the final binary always contains a kernel compiled with the right configuration, and the device pass can select it at runtime. The difference is that for generic targets we compile only the real (matching) kernel; the other per-arch kernels are empty. The method to access the configuration in device code is as follows: .. code:: cpp constexpr ALGO_CONFIG_PARAMS params = Config::template architecture_config::params; In the example, ``config`` is of type ``wrapped_ALGO_config`` as in the host example. Common patterns =============== There are several patterns throughout rocPRIM's codebase for uniformity and enforcing good practice. 
Temporary storage allocation ---------------------------- If a device-level function requires temporary storage, ``void* temporary_storage`` and ``size_t& storage_size`` will be the first two parameters. When calling the function with ``nullptr`` for ``temporary_storage``, the function will set ``storage_size`` to the required number of temporary device memory bytes. If no temporary storage is required under specific circumstances, ``storage_size`` should be set to a small non-zero value, to prevent the users from having to check before making a zero-sized allocation. Common functionality in the ``detail::temp_storage`` namespace is used to calculate the required storage on the first function call and assign pointers in the second function call. The below example allocates and assigns a temporary array of ten integers. .. code:: cpp hipError_t function(void* temporary_storage, size_t& storage_size) { int* d_tmp{}; // if temporary_storage is nullptr, sets storage_size to the required size // else, assigns the pointer d_tmp const hipError_t partition_result = detail::temp_storage::partition( temporary_storage, storage_size, detail::temp_storage::make_linear_partition( detail::temp_storage::ptr_aligned_array(&d_tmp, 10))); if(partition_result != hipSuccess || temporary_storage == nullptr) { return partition_result; } // perform the function with temporary memory return function_impl(d_tmp); } Reusing shared memory --------------------- Shared memory reuse in a kernel is facilitated by placing multiple ``storage_type`` declarations in a union. .. 
code:: cpp using block_load_t = block_load; using block_scan_t = block_scan; using block_store_t = block_store; ROCPRIM_SHARED_MEMORY union { typename block_load_t::storage_type load; typename block_scan_t::storage_type scan; typename block_store_t::storage_type store; } storage; T value; block_load_t().load(input, value, storage.load); syncthreads(); block_scan_t().scan(value, storage.scan); syncthreads(); block_store_t().store(output, value, storage.store); Partial block idiom ------------------- Since thread blocks have uniform sizes, bounds checking is necessary to prevent out-of-bounds loads and stores. Applying a check to every loaded and stored value may become a performance bottleneck. A typical solution is to have a block-wide check, whether a per-item check is necessary. A simple example is below. .. code:: cpp // slow, adds a check for every stored item in each block const unsigned int thread_id = detail::block_thread_id<0>(); const unsigned int block_id = detail::block_id<0>(); const auto num_valid_in_last_block = input_size - block_offset; block_store_t().store( output, values, num_valid_in_last_block, storage); // fast, adds a check only for incomplete blocks (which can only be the last block) constexpr unsigned int items_per_block = BlockSize * ItemsPerThread; const bool is_incomplete_block = block_id == (input_size / items_per_block); if(is_incomplete_block) { block_store_t().store( output, values, num_valid_in_last_block, storage); } else { block_store_t().store( output, values, storage); } Large indices ------------- Typically, each thread handles a fixed amount of elements and HIP limits how many threads can be in a single launch. This means there is a hard limit to the number of elements that can be handled in a single kernel call. Special attention must be paid to how input sizes beyond this limit are handled. This is commonly handled by launching multiple kernels in a loop and combining results. 
Naming of device-level functions -------------------------------- Typically, multiple overloads of device-level functions exist, that call into a common implementation. Below is an example of this pattern and what the naming should look like .. code:: cpp BEGIN_ROCPRIM_NAMESPACE namespace detail { ROCPRIM_KERNEL reduce_kernel(...) { // reduce_kernel_impl defined in rocprim/device/detail/device_reduce.hpp reduce_kernel_impl(...); } template hipError_t reduce_impl(...) { reduce_kernel<<<...>>>(...); } } // namespace detail // default reduce hipError_t reduce(...) { return detail::reduce_impl(...); } // reduce overload with initial value hipError_t reduce(...) { return detail::reduce_impl(...); } END_ROCPRIM_NAMESPACE Synchronous debugging --------------------- All device-level functions have as a last parameter ``bool debug_synchronous``, which defaults to ``false``. This parameter toggles synchronization after kernel launches for debugging purposes. Typically, additional debugging information is printed as well. Items per thread ---------------- Most device functions operate on a fixed number of elements and are templatized based on the element type. These functions will have an ``unsigned int ItemsPerThread`` template parameter, which specifies how many elements each thread should process. The main purpose of this parameter is to tune the performance of such a function. As different types are of different sizes, it is likely that there is no single ``ItemsPerThread`` value that gives good performance for types of all sizes. The ``ItemsPerThread`` value often directly influences register usage of a kernel, which influences the kernel's occupancy. Kernel launch bounds -------------------- To guide the code generation process, it is possible to specify the maximum block size for a kernel with ``__launch_bounds__()``. Since most kernels are templatized based on a configuration, a common pattern is the following: .. 
code:: cpp template ROCPRIM_KERNEL __launch_bounds__(device_params().block_size) void kernel(...) {} Pitfalls and common mistakes ---------------------------- HIP code is compiled in multiple passes: one for the host and one for each targeted device architecture. As such, host code is agnostic of device architecture, and should be designed as such. Only with a ``hipStream`` can the device be inferred and can certain properties be obtained. Since device code is compiled for a specific architecture, it can contain compile-time optimizations for specific architectures. Note that AMD GPUs have a warp size of 32 or 64, and unless specialized, algorithms should work for both warp sizes. All variables with the ``__shared__`` memory space specifier should either be in a function with the ``__global__`` (``ROCPRIM_KERNEL``) execution space specifier or in a function with the ``__device__`` (``ROCPRIM_DEVICE``) execution space specifier marked with ``__forceinline__`` (``ROCPRIM_FORCE_INLINE``). The reason for this is that without forcing the inlining of the function, the compiler may choose not to optimize shared memory allocations, leading to exceeding the limit dictated by hardware. Documenting algorithms ====================== Documenting algorithms requires updating several files. See `Contributing to the ROCm documentation `_ for general guidelines. The ``docs/`` directory contains the ``.rst`` files that form this website. These ``.rst`` files use `Breathe directives `_ to display `Doxygen's special commands `_, which are used in C++ comments to document code. Algorithms should be documented in their appropriate ``docs/`` subdirectories, like ``block_ops/`` and ``device_ops/``, based on their scope. :ref:`dev-nth_element` for example is a :ref:`device-wide operation `, documented in ``docs/device_ops/nth_element.rst``. 
There are several pages that link to the :ref:`dev-nth_element` documentation: - ``docs/device_ops/index.rst``, which adds :ref:`dev-nth_element` to :ref:`dev-index`. - ``docs/reference/ops_summary.rst``, which adds :ref:`dev-nth_element` to :ref:`ops-summary`. - ``docs/sphinx/_toc.yml.in``, which adds :ref:`dev-nth_element` to Sphinx its `Table Of Contents `_, and ``docs/sphinx/_toc.yml`` is automatically generated from this file. Any newly added algorithm should also update these files. As one can easily forget to update one of them, it is recommended to `build and preview the documentation locally `_. ./docs/reference/rocPRIM-data-type-support.rst0000664000175100017510000000147215176134512021407 0ustar jenkinsjenkins.. meta:: :description: rocPRIM API library data type support :keywords: rocPRIM, ROCm, API library, API reference, data type, support .. _data-type-support: ****************************************** Data type support ****************************************** The following table shows the supported input and output datatypes. .. list-table:: Supported Input/Output Types :header-rows: 1 :name: supported-input-output-types * - Type - Support * - int8 - ✅ * - int16 - ✅ * - int32 - ✅ * - int64 - ✅ * - float8 - ❌ * - float16 - ✅ * - bfloat16 - ✅ * - tensorfloat32 - ❌ * - float32 - ✅ * - float64 - ✅ ./docs/reference/ops_summary.rst0000664000175100017510000000627015176134512017113 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. 
_ops-summary: ******************************************************************** Summary of the Operations ******************************************************************** Basics ========= * ``transform`` applies a function to each element of the sequence, equivalent to the functional operation ``map`` * ``select`` takes the first ``N`` elements of the sequence satisfying a condition (via a selection mask or a predicate function) * ``unique`` returns unique elements within a sequence * ``histogram`` generates a summary of the statistical distribution of the sequence Aggregation ============ * ``reduce`` traverses the sequence while accumulating some data, equivalent to the functional operation ``fold_left`` * ``scan`` is the cumulative version of ``reduce`` which returns the sequence of the intermediate values taken by the accumulator Differentiation ================= * ``adjacent_difference`` computes the difference between the current element and the previous or next one in the sequence * ``discontinuity`` detects value change between the current element and the previous or next one in the sequence Rearrangement ================ * ``sort`` rearranges the sequence by sorting it. It could be according to a comparison operator or a value using a radix approach * ``partial_sort`` rearranges the sequence by sorting it up to and including a given index, according to a comparison operator. * ``nth_element`` places the nth element in its sorted position, with elements less-than before, and greater after, according to a comparison operator.
* ``exchange`` rearranges the elements according to a different stride configuration which is equivalent to a tensor axis transposition * ``shuffle`` rotates the elements Partition/Merge ==================== * ``partition`` divides the sequence into two or more sequences according to a predicate while preserving some ordering properties * ``merge`` merges two ordered sequences into one while preserving the order Data Movement =============== * ``store`` stores the sequence to a continuous memory zone. There are variations to use an optimized path or to specify how to store the sequence to better fit the access patterns of the CUs. * ``load`` is the complementary operation of ``store``. * ``memcpy`` copies bytes between device sources and destinations Sequence Search =============== * ``find_first_of`` searches for the first occurrence of any of the provided elements. * ``adjacent_find`` searches a given sequence for the first occurrence of two consecutive equal elements. * ``search`` searches for the first occurrence of the sequence. * ``search_n`` searches for the first occurrence of a sequence of count elements all equal to value. * ``find_end`` searches for the last occurrence of the sequence. Other operations ====================== * ``run_length_encode`` generates a compact representation of a sequence * ``binary_search`` finds for each element the index of an element with the same value in another sequence (which has to be sorted) * ``config`` selects a kernel's grid/block dimensions to tune the operation to a GPU ./docs/reference/reference.rst0000664000175100017510000000074015176134512016467 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation ..
_reference: ******************************************************************** rocPRIM API Reference ******************************************************************** * :ref:`dev-index` * :ref:`block-index` * :ref:`warp-index` * :ref:`thread-index` * :ref:`iterators` * :ref:`intrinsics` * :ref:`type_traits` * :ref:`types` ./docs/reference/data-type-support.rst0000664000175100017510000000147215176134454020143 0ustar jenkinsjenkins.. meta:: :description: rocPRIM API library data type support :keywords: rocPRIM, ROCm, API library, API reference, data type, support .. _data-type-support: ****************************************** Data type support ****************************************** The following table shows the supported input and output datatypes. .. list-table:: Supported Input/Output Types :header-rows: 1 :name: supported-input-output-types * - Type - Support * - int8 - ✅ * - int16 - ✅ * - int32 - ✅ * - int64 - ✅ * - float8 - ❌ * - float16 - ✅ * - bfloat16 - ✅ * - tensorfloat32 - ❌ * - float32 - ✅ * - float64 - ✅ ./docs/reference/iterators.rst0000664000175100017510000000364615176134512016555 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _iterators: ******************************************************************** Iterators ******************************************************************** Constant ========== .. doxygenclass:: rocprim::constant_iterator :members: .. note:: For example, ``constant_iterator(20)`` generates the infinite sequence:: 20 20 20 ... Counting ========== .. doxygenclass:: rocprim::counting_iterator :members: .. note:: For example, ``counting_iterator(20)`` generates the infinite sequence:: 20 21 22 23 ... .. _transform: Transform ============ .. doxygenclass:: rocprim::transform_iterator :members: .. note:: ``transform_iterator(sequence, transform)`` should generate the sequence:: transform(sequence(0)) transform(sequence(1)) ... 
Predicate --------- .. doxygenclass:: rocprim::predicate_iterator :members: .. note:: ``predicate_iterator(sequence, test, predicate)`` generates the sequence:: predicate(test[0]) ? sequence[0] : default predicate(test[1]) ? sequence[1] : default predicate(test[2]) ? sequence[2] : default ... Pairing Values with Indices ============================= .. doxygenclass:: rocprim::arg_index_iterator :members: .. note:: ``arg_index_iterator(sequence)`` generates the sequence of tuples:: (0, sequence[0]) (1, sequence[1]) ... Zip ============== .. doxygenclass:: rocprim::zip_iterator :members: .. note:: ``zip_iterator(sequence_X, sequence_Y)`` generates the sequence of tuples:: (sequence_X[0], sequence_Y[0]) (sequence_X[1], sequence_Y[1]) ... Discard ============== .. doxygenclass:: rocprim::discard_iterator :members: Texture Cache ================ .. doxygenclass:: rocprim::texture_cache_iterator :members: ./docs/reference/types.rst0000664000175100017510000000145015176134512015674 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _types: ******************************************************************** rocPRIM Utility types ******************************************************************** Double buffer ============= .. doxygenclass:: rocprim::double_buffer :members: Future value ============ .. doxygenclass:: rocprim::future_value :members: Key-value pair ============== .. doxygenstruct:: rocprim::key_value_pair :members: Tuple ===== .. doxygenclass:: rocprim::tuple :members: .. doxygenclass:: rocprim::tuple_size :members: .. doxygenstruct:: rocprim::tuple_element :members: Uninitialized Array =================== .. doxygenclass:: rocprim::uninitialized_array :members: ./docs/reference/acknowledge.rst0000664000175100017510000000071515176134512017016 0ustar jenkinsjenkins.. 
meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _acknowledge: ******************************************************************** Acknowledgements ******************************************************************** The following contributors helped to make this documentation better: * `v01dXYZ `_ has proposed a new structure for the documentation. ./docs/reference/intrinsics.rst0000664000175100017510000000330415176134512016715 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _intrinsics: ******************************************************************** Intrinsics ******************************************************************** Bitwise ======== .. doxygenfunction:: rocprim::get_bit(int x, int i) .. doxygenfunction:: rocprim::bit_count(unsigned int x) .. doxygenfunction:: rocprim::bit_count(unsigned long long x) .. doxygenfunction:: rocprim::ctz(unsigned int x) .. doxygenfunction:: rocprim::ctz(unsigned long long x) Warp size =========== .. doxygenfunction:: rocprim::warp_size() .. doxygenfunction:: rocprim::host_warp_size(const int device_id, unsigned int& warp_size) .. doxygenfunction:: rocprim::host_warp_size(const hipStream_t stream, unsigned int& warp_size) .. doxygenfunction:: rocprim::device_warp_size() Lane and Warp ID ================= .. doxygengroup:: intrinsicsmodule_warp_id :content-only: Flat ID ========== .. doxygengroup:: intrinsicsmodule_flat_id :content-only: Flat Size =========== .. doxygenfunction:: rocprim::flat_block_size() .. doxygenfunction:: rocprim::flat_tile_size() Synchronization ================= .. doxygenfunction:: rocprim::syncthreads() .. doxygenfunction:: rocprim::wave_barrier() Active threads ================== .. doxygenfunction:: rocprim::ballot (int predicate) .. doxygenfunction:: rocprim::group_elect(lane_mask_type mask) .. 
doxygenfunction:: rocprim::masked_bit_count (lane_mask_type x, unsigned int add=0) .. doxygenfunction:: rocprim::match_any(unsigned int label, bool valid = true) .. doxygenfunction:: rocprim::match_any(unsigned int label, unsigned int label_bits, bool valid = true) ./docs/reference/rocPRIM-glossary.rst0000664000175100017510000000522615176134454017656 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _glossary: ******************************************************************** Glossary of rocPRIM terms ******************************************************************** This glossary is to help users understand the basic concepts or terminologies used in the rocPRIM library. .. glossary:: :sorted: Wavefront A group of threads that runs using the single instruction, multiple thread (SIMT) model. Wave See :term:`wavefront`. rocPRIM uses "warp", "wave", and "wavefront" interchangeably. Work-item A work-item is the smallest unit of parallel execution. A work-item runs a single independent instruction stream on a single data element. Thread See :term:`work-item`. rocPRIM uses "thread" and "work-item" interchangeably. Warp Alternate term for a :term:`wavefront`. rocPRIM uses "warp", "wave", and "wavefront" interchangeably. Hardware warp size The number of threads in a warp as defined by the hardware. On AMD GPUs, the warp size can be either thirty-two (32) or sixty-four (64) threads. Logical warp size The number of threads in a warp as defined by the user. This can be equal to or less than the hardware warp size. Block See :term:`tile`. rocPRIM uses "block" and "tile" interchangeably. Stride The number of threads per block. Tile A group of warps that run on the same streaming multiprocessor (SM). Threads in the block can be indexed using one dimension, {X}, two dimensions, {X, Y}, or three dimensions, {X, Y, Z}. 
In rocPRIM the tile size is always the same as the block size. Grid A group of blocks that all run the same kernel call. Warp ID The identifier of the warp within a block. A warp's warp ID is guaranteed to be unique. Thread ID The identifier of the thread within a block. Lane ID The identifier of the thread within the warp. Flat ID The flattened block or thread index. The flat ID is a one-dimensional index created from two-dimensional or three-dimensional indices. For example, the flat ID of a two-dimensional thread ID {X, Y} in a two-dimensional ``128x4`` block is ``Y*128+X``. SIMT See :term:`Single-Instruction, Multi-Thread`. Single-Instruction, Multi-Thread Single-instruction, multi-thread (SIMT) is a parallel computing model where all the :term:`work-items <work-item>` within a :term:`wavefront` run the same instruction on different data. ./docs/license.rst0000664000175100017510000000006115176134512014211 0ustar jenkinsjenkinsLicense ======= .. include:: ../LICENSE.txt ./docs/CMakeLists.txt0000664000175100017510000000257015176134454014611 0ustar jenkinsjenkins# MIT License # # Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # rocPRIM documentation include(GNUInstallDirs) rocm_add_sphinx_doc( "${CMAKE_CURRENT_SOURCE_DIR}" BUILDER html OUTPUT_DIR html USES_DOXYGEN ) install( DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/html" DESTINATION ${CMAKE_INSTALL_DOCDIR} ) ./docs/sphinx/0000775000175100017510000000000015176134512013351 5ustar jenkinsjenkins./docs/sphinx/requirements.txt0000664000175100017510000001224615176134454016647 0ustar jenkinsjenkins# # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # # pip-compile requirements.in # accessible-pygments==0.0.5 # via pydata-sphinx-theme alabaster==0.7.16 # via sphinx asttokens==3.0.0 # via stack-data attrs==25.1.0 # via # jsonschema # jupyter-cache # referencing babel==2.15.0 # via # pydata-sphinx-theme # sphinx beautifulsoup4==4.12.3 # via pydata-sphinx-theme breathe==4.35.0 # via rocm-docs-core certifi==2024.7.4 # via requests cffi==1.16.0 # via # cryptography # pynacl charset-normalizer==3.3.2 # via requests click==8.1.7 # via # jupyter-cache # sphinx-external-toc comm==0.2.2 # via ipykernel cryptography==44.0.1 # via pyjwt debugpy==1.8.12 # via ipykernel decorator==5.1.1 # via ipython deprecated==1.2.14 # via pygithub docutils==0.21.2 # via # breathe # myst-parser # pydata-sphinx-theme # sphinx exceptiongroup==1.2.2 # via ipython executing==2.2.0 # via stack-data fastjsonschema==2.20.0 # via # nbformat # rocm-docs-core gitdb==4.0.11 # via gitpython gitpython==3.1.43 # via rocm-docs-core greenlet==3.1.1 # via sqlalchemy idna==3.7 # via requests imagesize==1.4.1 # via sphinx importlib-metadata==8.6.1 # via # jupyter-cache # myst-nb ipykernel==6.29.5 # via myst-nb ipython==8.31.0 # via # ipykernel # myst-nb jedi==0.19.2 # via ipython jinja2==3.1.6 
# via # myst-parser # sphinx jsonschema==4.23.0 # via nbformat jsonschema-specifications==2024.10.1 # via jsonschema jupyter-cache==1.0.1 # via myst-nb jupyter-client==8.6.3 # via # ipykernel # nbclient jupyter-core==5.7.2 # via # ipykernel # jupyter-client # nbclient # nbformat markdown-it-py==3.0.0 # via # mdit-py-plugins # myst-parser markupsafe==2.1.5 # via jinja2 matplotlib-inline==0.1.7 # via # ipykernel # ipython mdit-py-plugins==0.4.1 # via myst-parser mdurl==0.1.2 # via markdown-it-py myst-nb==1.1.2 # via rocm-docs-core myst-parser==3.0.1 # via myst-nb nbclient==0.10.2 # via # jupyter-cache # myst-nb nbformat==5.10.4 # via # jupyter-cache # myst-nb # nbclient nest-asyncio==1.6.0 # via ipykernel packaging==24.1 # via # ipykernel # pydata-sphinx-theme # sphinx parso==0.8.4 # via jedi pexpect==4.9.0 # via ipython platformdirs==4.3.6 # via jupyter-core prompt-toolkit==3.0.50 # via ipython psutil==6.1.1 # via ipykernel ptyprocess==0.7.0 # via pexpect pure-eval==0.2.3 # via stack-data pycparser==2.22 # via cffi pydata-sphinx-theme==0.15.4 # via # rocm-docs-core # sphinx-book-theme pygithub==2.3.0 # via rocm-docs-core pygments==2.18.0 # via # accessible-pygments # ipython # pydata-sphinx-theme # sphinx pyjwt[crypto]==2.8.0 # via pygithub pynacl==1.5.0 # via pygithub python-dateutil==2.9.0.post0 # via jupyter-client pyyaml==6.0.1 # via # jupyter-cache # myst-nb # myst-parser # rocm-docs-core # sphinx-external-toc pyzmq==26.2.0 # via # ipykernel # jupyter-client referencing==0.36.2 # via # jsonschema # jsonschema-specifications requests==2.32.3 # via # pygithub # sphinx rocm-docs-core==1.20.1 # via -r requirements.in rpds-py==0.22.3 # via # jsonschema # referencing six==1.17.0 # via python-dateutil smmap==5.0.1 # via gitdb snowballstemmer==2.2.0 # via sphinx soupsieve==2.5 # via beautifulsoup4 sphinx==7.3.7 # via # breathe # myst-nb # myst-parser # pydata-sphinx-theme # rocm-docs-core # sphinx-book-theme # sphinx-copybutton # sphinx-design # sphinx-external-toc # 
sphinx-notfound-page sphinx-book-theme==1.1.3 # via rocm-docs-core sphinx-copybutton==0.5.2 # via rocm-docs-core sphinx-design==0.6.0 # via rocm-docs-core sphinx-external-toc==1.0.1 # via rocm-docs-core sphinx-notfound-page==1.0.2 # via rocm-docs-core sphinxcontrib-applehelp==1.0.8 # via sphinx sphinxcontrib-devhelp==1.0.6 # via sphinx sphinxcontrib-htmlhelp==2.0.5 # via sphinx sphinxcontrib-jsmath==1.0.1 # via sphinx sphinxcontrib-qthelp==1.0.7 # via sphinx sphinxcontrib-serializinghtml==1.1.10 # via sphinx sqlalchemy==2.0.37 # via jupyter-cache stack-data==0.6.3 # via ipython tabulate==0.9.0 # via jupyter-cache tomli==2.0.1 # via sphinx tornado==6.4.2 # via # ipykernel # jupyter-client traitlets==5.14.3 # via # comm # ipykernel # ipython # jupyter-client # jupyter-core # matplotlib-inline # nbclient # nbformat typing-extensions==4.12.2 # via # ipython # myst-nb # pydata-sphinx-theme # pygithub # referencing # sqlalchemy urllib3==2.2.2 # via # pygithub # requests wcwidth==0.2.13 # via prompt-toolkit wrapt==1.16.0 # via deprecated zipp==3.21.0 # via importlib-metadata ./docs/sphinx/_toc.yml.in0000664000175100017510000000675115176134512015436 0ustar jenkinsjenkinsdefaults: numbered: False root: index subtrees: - caption: Installation entries: - file: install/rocPRIM-prerequisites.rst title: Prerequisites - file: install/rocPRIM-install-overview.rst title: Installation overview - file: install/rocPRIM-build-install-linux.rst title: Installing on Linux - file: install/rocPRIM-build-install-windows.rst title: Building and installing from source - caption: Conceptual entries: - file: conceptual/rocPRIM-stripe-block.rst - file: conceptual/rocPRIM-operations.rst - file: conceptual/rocPRIM-scope.rst - file: conceptual/rocPRIM-performance-tuning.rst - file: conceptual/rocPRIM-type-traits.rst - caption: How to entries: - file: how-to/rocPRIM-spir-v.rst title: Use the SPIR-V target - caption: Reference entries: - file: reference/rocPRIM-glossary.rst - file: 
reference/data-type-support.rst - file: reference/iterators.rst - file: reference/types.rst - file: conceptual/type_traits.rst - file: device_ops/index.rst subtrees: - entries: - file: device_ops/config.rst - file: device_ops/transform.rst - file: device_ops/unique.rst - file: device_ops/sort.rst - file: device_ops/partial_sort.rst - file: device_ops/nth_element.rst - file: device_ops/merge.rst - file: device_ops/partition.rst - file: device_ops/run_length_encoding.rst - file: device_ops/scan.rst - file: device_ops/search_n.rst - file: device_ops/select.rst - file: device_ops/reduce.rst - file: device_ops/adjacent_difference.rst - file: device_ops/adjacent_find.rst - file: device_ops/binary_search.rst - file: device_ops/histogram.rst - file: device_ops/device_copy.rst - file: device_ops/memcpy.rst - file: device_ops/find_first_of.rst - file: device_ops/find_end.rst - file: device_ops/search.rst - file: block_ops/index.rst subtrees: - entries: - file: block_ops/ops_classes/index.rst subtrees: - entries: - file: block_ops/ops_classes/load.rst - file: block_ops/ops_classes/store.rst - file: block_ops/ops_classes/adjacent_difference.rst - file: block_ops/ops_classes/discontinuity.rst - file: block_ops/ops_classes/scan.rst - file: block_ops/ops_classes/reduce.rst - file: block_ops/ops_classes/run_length_decode.rst - file: block_ops/ops_classes/shuffle.rst - file: block_ops/ops_classes/exchange.rst - file: block_ops/ops_classes/sort.rst - file: block_ops/ops_classes/histogram.rst - file: block_ops/data_mov_funcs.rst - file: warp_ops/index.rst subtrees: - entries: - file: warp_ops/load.rst - file: warp_ops/store.rst - file: warp_ops/reduce.rst - file: warp_ops/scan.rst - file: warp_ops/sort.rst - file: warp_ops/shuffle.rst - file: warp_ops/exchange.rst - file: thread_ops/index.rst subtrees: - entries: - file: thread_ops/radix_key_codec.rst - file: thread_ops/thread_operators.rst - file: thread_ops/thread_load.rst - file: thread_ops/thread_reduce.rst - file: 
thread_ops/thread_scan.rst - file: thread_ops/thread_search.rst - file: thread_ops/thread_store.rst - file: reference/developer.rst - file: license.rst ./docs/sphinx/requirements.in0000664000175100017510000000002715176134454016430 0ustar jenkinsjenkinsrocm-docs-core==1.20.1 ./docs/block_ops/0000775000175100017510000000000015176134512014013 5ustar jenkinsjenkins./docs/block_ops/ops_classes/0000775000175100017510000000000015176134454016336 5ustar jenkinsjenkins./docs/block_ops/ops_classes/adjacent_difference.rst0000664000175100017510000000057215176134454023017 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _blk-adjacent_difference: ******************************************************************** Adjacent difference ******************************************************************** .. doxygenclass:: rocprim::block_adjacent_difference :members: ./docs/block_ops/ops_classes/discontinuity.rst0000664000175100017510000000055015176134454021775 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _blk-discontinuity: ******************************************************************** Discontinuity ******************************************************************** .. doxygenclass:: rocprim::block_discontinuity :members: ./docs/block_ops/ops_classes/histogram.rst0000664000175100017510000000067215176134454021072 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _blk-histogram: ******************************************************************** Histogram ******************************************************************** Class ========= .. doxygenclass:: rocprim::block_histogram :members: Algorithms =========== .. 
doxygenenum:: rocprim::block_histogram_algorithm ./docs/block_ops/ops_classes/reduce.rst0000664000175100017510000000066015176134454020341 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _blk-reduce: ******************************************************************** Reduce ******************************************************************** Class ========== .. doxygenclass:: rocprim::block_reduce :members: Algorithms ============ .. doxygenenum:: rocprim::block_reduce_algorithm ./docs/block_ops/ops_classes/index.rst0000664000175100017510000000103115176134454020172 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _class-index: ******************************************************************** Operation classes ******************************************************************** * :ref:`blk-load` * :ref:`blk-store` * :ref:`blk-adjacent_difference` * :ref:`blk-discontinuity` * :ref:`blk-scan` * :ref:`blk-reduce` * :ref:`blk-shuffle` * :ref:`blk-exchange` * :ref:`blk-sort` * :ref:`blk-histogram` ./docs/block_ops/ops_classes/exchange.rst0000664000175100017510000000053115176134454020651 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _blk-exchange: ******************************************************************** Exchange ******************************************************************** .. doxygenclass:: rocprim::block_exchange :members: ./docs/block_ops/ops_classes/sort.rst0000664000175100017510000000076615176134454020070 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. 
_blk-sort: ******************************************************************** Sort ******************************************************************** Generic Block Sort ================== .. doxygenclass:: rocprim::block_sort :members: .. doxygenenum:: rocprim::block_sort_algorithm Radix sort =========== .. doxygenclass:: rocprim::block_radix_sort :members: ./docs/block_ops/ops_classes/run_length_decode.rst0000664000175100017510000000056415176134454022545 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _blk-run_length_decode: ******************************************************************** Run-length decode ******************************************************************** .. doxygenclass:: rocprim::block_run_length_decode :members: ./docs/block_ops/ops_classes/store.rst0000664000175100017510000000064415176134454020230 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _blk-store: ******************************************************************** Store ******************************************************************** Class ====== .. doxygenclass:: rocprim::block_store :members: Algorithms =========== .. doxygenenum:: rocprim::block_store_method ./docs/block_ops/ops_classes/scan.rst0000664000175100017510000000064715176134454020023 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _blk-scan: ******************************************************************** Scan ******************************************************************** Class ======= .. doxygenclass:: rocprim::block_scan :members: Algorithms ============== .. doxygenenum:: rocprim::block_scan_algorithm ./docs/block_ops/ops_classes/shuffle.rst0000664000175100017510000000052615176134454020527 0ustar jenkinsjenkins.. 
meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _blk-shuffle: ******************************************************************** Shuffle ******************************************************************** .. doxygenclass:: rocprim::block_shuffle :members: ./docs/block_ops/ops_classes/load.rst0000664000175100017510000000064715176134454020016 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _blk-load: ******************************************************************** Load ******************************************************************** Class ========== .. doxygenclass:: rocprim::block_load :members: Algorithms ============== .. doxygenenum:: rocprim::block_load_method ./docs/block_ops/index.rst0000664000175100017510000000114415176134512015654 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _block-index: ******************************************************************** Block-Wide Operations ******************************************************************** * :ref:`class-index` * :ref:`blk-load` * :ref:`blk-store` * :ref:`blk-adjacent_difference` * :ref:`blk-discontinuity` * :ref:`blk-scan` * :ref:`blk-reduce` * :ref:`blk-shuffle` * :ref:`blk-exchange` * :ref:`blk-sort` * :ref:`blk-histogram` * :ref:`data_mov_funcs` ./docs/block_ops/data_mov_funcs.rst0000664000175100017510000000473215176134512017543 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _data_mov_funcs: ******************************************************************** Data movement functions ******************************************************************** Direct Blocked =============== Load ------ .. 
doxygenfunction:: rocprim::block_load_direct_blocked(unsigned int flat_id, InputIterator block_input, T (&items)[ItemsPerThread]) .. doxygenfunction:: rocprim::block_load_direct_blocked(unsigned int flat_id, InputIterator block_input, T (&items)[ItemsPerThread], unsigned int valid) .. doxygenfunction:: rocprim::block_load_direct_blocked (unsigned int flat_id, InputIterator block_input, T(&items)[ItemsPerThread], unsigned int valid, Default out_of_bounds) Store ---------- .. doxygenfunction:: rocprim::block_store_direct_blocked (unsigned int flat_id, OutputIterator block_output, T(&items)[ItemsPerThread]) .. doxygenfunction:: rocprim::block_store_direct_blocked (unsigned int flat_id, OutputIterator block_output, T(&items)[ItemsPerThread], unsigned int valid) Direct Blocked Vectorized =========================== Load ------- .. doxygenfunction:: rocprim::block_load_direct_blocked_vectorized (unsigned int flat_id, T *block_input, U(&items)[ItemsPerThread]) Store ---------- .. doxygenfunction:: rocprim::block_store_direct_blocked_vectorized (unsigned int flat_id, T *block_output, U(&items)[ItemsPerThread]) Direct Striped ================== Load --------- .. doxygenfunction:: rocprim::block_load_direct_striped (unsigned int flat_id, InputIterator block_input, T(&items)[ItemsPerThread]) .. doxygenfunction:: rocprim::block_load_direct_striped (unsigned int flat_id, InputIterator block_input, T(&items)[ItemsPerThread], unsigned int valid) .. doxygenfunction:: rocprim::block_load_direct_striped (unsigned int flat_id, InputIterator block_input, T(&items)[ItemsPerThread], unsigned int valid, Default out_of_bounds) Store ---------- .. doxygenfunction:: rocprim::block_store_direct_striped (unsigned int flat_id, OutputIterator block_output, T(&items)[ItemsPerThread]) .. doxygenfunction:: rocprim::block_store_direct_striped (unsigned int flat_id, OutputIterator block_output, T(&items)[ItemsPerThread], unsigned int valid) Direct Warp Striped ==================== Load --------- .. 
doxygengroup:: blockmodule_warp_load_functions :content-only: Store ---------- .. doxygengroup:: blockmodule_warp_store_functions :content-only: ./docs/.gitignore0000664000175100017510000000011715176134454014034 0ustar jenkinsjenkins/_build/ /_doxygen/ /doxygen/html /doxygen/xml /doxygen/*.tag /sphinx/_toc.yml ./docs/thread_ops/0000775000175100017510000000000015176134512014170 5ustar jenkinsjenkins./docs/thread_ops/thread_reduce.rst0000664000175100017510000000050015176134454017520 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _thread_reduce: ******************************************************************** Reduce ******************************************************************** .. doxygengroup:: thread_reduce ./docs/thread_ops/thread_operators.rst0000664000175100017510000000144115176134454020274 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _thread_operators: ******************************************************************** Operators ******************************************************************** Equality ======== .. doxygenstruct:: rocprim::equality :members: Inequality ========== .. doxygenstruct:: rocprim::inequality :members: .. doxygenstruct:: rocprim::inequality_wrapper :members: Sum === .. doxygenstruct:: rocprim::sum :members: Max/Min ======= .. doxygenstruct:: rocprim::max :members: .. doxygenstruct:: rocprim::min :members: ArgMax/ArgMin ============= .. doxygenstruct:: rocprim::arg_max :members: .. doxygenstruct:: rocprim::arg_min :members: ./docs/thread_ops/thread_search.rst0000664000175100017510000000050015176134454017516 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. 
_thread_search: ******************************************************************** Search ******************************************************************** .. doxygengroup:: thread_search ./docs/thread_ops/index.rst0000664000175100017510000000074715176134512016041 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _thread-index: ******************************************************************** Thread-Level Operations ******************************************************************** * :ref:`radix-key-codec` * :ref:`thread_operators` * :ref:`thread_load` * :ref:`thread_reduce` * :ref:`thread_scan` * :ref:`thread_search` * :ref:`thread_store` ./docs/thread_ops/thread_store.rst0000664000175100017510000000047515176134454017420 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _thread_store: ******************************************************************** Store ******************************************************************** .. doxygengroup:: thread_store ./docs/thread_ops/thread_load.rst0000664000175100017510000000047215176134454017200 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _thread_load: ******************************************************************** Load ******************************************************************** .. doxygengroup:: thread_load ./docs/thread_ops/radix_key_codec.rst0000664000175100017510000000055615176134512020044 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _radix-key-codec: ******************************************************************** Radix Key Encoder/Decoder ******************************************************************** .. 
doxygenclass:: rocprim::radix_key_codec :members: ./docs/thread_ops/thread_scan.rst0000664000175100017510000000065415176134454017207 0ustar jenkinsjenkins.. meta:: :description: rocPRIM documentation and API reference library :keywords: rocPRIM, ROCm, API, documentation .. _thread_scan: ******************************************************************** Scan ******************************************************************** Exclusive Scan ============== .. doxygengroup:: thread_scan_exclusive Inclusive Scan ============== .. doxygengroup:: thread_scan_inclusive ./docs/doxygen/0000775000175100017510000000000015176134454013522 5ustar jenkinsjenkins./docs/doxygen/threadmodule.dox0000664000175100017510000000022515176134454016712 0ustar jenkinsjenkins/** @brief rocPRIM Thread-level parallel primitives @author @file */ /** * \defgroup threadmodule Thread-level * \ingroup primitivesmodule * */ ./docs/doxygen/primitivesmodule.dox0000664000175100017510000000033015176134454017633 0ustar jenkinsjenkins/** @brief rocPRIM parallel primitives. @author @file */ /** * \defgroup primitivesmodule Parallel primitives * */ /** * \defgroup primitivesmodule_deviceconfigs Kernel configs * \ingroup primitivesmodule */./docs/doxygen/intrinsicsmodule.dox0000664000175100017510000000043415176134454017632 0ustar jenkinsjenkins/** @brief rocPRIM intrinsic functions. 
@author @file */ /** * \defgroup intrinsicsmodule Intrinsic functions */ /** * \defgroup intrinsicsmodule_flat_id Flat ID * \ingroup intrinsicsmodule */ /** * \defgroup intrinsicsmodule_warp_id Warp ID * \ingroup intrinsicsmodule */ ./docs/doxygen/warpmodule.dox0000664000175100017510000000021415176134454016412 0ustar jenkinsjenkins/** @brief rocPRIM Warp-wide parallel primitives @author @file */ /** * \defgroup warpmodule Warp-wide * \ingroup primitivesmodule * */./docs/doxygen/blockmodule.dox0000664000175100017510000000101015176134454016526 0ustar jenkinsjenkins/** @brief rocPRIM Block-wide parallel primitives @author @file */ /** * \defgroup blockmodule Block-wide * \ingroup primitivesmodule * */ /** * \defgroup blockmodule_warp_load_functions Load Functions * \ingroup blockmodule */ /** * \defgroup blockmodule_warp_store_functions Store Functions * \ingroup blockmodule */ /** * \defgroup blockmodule_cast_load_functions Load Functions * \ingroup blockmodule */ /** * \defgroup blockmodule_cast_store_functions Store Functions * \ingroup blockmodule */ ./docs/doxygen/devicemodule.dox0000664000175100017510000000020715176134454016702 0ustar jenkinsjenkins/** @brief rocPRIM Device-wide primitives @author @file */ /** * \defgroup devicemodule Device-wide * \ingroup primitivesmodule */ ./docs/doxygen/iteratormodule.dox0000664000175100017510000000015415176134454017275 0ustar jenkinsjenkins/** @brief rocPRIM fancy iterators. @author @file */ /** * \defgroup iteratormodule Fancy iterators * */./docs/doxygen/glossary.dox0000664000175100017510000000350515176134454016104 0ustar jenkinsjenkins/** @brief rocPRIM Glossary @author @file */ /** * \defgroup rocprim_glossary rocPRIM Glossary * @{ * This glossary is to help users understand the basic concepts or terminologies used in the rocPRIM library. * * @page rocprim_glossary_page Terminologies * * * \par Warp * Refers to a group of threads that execute in SIMT (Single Instruction, Multiple Thread) fashion. 
Also known as \n * wavefronts on AMD GPUs. * * \par Hardware Warp Size * Refers to the number of threads in a warp defined by the hardware. On Nvidia GPUs, a warp size is 32, \n * while on AMD GPUs, a warp size is 64. * * \par Logical Warp Size * Refers to the number of threads in a warp defined by the user, which can be equal to or less than \n * the hardware warp size. * * \par Lane ID * Refers to the thread identifier within the warp. A logical lane ID refers to the thread identifier in a \n "logical * warp", which can be smaller than a hardware warp size (and can be defined as `lane_id() % WarpSize`). * * \par Warp ID * Refers to the identifier of the hardware/logical warp in a block. Warp ID is guaranteed to be unique among warps. * * \par Block * Refers to a group of threads that are executed on the same compute unit (streaming multiprocessor). These threads can \n * be indexed using 1 dimension {X}, 2 dimensions {X, Y}, or 3 dimensions {X, Y, Z}. A block consists of multiple warps. * * \par Tile * Refers to a block, but in the C++AMP nomenclature. * * \par Flat ID * Refers to a flattened identifier of a block (tile) or a thread identifier. Flat ID is a 1D value created from 2D or 3D \n * identifier. Example: flat id of thread id (X, Y) in 2D thread block 128x4 (XxY) is Y * 128 + X. * * @} */./docs/doxygen/mainpage.dox0000664000175100017510000000041515176134454016017 0ustar jenkinsjenkins/** @brief The rocPRIM project main page. @author @file */ /** @mainpage rocPRIM Documentation @section overview Overview rocPRIM is a header-only library providing HIP parallel primitives for developing performant GPU-accelerated code on the AMD ROCm platform. */ ./docs/doxygen/Doxyfile0000664000175100017510000035075715176134454015251 0ustar jenkinsjenkins# Doxyfile 1.9.4 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project.
# # All text after a double hash (##) is considered a comment and is placed in # front of the TAG it is preceding. # # All text after a single hash (#) is considered a comment and will be ignored. # The format is: # TAG = value [value, ...] # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). # # Note: # # Use doxygen to compare the used configuration file with the template # configuration file: # doxygen -x [configFile] # Use doxygen to compare the used configuration file with the template # configuration file without replacing the environment variables: # doxygen -x_noenv [configFile] #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the configuration # file that follow. The default is UTF-8 which is also the encoding used for all # text before the first occurrence of this tag. Doxygen uses libiconv (or the # iconv built into libc) for the transcoding. See # https://www.gnu.org/software/libiconv/ for the list of possible encodings. # The default value is: UTF-8. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded by # double-quotes, unless you are using Doxywizard) that should identify the # project for which the documentation is generated. This name is used in the # title of most generated pages and in a few other places. # The default value is: My Project. PROJECT_NAME = rocPRIM # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version # control system is used. 
PROJECT_NUMBER = # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give the viewer a # quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = # With the PROJECT_LOGO tag one can specify a logo or an icon that is included # in the documentation. The maximum height of the logo should not exceed 55 # pixels and the maximum width should not exceed 200 pixels. Doxygen will copy # the logo to the output directory. PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. OUTPUT_DIRECTORY = . # If the CREATE_SUBDIRS tag is set to YES then doxygen will create up to 4096 # sub-directories (in 2 levels) under the output directory of each output format # and will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise cause # performance problems for the file system. Adapt CREATE_SUBDIRS_LEVEL to # control the number of sub-directories. # The default value is: NO. CREATE_SUBDIRS = NO # Controls the number of sub-directories that will be created when # CREATE_SUBDIRS tag is set to YES. Level 0 represents 16 directories, and every # level increment doubles the number of directories, resulting in 4096 # directories at level 8 which is the default and also the maximum value. The # sub-directories are organized in 2 levels, the first level always has a fixed # number of 16 directories. # Minimum value: 0, maximum value: 8, default value: 8. # This tag requires that the tag CREATE_SUBDIRS is set to YES.
CREATE_SUBDIRS_LEVEL = 8 # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode # U+3044. # The default value is: NO. ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Bulgarian, # Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch, English # (United States), Esperanto, Farsi (Persian), Finnish, French, German, Greek, # Hindi, Hungarian, Indonesian, Italian, Japanese, Japanese-en (Japanese with # English messages), Korean, Korean-en (Korean with English messages), Latvian, # Lithuanian, Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, # Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, # Swedish, Turkish, Ukrainian and Vietnamese. # The default value is: English. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. # The default value is: YES. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief # description of a member or function before the detailed description # # Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. # The default value is: YES. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator that is # used to form the text in various listings. 
Each string in this list, if found # as the leading text of the brief description, will be stripped from the text # and the result, after processing the whole list, is used as the annotated # text. Otherwise, the brief description is used as-is. If left blank, the # following values are used ($name is automatically replaced with the name of # the entity):The $name class, The $name widget, The $name file, is, provides, # specifies, contains, represents, a, an and the. ABBREVIATE_BRIEF = # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # doxygen will generate a detailed section even if there is only a brief # description. # The default value is: NO. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. # The default value is: NO. INLINE_INHERITED_MEMB = YES # If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path # before files name in the file list and in the header files. If set to NO the # shortest path that makes the file name unique will be used # The default value is: YES. FULL_PATH_NAMES = YES # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. # Stripping is only done if one of the specified strings matches the left-hand # part of the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the path to # strip. # # Note that you can specify absolute paths here, but also relative paths, which # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. 
STRIP_FROM_PATH = # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which # header file to include in order to use a class. If left blank only the name of # the header file containing the class definition is used. Otherwise one should # specify the list of include paths that are normally passed to the compiler # using the -I flag. STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful if your file system doesn't # support long names like on DOS, Mac, or CD-ROM. # The default value is: NO. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the # first line (until the first dot) of a Javadoc-style comment as the brief # description. If set to NO, the Javadoc-style will behave just like regular Qt- # style comments (thus requiring an explicit @brief command for a brief # description.) # The default value is: NO. JAVADOC_AUTOBRIEF = NO # If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line # such as # /*************** # as being the beginning of a Javadoc-style comment "banner". If set to NO, the # Javadoc-style will behave just like regular comments and it will not be # interpreted by doxygen. # The default value is: NO. JAVADOC_BANNER = NO # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description. If # set to NO, the Qt-style will behave just like regular Qt-style comments (thus # requiring an explicit \brief command for a brief description.) # The default value is: NO. QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a # multi-line C++ special comment block (i.e. a block of //! or /// comments) as # a brief description. This used to be the default behavior.
The new default is # to treat a multi-line C++ comment block as a detailed description. Set this # tag to YES if you prefer the old behavior instead. # # Note that setting this tag to YES also means that rational rose comments are # not recognized any more. # The default value is: NO. MULTILINE_CPP_IS_BRIEF = NO # By default Python docstrings are displayed as preformatted text and doxygen's # special commands cannot be used. By setting PYTHON_DOCSTRING to NO the # doxygen's special commands can be used and the contents of the docstring # documentation blocks is shown as doxygen documentation. # The default value is: YES. PYTHON_DOCSTRING = YES # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new # page for each member. If set to NO, the documentation of a member will be part # of the file/class/namespace that contains it. # The default value is: NO. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen # uses this value to replace tabs by spaces in code fragments. # Minimum value: 1, maximum value: 16, default value: 4. TAB_SIZE = 8 # This tag can be used to specify a number of aliases that act as commands in # the documentation. An alias has the form: # name=value # For example adding # "sideeffect=@par Side Effects:^^" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading # "Side Effects:". Note that you cannot put \n's in the value part of an alias # to insert newlines (in the resulting output). You can put ^^ in the value part # of an alias to insert a newline as if a physical newline was in the original # file. 
When you need a literal { or } or , in the value part of an alias you # have to escape them by means of a backslash (\), this can lead to conflicts # with the commands \{ and \} for these it is advised to use the version @{ and # @} or use a double escape (\\{ and \\}) ALIASES = skip_doxy_start=\{ \ skip_doxy_end=\} # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all # members will be omitted, etc. # The default value is: NO. OPTIMIZE_OUTPUT_FOR_C = NO # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or # Python sources only. Doxygen will then generate output that is more tailored # for that language. For instance, namespaces will be presented as packages, # qualified scopes will look different, etc. # The default value is: NO. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources. Doxygen will then generate output that is tailored for Fortran. # The default value is: NO. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for VHDL. # The default value is: NO. OPTIMIZE_OUTPUT_VHDL = NO # Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice # sources only. Doxygen will then generate output that is more tailored for that # language. For instance, namespaces will be presented as modules, types will be # separated into more groups, etc. # The default value is: NO. OPTIMIZE_OUTPUT_SLICE = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. 
The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, JavaScript, # Csharp (C#), C, C++, Lex, D, PHP, md (Markdown), Objective-C, Python, Slice, # VHDL, Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: # FortranFree, unknown formatted Fortran: Fortran. In the latter case the parser # tries to guess whether the code is fixed or free formatted code, this is the # default for Fortran type files). For instance to make doxygen treat .inc files # as Fortran files (default is PHP), and .f files as C (default is Fortran), # use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise # the files are not read by doxygen. When specifying no_extension you should add # * to the FILE_PATTERNS. # # Note: see also the list of default file extension mappings. EXTENSION_MAPPING = # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable # documentation. See https://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you can # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in # case of backward compatibility issues. # The default value is: YES. MARKDOWN_SUPPORT = YES # When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up # to that level are automatically included in the table of contents, even if # they do not have an id attribute. # Note: This feature currently applies only to Markdown headings. # Minimum value: 0, maximum value: 99, default value: 5. # This tag requires that the tag MARKDOWN_SUPPORT is set to YES.
TOC_INCLUDE_HEADINGS = 5 # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can # be prevented in individual cases by putting a % sign in front of the word or # globally by setting AUTOLINK_SUPPORT to NO. # The default value is: YES. AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should set this # tag to YES in order to let doxygen match function declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); # versus func(std::string) {}). This also makes the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. # The default value is: NO. BUILTIN_STL_SUPPORT = NO # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. # The default value is: NO. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: # https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen # will parse them like normal C++ but will assume all classes use public instead # of private inheritance when no explicit protection keyword is present. # The default value is: NO. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES will make # doxygen replace the get and set methods by a property in the documentation. # This will only work if the methods are indeed getting or setting a simple # type. If this is not the case, or you want to show the methods anyway, you # should set this option to NO. # The default value is: YES.
IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. # The default value is: NO. DISTRIBUTE_GROUP_DOC = NO # If one adds a struct or class to a group and this option is enabled, then also # any nested class or struct is added to the same group. By default this option # is disabled and one has to add nested compounds explicitly via \ingroup. # The default value is: NO. GROUP_NESTED_COMPOUNDS = NO # Set the SUBGROUPING tag to YES to allow class member groups of the same type # (for instance a group of public functions) to be put as a subgroup of that # type (e.g. under the Public Functions section). Set it to NO to prevent # subgrouping. Alternatively, this can be done per class using the # \nosubgrouping command. # The default value is: YES. SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions # are shown inside the group in which they are included (e.g. using \ingroup) # instead of on a separate page (for HTML and Man pages) or section (for LaTeX # and RTF). # # Note that this feature does not work in combination with # SEPARATE_MEMBER_PAGES. # The default value is: NO. INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions # with only public data fields or simple typedef fields will be shown inline in # the documentation of the scope in which they are defined (i.e. file, # namespace, or group documentation), provided this scope is documented. If set # to NO, structs, classes, and unions are shown on a separate page (for HTML and # Man pages) or section (for LaTeX and RTF). # The default value is: NO. 
INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or # enum is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically be # useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. # The default value is: NO. TYPEDEF_HIDES_STRUCT = NO # The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This # cache is used to resolve symbols given their name and scope. Since this can be # an expensive process and often the same symbol appears multiple times in the # code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small # doxygen will become slower. If the cache is too large, memory is wasted. The # cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range # is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 # symbols. At the end of a run doxygen will report the cache usage and suggest # the optimal cache size from a speed point of view. # Minimum value: 0, maximum value: 9, default value: 0. LOOKUP_CACHE_SIZE = 0 # The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use # during processing. When set to 0 doxygen will base this on the number of # cores available in the system. You can set it explicitly to a value larger # than 0 to get more control over the balance between CPU load and processing # speed. At this moment only the input processing can be done using multiple # threads. Since this is still an experimental feature the default is set to 1, # which effectively disables parallel processing. Please report any issues you # encounter.
Generating dot graphs in parallel is controlled by the # DOT_NUM_THREADS setting. # Minimum value: 0, maximum value: 32, default value: 1. NUM_PROC_THREADS = 1 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in # documentation are documented, even if no documentation was available. Private # class members and static file members will be hidden unless the # EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. # Note: This will also disable the warnings about undocumented members that are # normally produced when WARNINGS is set to YES. # The default value is: NO. EXTRACT_ALL = NO # If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will # be included in the documentation. # The default value is: NO. EXTRACT_PRIVATE = NO # If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual # methods of a class will be included in the documentation. # The default value is: NO. EXTRACT_PRIV_VIRTUAL = NO # If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal # scope will be included in the documentation. # The default value is: NO. EXTRACT_PACKAGE = NO # If the EXTRACT_STATIC tag is set to YES, all static members of a file will be # included in the documentation. # The default value is: NO. EXTRACT_STATIC = NO # If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined # locally in source files will be included in the documentation. If set to NO, # only classes defined in header files are included. Does not have any effect # for Java sources. # The default value is: YES. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. 
If set to YES, local methods, # which are defined in the implementation section but not in the interface are # included in the documentation. If set to NO, only methods in the interface are # included. # The default value is: NO. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base name of # the file that contains the anonymous namespace. By default anonymous namespace # are hidden. # The default value is: NO. EXTRACT_ANON_NSPACES = NO # If this flag is set to YES, the name of an unnamed parameter in a declaration # will be determined by the corresponding definition. By default unnamed # parameters remain unnamed in the output. # The default value is: YES. RESOLVE_UNNAMED_PARAMS = YES # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these # members will be included in the various overviews, but no documentation # section is generated. This option has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO, these classes will be included in the various overviews. This option # has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend # declarations. If set to NO, these declarations will be included in the # documentation. # The default value is: NO. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any # documentation blocks found inside the body of a function. If set to NO, these # blocks will be appended to the function's detailed documentation block. 
# The default value is: NO. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation that is typed after a # \internal command is included. If the tag is set to NO then the documentation # will be excluded. Set it to YES to include the internal documentation. # The default value is: NO. INTERNAL_DOCS = NO # With the correct setting of option CASE_SENSE_NAMES doxygen will better be # able to match the capabilities of the underlying filesystem. In case the # filesystem is case sensitive (i.e. it supports files in the same directory # whose names only differ in casing), the option must be set to YES to properly # deal with such files in case they appear in the input. For filesystems that # are not case sensitive the option should be set to NO to properly deal with # output files written for symbols that only differ in casing, such as for two # classes, one named CLASS and the other named Class, and to also support # references to files without having to specify the exact matching casing. On # Windows (including Cygwin) and MacOS, users should typically set this option # to NO, whereas on Linux or other Unix flavors it should typically be set to # YES. # The default value is: system dependent. CASE_SENSE_NAMES = YES # If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with # their full class and namespace scopes in the documentation. If set to YES, the # scope will be hidden. # The default value is: NO. HIDE_SCOPE_NAMES = NO # If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will # append additional text to a page's title, such as Class Reference. If set to # YES the compound reference will be hidden. # The default value is: NO. HIDE_COMPOUND_REFERENCE= NO # If the SHOW_HEADERFILE tag is set to YES then the documentation for a class # will show which file needs to be included to use the class. # The default value is: YES. 
SHOW_HEADERFILE = YES # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. SHOW_INCLUDE_FILES = YES # If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each # grouped member an include statement to the documentation, telling the reader # which file to include in order to use the member. # The default value is: NO. SHOW_GROUPED_MEMB_INC = NO # If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include # files with double quotes in the documentation rather than with sharp brackets. # The default value is: NO. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the # documentation for inline members. # The default value is: YES. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the # (detailed) documentation of file and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. # The default value is: YES. SORT_MEMBER_DOCS = NO # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief # descriptions of file, namespace and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. Note that # this will also influence the order of the classes in the class list. # The default value is: NO. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the # (brief and detailed) documentation of class members so that constructors and # destructors are listed first. If set to NO the constructors will appear in the # respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. # Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief # member documentation. 
# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting # detailed member documentation. # The default value is: NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy # of group names into alphabetical order. If set to NO the group names will # appear in their defined order. # The default value is: NO. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by # fully-qualified names, including namespaces. If set to NO, the class list will # be sorted only by class name, not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the alphabetical # list. # The default value is: NO. SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper # type resolution of all parameters of a function it will reject a match between # the prototype and the implementation of a member function even if there is # only one candidate or it is obvious which candidate to choose by doing a # simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still # accept a match between prototype and implementation in such cases. # The default value is: NO. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo # list. This list is created by putting \todo commands in the documentation. # The default value is: YES. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test # list. This list is created by putting \test commands in the documentation. # The default value is: YES. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug # list. This list is created by putting \bug commands in the documentation. # The default value is: YES. 
GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) # the deprecated list. This list is created by putting \deprecated commands in # the documentation. # The default value is: YES. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional documentation # sections, marked by \if ... \endif and \cond # ... \endcond blocks. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the # initial value of a variable or macro / define can have for it to appear in the # documentation. If the initializer consists of more lines than specified here # it will be hidden. Use a value of 0 to hide initializers completely. The # appearance of the value of individual variables and macros / defines can be # controlled using \showinitializer or \hideinitializer command in the # documentation regardless of this setting. # Minimum value: 0, maximum value: 10000, default value: 30. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated at # the bottom of the documentation of classes and structs. If set to YES, the # list will mention the files that were used to generate the documentation. # The default value is: YES. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. This # will remove the Files entry from the Quick Index and from the Folder Tree View # (if specified). # The default value is: YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces # page. This will remove the Namespaces entry from the Quick Index and from the # Folder Tree View (if specified). # The default value is: YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). 
Doxygen will invoke the program by executing (via # popen()) the command command input-file, where command is the value of the # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided # by doxygen. Whatever the program writes to standard output is used as the file # version. For an example see the documentation. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml # will be used as the name of the layout file. See also section "Changing the # layout of pages" for information. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE # tag is left empty. LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib # extension is automatically appended if omitted. This requires the bibtex tool # to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info. # For LaTeX the style of the bibliography can be controlled using # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. See also \cite for info how to create references. CITE_BIB_FILES = #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated to # standard output by doxygen. If QUIET is set to YES this implies that the # messages are off. 
# The default value is: NO. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated to standard error (stderr) by doxygen. If WARNINGS is set to YES # this implies that the warnings are on. # # Tip: Turn warnings on while writing the documentation. # The default value is: YES. WARNINGS = YES # If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate # warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag # will automatically be disabled. # The default value is: YES. WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as documenting some parameters in # a documented function twice, or documenting parameters that don't exist or # using markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES # If WARN_IF_INCOMPLETE_DOC is set to YES, doxygen will warn about incomplete # function parameter documentation. If set to NO, doxygen will accept that some # parameters have no documentation without warning. # The default value is: YES. WARN_IF_INCOMPLETE_DOC = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return # value. If set to NO, doxygen will only warn about wrong parameter # documentation, but not about the absence of documentation. If EXTRACT_ALL is # set to YES then this flag will automatically be disabled. See also # WARN_IF_INCOMPLETE_DOC # The default value is: NO. WARN_NO_PARAMDOC = NO # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when # a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS # then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but # at the end of the doxygen process doxygen will return with a non-zero status. # Possible values are: NO, YES and FAIL_ON_WARNINGS. 
# The default value is: NO. WARN_AS_ERROR = NO # The WARN_FORMAT tag determines the format of the warning messages that doxygen # can produce. The string should contain the $file, $line, and $text tags, which # will be replaced by the file and line number from which the warning originated # and the warning text. Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) # See also: WARN_LINE_FORMAT # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" # In the $text part of the WARN_FORMAT command it is possible that a reference # to a more specific place is given. To make it easier to jump to this place # (outside of doxygen) the user can define a custom "cut" / "paste" string. # Example: # WARN_LINE_FORMAT = "'vi $file +$line'" # See also: WARN_FORMAT # The default value is: at line $line of file $file. WARN_LINE_FORMAT = "at line $line of file $file" # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard # error (stderr). In case the file specified cannot be opened for writing the # warning and error messages are written to standard error. When as file - is # specified the warning and error messages are written to standard output # (stdout). WARN_LOGFILE = #--------------------------------------------------------------------------- # Configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag is used to specify the files and/or directories that contain # documented source files. You may enter file names like myfile.cpp or # directories like /usr/src/myproject. Separate the files or directories with # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. 
INPUT = mainpage.dox \ primitivesmodule.dox \ threadmodule.dox \ warpmodule.dox \ blockmodule.dox \ devicemodule.dox \ utilsmodule.dox \ iteratormodule.dox \ intrinsicsmodule.dox \ glossary.dox \ ../../rocprim/include/rocprim # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: # https://www.gnu.org/software/libiconv/) for the list of possible encodings. # The default value is: UTF-8. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # read by doxygen. # # Note the list of default checked file patterns might differ from the list of # default file extension mappings. # # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, # *.hh, *.hxx, *.hpp, *.h++, *.l, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, # *.inc, *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C # comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, # *.vhdl, *.ucf, *.qsf and *.ice. FILE_PATTERNS = # The RECURSIVE tag can be used to specify whether or not subdirectories should # be searched for input files as well. # The default value is: NO. RECURSIVE = YES # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. 
# # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. # The default value is: NO. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # ANamespace::AClass, ANamespace::*Test # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* EXCLUDE_SYMBOLS = detail::* # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include # command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and # *.h) to filter out the source-files in the directories. If left blank all # files are included. EXAMPLE_PATTERNS = * # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude commands # irrespective of the value of the RECURSIVE tag. # The default value is: NO. 
EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or directories # that contain images that are to be included in the documentation (see the # \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command: # # <filter> <input-file> # # where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the # name of an input file. Doxygen will then use the output that the filter # program writes to standard output. If FILTER_PATTERNS is specified, this tag # will be ignored. # # Note that the filter must not add or remove lines; it is applied before the # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: pattern=filter # (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how # filters are used. If the FILTER_PATTERNS tag is empty or if none of the # patterns match the file name, INPUT_FILTER is applied. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will also be used to filter the input files that are used for # producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). # The default value is: NO.
FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern. A pattern will override the setting for FILTER_PATTERN (if any) and # it is also possible to disable source filtering for a specific pattern using # *.ext= (so without naming a filter). # This tag requires that the tag FILTER_SOURCE_FILES is set to YES. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page # (index.html). This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. USE_MDFILE_AS_MAINPAGE = #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will be # generated. Documented entities will be cross-referenced with these sources. # # Note: To get rid of all source code in the generated output, make sure that # also VERBATIM_HEADERS is set to NO. # The default value is: NO. SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body of functions, # classes and enums directly into the documentation. # The default value is: NO. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any # special comment blocks from generated source code fragments. Normal C, C++ and # Fortran comments will always remain visible. # The default value is: YES. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES then for each documented # entity all documented functions referencing it will be listed. # The default value is: NO. 
REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES then for each documented function # all documented entities called/used by that function will be listed. # The default value is: NO. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set # to YES then the hyperlinks from functions in REFERENCES_RELATION and # REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will # link to the documentation. # The default value is: YES. REFERENCES_LINK_SOURCE = YES # If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the # source code will show a tooltip with additional information such as prototype, # brief description and links to the definition and documentation. Since this # will make the HTML file larger and loading of large files a bit slower, you # can opt to disable this feature. # The default value is: YES. # This tag requires that the tag SOURCE_BROWSER is set to YES. SOURCE_TOOLTIPS = YES # If the USE_HTAGS tag is set to YES then the references to source code will # point to the HTML generated by the htags(1) tool instead of doxygen built-in # source browser. The htags tool is part of GNU's global source tagging system # (see https://www.gnu.org/software/global/global.html). You will need version # 4.8.6 or higher. # # To use it do the following: # - Install the latest version of global # - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file # - Make sure the INPUT points to the root of the source tree # - Run doxygen as normal # # Doxygen will invoke htags (and that will in turn invoke gtags), so these # tools must be available from the command line (i.e. in the search path). # # The result: instead of the source browser generated by doxygen, the links to # source code will now point to the output of htags. # The default value is: NO. # This tag requires that the tag SOURCE_BROWSER is set to YES. 
USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a # verbatim copy of the header file for each class for which an include is # specified. Set to NO to disable this. # See also: Section \class. # The default value is: YES. VERBATIM_HEADERS = YES # If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the # clang parser (see: # http://clang.llvm.org/) for more accurate parsing at the cost of reduced # performance. This can be particularly helpful with template rich C++ code for # which doxygen's built-in parser lacks the necessary type information. # Note: The availability of this option depends on whether or not doxygen was # generated with the -Duse_libclang=ON option for CMake. # The default value is: NO. CLANG_ASSISTED_PARSING = NO # If the CLANG_ASSISTED_PARSING tag is set to YES and the CLANG_ADD_INC_PATHS # tag is set to YES then doxygen will add the directory of each input to the # include path. # The default value is: YES. # This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. CLANG_ADD_INC_PATHS = YES # If clang assisted parsing is enabled you can provide the compiler with command # line options that you would normally use when invoking the compiler. Note that # the include paths will already be set by doxygen for the files and directories # specified with INPUT and INCLUDE_PATH. # This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. CLANG_OPTIONS = # If clang assisted parsing is enabled you can provide the clang parser with the # path to the directory containing a file called compile_commands.json. This # file is the compilation database (see: # http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) containing the # options used when the source files were built. This is equivalent to # specifying the -p option to a clang tool, such as clang-check. These options # will then be passed to the parser. Any options specified with CLANG_OPTIONS # will be added as well.
# Note: The availability of this option depends on whether or not doxygen was # generated with the -Duse_libclang=ON option for CMake. CLANG_DATABASE_PATH = #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all # compounds will be generated. Enable this if the project contains a lot of # classes, structs, unions or interfaces. # The default value is: YES. ALPHABETICAL_INDEX = NO # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag # can be used to specify a prefix (or a list of prefixes) that should be ignored # while generating the index headers. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output # The default value is: YES. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of # it. # The default directory is: html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for each # generated HTML page (for example: .htm, .php, .asp). # The default value is: .html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a user-defined HTML header file for # each generated HTML page. 
If the tag is left blank doxygen will generate a # standard header. # # To get valid HTML the header file must include any scripts and style sheets # that doxygen needs, which depend on the configuration options used (e.g. # the setting GENERATE_TREEVIEW). It is highly recommended to start with a # default header using # doxygen -w html new_header.html new_footer.html new_stylesheet.css # YourConfigFile # and then modify the file new_header.html. See also section "Doxygen usage" # for information on how to generate the default header that doxygen normally # uses. # Note: The header is subject to change so you typically have to regenerate the # default header when upgrading to a newer version of doxygen. For a description # of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each # generated HTML page. If the tag is left blank doxygen will generate a standard # footer. See HTML_HEADER for more information on how to generate a default # footer and what special commands can be used inside the footer. See also # section "Doxygen usage" for information on how to generate the default footer # that doxygen normally uses. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style # sheet that is used by each HTML page. It can be used to fine-tune the look of # the HTML output. If left blank doxygen will generate a default style sheet. # See also section "Doxygen usage" for information on how to generate the style # sheet that doxygen normally uses. # Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as # it is more robust and this tag (HTML_STYLESHEET) will in the future become # obsolete. # This tag requires that the tag GENERATE_HTML is set to YES.
HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined # cascading style sheets that are included after the standard style sheets # created by doxygen. Using this option one can overrule certain style aspects. # This is preferred over using HTML_STYLESHEET since it does not replace the # standard style sheet and is therefore more robust against future updates. # Doxygen will copy the style sheet files to the output directory. # Note: The order of the extra style sheet files is of importance (e.g. the last # style sheet in the list overrules the setting of the previous ones in the # list). For an example see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. Use the # $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that the # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to # this color. Hue is specified as an angle on a color-wheel, see # https://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. # Minimum value: 0, maximum value: 359, default value: 220. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors # in the HTML output. 
For a value of 0 the output will use gray-scales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the # luminance component of the colors in the HTML output. Values below 100 # gradually make the output lighter, whereas values above 100 make the output # darker. The value divided by 100 is the actual gamma applied, so 80 represents # a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not # change the gamma. # Minimum value: 40, maximum value: 240, default value: 80. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting this # to YES can help to show when doxygen was last run and thus if the # documentation is up to date. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_TIMESTAMP = NO # If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML # documentation will contain a main index with vertical navigation menus that # are dynamically created via JavaScript. If disabled, the navigation index will # consist of multiple levels of tabs that are statically embedded in every HTML # page. Disable this option to support browsers that do not have JavaScript, # like the Qt help browser. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_DYNAMIC_MENUS = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES.
HTML_DYNAMIC_SECTIONS = NO # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries # shown in the various tree structured indices initially; the user can expand # and collapse entries dynamically later on. Doxygen will expand the tree to # such a level that at most the specified number of entries are visible (unless # a fully collapsed tree already exceeds this amount). So setting the number of # entries to 1 will produce a fully collapsed tree by default. 0 is a special value # representing an infinite number of entries and will result in a fully expanded # tree by default. # Minimum value: 0, maximum value: 9999, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development # environment (see: # https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To # create a documentation set, doxygen will generate a Makefile in the HTML # output directory. Running make will produce the docset in that directory and # running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at # startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy # genXcode/_index.html for more information. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_DOCSET = NO # This tag determines the name of the docset feed. A documentation feed provides # an umbrella under which multiple documentation sets from a single provider # (such as a company or product suite) can be grouped. # The default value is: Doxygen generated docs. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_FEEDNAME = "Doxygen generated docs" # This tag determines the URL of the docset feed.
A documentation feed provides # an umbrella under which multiple documentation sets from a single provider # (such as a company or product suite) can be grouped. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_FEEDURL = # This tag specifies a string that should uniquely identify the documentation # set bundle. This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_BUNDLE_ID = org.doxygen.Project # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify # the documentation publisher. This should be a reverse domain-name style # string, e.g. com.mycompany.MyDocSet.documentation. # The default value is: org.doxygen.Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. # The default value is: Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop # on Windows. In the beginning of 2021 Microsoft took the original page, with # a.o. the download links, offline (the HTML help workshop was already many years # in maintenance mode). You can download the HTML help workshop from the web # archives at Installation executable (see: # http://web.archive.org/web/20160201063255/http://download.microsoft.com/downlo # ad/0/A/9/0A939EF6-E31C-430F-A3DF-DFAE7960D564/htmlhelp.exe). # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm).
Compiled HTML # files are now used as the Windows 98 help format, and will replace the old # Windows help format (.hlp) on all Windows platforms in the future. Compressed # HTML files also contain an index, a table of contents, and you can search for # words in the documentation. The HTML workshop also contains a viewer for # compressed HTML files. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_HTMLHELP = NO # The CHM_FILE tag can be used to specify the file name of the resulting .chm # file. You can add a path in front of the file if the result should not be # written to the html output directory. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_FILE = # The HHC_LOCATION tag can be used to specify the location (absolute path # including file name) of the HTML help compiler (hhc.exe). If non-empty, # doxygen will try to run the HTML help compiler on the generated index.hhp. # The file has to be specified with full path. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated # (YES) or that it should be included in the main .chm file (NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. GENERATE_CHI = NO # The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) # and project file content. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_INDEX_ENCODING = # The BINARY_TOC flag controls whether a binary table of contents is generated # (YES) or a normal table of contents (NO) in the .chm file. Furthermore it # enables the Previous and Next buttons. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members to # the table of contents of the HTML help documentation and to the tree view. 
# The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that # can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help # (.qch) of the generated HTML documentation. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify # the file name of the resulting .qch file. The path specified is relative to # the HTML output folder. # This tag requires that the tag GENERATE_QHP is set to YES. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help # Project output. For more information please see Qt Help Project / Namespace # (see: # https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace). # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_NAMESPACE = # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt # Help Project output. For more information please see Qt Help Project / Virtual # Folders (see: # https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders). # The default value is: doc. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_VIRTUAL_FOLDER = doc # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom # filter to add. For more information please see Qt Help Project / Custom # Filters (see: # https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. 
For more information please see Qt Help Project / Custom # Filters (see: # https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's filter section matches. Qt Help Project / Filter Attributes (see: # https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_SECT_FILTER_ATTRS = # The QHG_LOCATION tag can be used to specify the location (absolute path # including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to # run qhelpgenerator on the generated .qhp file. # This tag requires that the tag GENERATE_QHP is set to YES. QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be # generated, together with the HTML files, they form an Eclipse help plugin. To # install this plugin and make it available under the help contents menu in # Eclipse, the contents of the directory containing the HTML and XML files needs # to be copied into the plugins directory of eclipse. The name of the directory # within the plugins directory should be the same as the ECLIPSE_DOC_ID value. # After copying Eclipse needs to be restarted before the help appears. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_ECLIPSEHELP = NO # A unique identifier for the Eclipse help plugin. When installing the plugin # the directory name containing the HTML and XML files should also have this # name. Each documentation set should have its own identifier. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. ECLIPSE_DOC_ID = org.doxygen.Project # If you want full control over the layout of the generated HTML pages it might # be necessary to disable the index and replace it with your own. 
The # DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top # of each HTML page. A value of NO enables the index and the value YES disables # it. Since the tabs in the index contain the same information as the navigation # tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. DISABLE_INDEX = NO # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. If the tag # value is set to YES, a side panel will be generated containing a tree-like # index structure (just like the one that is generated for HTML Help). For this # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can # further fine tune the look of the index (see "Fine-tuning the output"). As an # example, the default style sheet generated by doxygen has an example that # shows how to put an image at the root of the tree instead of the PROJECT_NAME. # Since the tree basically has the same information as the tab index, you could # consider setting DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_TREEVIEW = NO # When both GENERATE_TREEVIEW and DISABLE_INDEX are set to YES, then the # FULL_SIDEBAR option determines if the side bar is limited to only the treeview # area (value NO) or if it should extend to the full height of the window (value # YES). Setting this to YES gives a layout similar to # https://docs.readthedocs.io with more room for contents, but less room for the # project logo, title, and description. If either GENERATE_TREEVIEW or # DISABLE_INDEX is set to NO, this option has no effect.
# The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. FULL_SIDEBAR = NO # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # # Note that a value of 0 will completely suppress the enum values from appearing # in the overview section. # Minimum value: 0, maximum value: 20, default value: 4. # This tag requires that the tag GENERATE_HTML is set to YES. ENUM_VALUES_PER_LINE = 4 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used # to set the initial width (in pixels) of the frame in which the tree is shown. # Minimum value: 0, maximum value: 1500, default value: 250. # This tag requires that the tag GENERATE_HTML is set to YES. TREEVIEW_WIDTH = 250 # If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to # external symbols imported via tag files in a separate window. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. EXT_LINKS_IN_WINDOW = NO # If the OBFUSCATE_EMAILS tag is set to YES, doxygen will obfuscate email # addresses. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. OBFUSCATE_EMAILS = YES # If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg # tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see # https://inkscape.org) to generate formulas as SVG images instead of PNGs for # the HTML output. These images will generally look nicer at scaled resolutions. # Possible values are: png (the default) and svg (looks nicer but requires the # pdf2svg or inkscape tool). # The default value is: png. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FORMULA_FORMAT = png # Use this tag to change the font size of LaTeX formulas included as images in # the HTML documentation. 
When you change the font size after a successful # doxygen run you need to manually remove any form_*.png images from the HTML # output directory to force them to be regenerated. # Minimum value: 8, maximum value: 50, default value: 10. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_FONTSIZE = 10 # Use the FORMULA_TRANSPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are not # supported properly for IE 6.0, but are supported on all modern browsers. # # Note that when changing this option you need to delete any form_*.png files in # the HTML output directory before the changes have effect. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_TRANSPARENT = YES # The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands # to create new LaTeX commands to be used in formulas as building blocks. See # the section "Including formulas" for details. FORMULA_MACROFILE = # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see # https://www.mathjax.org) which uses client side JavaScript for the rendering # instead of using pre-rendered bitmaps. Use this if you do not have LaTeX # installed or if you want formulas to look prettier in the HTML output. When # enabled you may also need to install MathJax separately and configure the path # to it using the MATHJAX_RELPATH option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. USE_MATHJAX = NO # With MATHJAX_VERSION it is possible to specify the MathJax version to be used. # Note that the different versions of MathJax have different requirements with # regards to the different settings, so it is possible that also other MathJax # settings have to be changed when switching between the different MathJax # versions. # Possible values are: MathJax_2 and MathJax_3. # The default value is: MathJax_2.
# This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_VERSION = MathJax_2 # When MathJax is enabled you can set the default output format to be used for # the MathJax output. For more details about the output format see MathJax # version 2 (see: # http://docs.mathjax.org/en/v2.7-latest/output.html) and MathJax version 3 # (see: # http://docs.mathjax.org/en/latest/web/components/output.html). # Possible values are: HTML-CSS (which is slower, but has the best # compatibility. This is the name for MathJax version 2, for MathJax version 3 # this will be translated into chtml), NativeMML (i.e. MathML. Only supported # for MathJax 2. For MathJax version 3 chtml will be used instead.), chtml (This # is the name for MathJax version 3, for MathJax version 2 this will be # translated into HTML-CSS) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_FORMAT = HTML-CSS # When MathJax is enabled you need to specify the location relative to the HTML # output directory using the MATHJAX_RELPATH option. The destination directory # should contain the MathJax.js script. For instance, if the mathjax directory # is located at the same level as the HTML output directory, then # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of # MathJax from https://www.mathjax.org before deployment. The default value is: # - in case of MathJax version 2: https://cdn.jsdelivr.net/npm/mathjax@2 # - in case of MathJax version 3: https://cdn.jsdelivr.net/npm/mathjax@3 # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering.
For example # for MathJax version 2 (see https://docs.mathjax.org/en/v2.7-latest/tex.html # #tex-and-latex-extensions): # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols # For example for MathJax version 3 (see # http://docs.mathjax.org/en/latest/input/tex/extensions/index.html): # MATHJAX_EXTENSIONS = ams # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code. See the MathJax site # (see: # http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an # example see the documentation. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_CODEFILE = # When the SEARCHENGINE tag is enabled doxygen will generate a search box for # the HTML output. The underlying search engine uses javascript and DHTML and # should work on any modern browser. Note that when using HTML help # (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) # there is already a search function so this one should typically be disabled. # For large projects the javascript based search engine can be slow, then # enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to # search using the keyboard; to jump to the search box use <access key> + S # (what the <access key> is depends on the OS and browser, but it is typically # <CTRL>, <ALT>/<option>