././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2918184 rocwmma/0000775000175100017510000000000015206065535012462 5ustar00jenkinsjenkins././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2758183 rocwmma/.azuredevops/0000775000175100017510000000000015206065535015107 5ustar00jenkinsjenkins././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2758183 rocwmma/.azuredevops/rocm-ci.yml0000664000175100017510000000124115206065535017161 0ustar00jenkinsjenkinsresources: repositories: - repository: pipelines_repo type: github endpoint: ROCm name: ROCm/ROCm variables: - group: common - template: /.azuredevops/variables-global.yml@pipelines_repo trigger: batch: true branches: include: - develop - mainline paths: exclude: - .githooks - .github - .jenkins - docs - '.*.y*ml' - '*.md' pr: autoCancel: true branches: include: - develop - mainline paths: exclude: - .githooks - .github - .jenkins - docs - '.*.y*ml' - '*.md' drafts: false jobs: - template: ${{ variables.CI_COMPONENT_PATH }}/rocWMMA.yml@pipelines_repo ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2758183 rocwmma/.clang-format0000775000175100017510000000654215206065535015047 0ustar00jenkinsjenkins# Style file for MLSE Libraries based on the modified rocBLAS style # Common settings BasedOnStyle: WebKit TabWidth: 4 IndentWidth: 4 UseTab: Never ColumnLimit: 100 # Other languages JavaScript, Proto --- Language: Cpp # http://releases.llvm.org/6.0.1/tools/clang/docs/ClangFormatStyleOptions.html#disabling-formatting-on-a-piece-of-code # int formatted_code; # // clang-format off # void unformatted_code ; # // clang-format on # void formatted_code_again; DisableFormat: false Standard: Cpp11 AccessModifierOffset: -4 AlignAfterOpenBracket: Align AlignConsecutiveAssignments: true AlignConsecutiveDeclarations: true AlignEscapedNewlines: Left AlignOperands: true 
AlignTrailingComments: false AllowAllArgumentsOnNextLine: true AllowAllConstructorInitializersOnNextLine: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false AllowShortFunctionsOnASingleLine: Empty AllowShortIfStatementsOnASingleLine: false AllowShortLoopsOnASingleLine: false AlwaysBreakAfterDefinitionReturnType: false AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false AlwaysBreakTemplateDeclarations: true BinPackArguments: false BinPackParameters: false # Configure each individual brace in BraceWrapping BreakBeforeBraces: Custom # Control of individual brace wrapping cases BraceWrapping: { AfterCaseLabel: 'true' AfterClass: 'true' AfterControlStatement: 'true' AfterEnum : 'true' AfterFunction : 'true' AfterNamespace : 'true' AfterStruct : 'true' AfterUnion : 'true' BeforeCatch : 'true' BeforeElse : 'true' IndentBraces : 'false' # AfterExternBlock : 'true' } #BreakAfterJavaFieldAnnotations: true #BreakBeforeInheritanceComma: false #BreakBeforeBinaryOperators: None #BreakBeforeTernaryOperators: true #BreakConstructorInitializersBeforeComma: true #BreakStringLiterals: true CommentPragmas: '^ IWYU pragma:' #CompactNamespaces: false ConstructorInitializerAllOnOneLineOrOnePerLine: false ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true SpaceBeforeCpp11BracedList: false DerivePointerAlignment: false ExperimentalAutoDetectBinPacking: false ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] IndentCaseLabels: false IndentPPDirectives: None #FixNamespaceComments: true IndentWrappedFunctionNames: true KeepEmptyLinesAtTheStartOfBlocks: true MacroBlockBegin: '' MacroBlockEnd: '' #JavaScriptQuotes: Double MaxEmptyLinesToKeep: 1 NamespaceIndentation: All ObjCBlockIndentWidth: 4 #ObjCSpaceAfterProperty: true #ObjCSpaceBeforeProtocolList: true PenaltyBreakBeforeFirstCallParameter: 19 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 
PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 60 PointerAlignment: Left SpaceAfterCStyleCast: false SpaceBeforeAssignmentOperators: true SpaceBeforeParens: Never SpaceInEmptyBlock: false SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 SpacesInAngles: false SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false #SpaceAfterTemplateKeyword: true #SpaceBeforeInheritanceColon: true #SortUsingDeclarations: true SortIncludes: true # Comments are for developers, they should arrange them ReflowComments: false #IncludeBlocks: Preserve --- ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2758183 rocwmma/.gitattributes0000664000175100017510000000015315206065535015354 0ustar00jenkinsjenkins*.pdf binary *.doc binary *.docx binary *.ppt binary *.pptx binary *.xls binary *.xlsx binary *.xps binary ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2758183 rocwmma/.githooks/0000775000175100017510000000000015206065535014367 5ustar00jenkinsjenkins././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2758183 rocwmma/.githooks/install0000775000175100017510000000022115206065535015756 0ustar00jenkinsjenkins#!/usr/bin/env bash cd $(git rev-parse --git-dir) cd hooks echo "Installing hooks..." ln -s ../../.githooks/pre-commit pre-commit echo "Done!" ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2758183 rocwmma/.githooks/pre-commit0000775000175100017510000000456515206065535016403 0ustar00jenkinsjenkins#!/bin/bash # # This pre-commit hook checks if any versions of clang-format # are installed, and if so, uses the installed version to format # the staged changes. export PATH=/opt/rocm/llvm/bin:/opt/rocm/hcc/bin:/usr/bin:/bin # Redirect stdout to stderr. 
exec >&2 # Do everything from top - level cd $(git rev-parse --show-toplevel) if git rev-parse --verify HEAD >/dev/null 2>&1; then against=HEAD else # Initial commit: diff against an empty tree object against=4b825dc642cb6eb9a060e54bf8d69288fbee4904 fi if [[ "$1" == "--reformat" ]]; then files=$(git ls-files --exclude-standard) else files=$(git diff-index --cached --name-only $against) fi [[ -z "$files" ]] && exit # Change the copyright date at the top of any text files for file in $files; do echo "Processing copyright dates in $file" if [[ -e $file ]]; then /usr/bin/perl -pi -e 'INIT { exit 1 if !-f $ARGV[0] || -B $ARGV[0]; $mon = (localtime)[4] + 1; $year = (localtime)[5] + 1900 + ($mon >= 10 ? 1 : 0) } s/^([*\/#\/"*[:space:]]*)Copyright\s+(?:\(C\)\s*)?(\d+)(?:\s*-\s*\d+)?\s(Advanced\s*Micro\s*Devices)/qq($1Copyright (C) $2@{[$year != $2 ? "-$year" : ""]} $3)/ie if $. < 10' "$file" && git add -u "$file" fi done # do the formatting for file in $files; do if [[ -e $file ]] && echo $file | grep -Eq '\.c$|\.h$|\.hpp$|\.cpp$|\.cl$|\.in$|\.txt$|\.yaml$|\.yml$|\.sh$|\.py$|\.pl$|\.cmake$|\.md$|\.rst$|\.groovy$|\.ini$|\.awk$|\.csv$'; then echo "Processing line endings in $file" sed -i -e 's/[[:space:]]*$//' "$file" # Remove whitespace at end of lines sed -i -e '$a\' "$file" # Add missing newline to end of file echo "Converting non-ASCII characters to ASCII equivalents in $file" # Convert UTF8 non-ASCII to ASCII temp=$(mktemp) [[ -w $temp ]] || exit iconv -s -f utf-8 -t ascii//TRANSLIT "$file" > "$temp" || exit chmod --reference="$file" "$temp" || exit mv -f "$temp" "$file" || exit git add -u "$file" fi done # if clang-format exists, run it on C/C++ files if command -v clang-format >/dev/null; then for file in $files; do if [[ -e $file ]] && echo $file | grep -Eq '\.c$|\.h$|\.hpp$|\.cpp$|\.cl$|\.h\.in$|\.hpp\.in$|\.cpp\.in$'; then echo "clang-format $file" clang-format -i -style=file "$file" git add -u "$file" fi done fi 
././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2758183 rocwmma/.github/0000775000175100017510000000000015206065535014022 5ustar00jenkinsjenkins././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2758183 rocwmma/.github/CODEOWNERS0000775000175100017510000000047515206065535015426 0ustar00jenkinsjenkins* @cgmillette @congma13 @Ryker0627 @jameseperry @bsyrowik # Documentation files docs/* @ROCm/rocm-documentation *.md @ROCm/rocm-documentation *.rst @ROCm/rocm-documentation # Header directory for Doxygen documentation library/include/* @ROCm/rocm-documentation @cgmillette @congma13 @Ryker0627 @jameseperry @bsyrowik ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2758183 rocwmma/.github/dependabot.yml0000664000175100017510000000122315206065535016650 0ustar00jenkinsjenkins# To get started with Dependabot version updates, you'll need to specify which # package ecosystems to update and where the package manifests are located. 
# Please see the documentation for all configuration options: # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates version: 2 updates: - package-ecosystem: "pip" # See documentation for possible values directory: "/docs/sphinx" # Location of package manifests open-pull-requests-limit: 10 schedule: interval: "daily" labels: - "documentation" - "dependencies" - "ci:docs-only" reviewers: - "samjwu" ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2758183 rocwmma/.github/workflows/0000775000175100017510000000000015206065535016057 5ustar00jenkinsjenkins././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2758183 rocwmma/.github/workflows/docs.yaml0000664000175100017510000000446315206065535017702 0ustar00jenkinsjenkinsname: Upload to the upload server # Controls when the workflow will run on: push: branches: [develop, master] tags: - rocm-5.* release: types: [published] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: # This workflow contains a single job called "build" build: # The type of runner that the job will run on runs-on: ubuntu-latest # Steps represent a sequence of tasks that will be executed as part of the job steps: # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - uses: actions/checkout@v2 - name: getting branch name shell: bash run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})" id: branch_name - name: getting tag name shell: bash run: echo "##[set-output name=tag;]$(echo ${GITHUB_REF_NAME})" id: tag_name - name: zipping files run: zip -r ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip . 
-x '*.git*' '*.idea*' - name: echo-step run: echo "${{ github.event.release.target_commitish }}" - name: uploading archive to prod if: ${{ steps.branch_name.outputs.branch == 'master' || github.event.release.target_commitish == 'master'}} uses: wlixcc/SFTP-Deploy-Action@v1.0 with: username: ${{ secrets.USERNAME }} server: ${{ secrets.SERVER }} ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }} local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip remote_path: '${{ secrets.PROD_UPLOAD_URL }}' args: '-o ConnectTimeout=5' - name: uploading archive to staging if: ${{ steps.branch_name.outputs.branch == 'develop' || github.event.release.target_commitish == 'develop' }} uses: wlixcc/SFTP-Deploy-Action@v1.0 with: username: ${{ secrets.USERNAME }} server: ${{ secrets.SERVER }} ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }} local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip remote_path: '${{ secrets.STG_UPLOAD_URL }}' args: '-o ConnectTimeout=5' ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2758183 rocwmma/.gitignore0000664000175100017510000000067515206065535014462 0ustar00jenkinsjenkins# Compiled Object files *.slo *.lo *.o *.obj # Generated version file rocwmma_version.hpp rocwmma-version.hpp # Precompiled Headers *.gch *.pch # Compiled Dynamic libraries *.so *.dylib *.dll # Fortran module files *.mod # Compiled Static libraries *.lai *.la *.a *.lib # Executables *.exe *.out *.app # vim tags tags .tags .*.swp # Editors .vscode # build-in-source directory build* # emacs temporary/backup files .\#* \#*\# *~ *.log ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2768183 rocwmma/.jenkins/0000775000175100017510000000000015206065535014201 5ustar00jenkinsjenkins././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2768183 
rocwmma/.jenkins/common.groovy0000664000175100017510000000534415206065535016746 0ustar00jenkinsjenkins// This file is for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. def runCompileCommand(platform, project, jobName, boolean debug=false) { project.paths.construct_build_prefix() def getDependenciesCommand = "" if (project.installLibraryDependenciesFromCI) { project.libraryDependencies.each { libraryName -> getDependenciesCommand += auxiliary.getLibrary(libraryName, platform.jenkinsLabel, 'develop') } } String buildTypeArg = debug ? '-DCMAKE_BUILD_TYPE=Debug' : '-DCMAKE_BUILD_TYPE=Release' String buildTypeDir = debug ? 'debug' : 'release' String cmake = platform.jenkinsLabel.contains('centos') ? 'cmake3' : 'cmake' //Set CI node's gfx arch as target if PR, otherwise use default targets of the library String amdgpuTargets = env.BRANCH_NAME.startsWith('PR-') ? '-DAMDGPU_TARGETS="$gfx_arch"' : '' String compilerLauncher = project.defaults.ccache ? '-DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache' : '' String cmakeArgs = "-DCMAKE_C_COMPILER=/opt/rocm/bin/hipcc -DCMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc ${compilerLauncher} ${buildTypeArg} ${amdgpuTargets}" String hipccCompileFlags = "export HIPCC_COMPILE_FLAGS_APPEND='-O3 -Wno-format-nonliteral -parallel-jobs=1'" // Set number of compile threads to lesser of nproc or 12 int nproc = Runtime.runtime.availableProcessors() int numThreads = Math.min(nproc, 12) def command = """#!/usr/bin/env bash set -x cd ${project.paths.project_build_prefix} ${getDependenciesCommand} echo Original HIPCC_COMPILE_FLAGS_APPEND: \$HIPCC_COMPILE_FLAGS_APPEND ${hipccCompileFlags} mkdir -p build/${buildTypeDir} && cd build/${buildTypeDir} ${auxiliary.gfxTargetParser()} ${cmake} ${cmakeArgs} -DROCWMMA_BUILD_BENCHMARK_TESTS=OFF ../.. 
make -j ${numThreads} """ platform.runCommand(this, command) } def runTestCommand (platform, project) { def testCommand = "ctest --output-on-failure " def testCommandExclude = "" def command = """#!/usr/bin/env bash set -x cd ${project.paths.project_build_prefix} cd ${project.testDirectory} ${testCommand} ${testCommandExclude} """ platform.runCommand(this, command) } def runPackageCommand(platform, project) { def packageHelper = platform.makePackage(platform.jenkinsLabel,"${project.paths.project_build_prefix}/build/release") platform.runCommand(this, packageHelper[0]) platform.archiveArtifacts(this, packageHelper[1]) } return this ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2768183 rocwmma/.jenkins/precheckin.groovy0000664000175100017510000000426415206065535017571 0ustar00jenkinsjenkins#!/usr/bin/env groovy @Library('rocJenkins@pong') _ import com.amd.project.* import com.amd.docker.* import java.nio.file.Path; def runCI = { nodeDetails, jobName-> def prj = new rocProject('rocWMMA', 'precheckin') prj.paths.build_command = './install -c' prj.libraryDependencies = ['hipBLAS-common', 'hipBLASLt', 'rocBLAS'] prj.defaults.ccache = true prj.timeout.compile = 1200 def nodes = new dockerNodes(nodeDetails, jobName, prj) def commonGroovy boolean formatCheck = false def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName) } def testCommand = { platform, project-> commonGroovy.runTestCommand(platform, project) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi":[pipelineTriggers([cron('0 1 * * 0')])], 
"compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])], "rocm-docker":[]] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx908'],centos7:['gfx908'],centos8:['gfx908'],sles15sp1:['gfx908'],ubuntu20:['gfx90a']])] jobNameList = auxiliary.appendJobNameList(jobNameList, 'rocWMMA') propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName){ runCI(nodeDetails, jobName) } } // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { runCI([ubuntu22:['gfx90a']], urlJobName) } } ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2768183 rocwmma/.jenkins/staticanalysis.groovy0000664000175100017510000000334115206065535020504 0ustar00jenkinsjenkins#!/usr/bin/env groovy // This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ @Library('rocJenkins@pong') _ // This is file for internal AMD use. // If you are interested in running your own Jenkins, please raise a github issue for assistance. 
import com.amd.project.* import com.amd.docker.* import java.nio.file.Path def runCompileCommand(platform, project, jobName, boolean debug=false) { project.paths.construct_build_prefix() } def runCI = { nodeDetails, jobName-> def prj = new rocProject('rocWMMA', 'StaticAnalysis') // Define test architectures, optional rocm version argument is available def nodes = new dockerNodes(nodeDetails, jobName, prj) boolean formatCheck = false boolean staticAnalysis = true def compileCommand = { platform, project-> runCompileCommand(platform, project, jobName, false) } buildProject(prj , formatCheck, nodes.dockerArray, compileCommand, null, null, staticAnalysis) } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 6')])], "rocm-docker":[]] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":[]] jobNameList = auxiliary.appendJobNameList(jobNameList) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(jobName) { runCI(nodeDetails, jobName) } } } ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2768183 rocwmma/.jenkins/staticlibrary.groovy0000664000175100017510000000425615206065535020333 0ustar00jenkinsjenkins#!/usr/bin/env groovy @Library('rocJenkins@pong') _ import com.amd.project.* import com.amd.docker.* import java.nio.file.Path; def runCI = { nodeDetails, jobName-> def prj = new rocProject('rocWMMA', 'Static Library PreCheckin') def nodes = new dockerNodes(nodeDetails, jobName, prj) def commonGroovy boolean formatCheck = false def compileCommand = { platform, project-> commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" commonGroovy.runCompileCommand(platform, project, jobName, false) } def 
testCommand = { platform, project-> commonGroovy.runTestCommand(platform, project) } def packageCommand = { platform, project-> commonGroovy.runPackageCommand(platform, project) } buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) } ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) def propertyList = ["compute-rocm-dkms-no-npi":[pipelineTriggers([cron('0 1 * * 0')])], "compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])], "rocm-docker":[]] propertyList = auxiliary.appendPropertyList(propertyList) def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx908'],centos7:['gfx908'],centos8:['gfx908'],sles15sp1:['gfx908'],ubuntu20:['gfx90a']])] // jobNameList = auxiliary.appendJobNameList(jobNameList) propertyList.each { jobName, property-> if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } jobNameList.each { jobName, nodeDetails-> if (urlJobName == jobName) stage(jobName) { runCI(nodeDetails, jobName) } } // For url job names that are not listed by the jobNameList i.e. 
compute-rocm-dkms-no-npi-1901 if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) stage(urlJobName) { runCI([ubuntu16:['gfx906']], urlJobName) } } } ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2768183 rocwmma/.readthedocs.yaml0000664000175100017510000000050215206065535015706 0ustar00jenkinsjenkins# Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details version: 2 sphinx: configuration: docs/conf.py formats: [htmlzip, pdf, epub] python: install: - requirements: docs/sphinx/requirements.txt build: os: ubuntu-22.04 tools: python: "3.10" ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2768183 rocwmma/CHANGELOG.md0000664000175100017510000003526615206065535014307 0ustar00jenkinsjenkins# Changelog for rocWMMA Documentation for rocWMMA is available at [https://rocm.docs.amd.com/projects/rocWMMA/en/latest](https://rocm.docs.amd.com/projects/rocWMMA/en/latest). ## (Unreleased) rocWMMA 2.2.0 for ROCm 7.2.0 ### Added * Added sample `perf_i8gemm` to demonstrate `int8_t` as matrix input data type * Added support for the gfx1150 target ### Changed * Removed unnecessary const keyword to avoid compiler warnings * rocWMMA has been moved into the new rocm-libraries "monorepo" repository (https://github.com/ROCm/rocm-libraries). This repository consolidates a number of separate ROCm libraries and shared components. * The repository migration requires a few changes to the CMake configuration of rocWMMA * The repository migration rquired the GTest dependency to be updated to v1.16.0 ### Resolved issues * Skip invalid test configurations when using 'register file' LDS mapping * Ensured transform functions in samples are only available on the device ## rocWMMA 2.1.0 for ROCm 7.1.1 ### Added * Added more unit tests to increase the code coverage. 
### Changed * Increased compile timeout and improved visualization in `math-ci`. ### Removed * Removed absolute paths from the `RPATH` of sample and test binary files. ### Resolved issues * Fixed issues caused by HIP changes: * Removed the .data member from HIP_vector_type. * Broadcast constructor now only writes to the first vector element. * Fixed a bug related to `int32_t` usage in `hipRTC_gemm` for gfx942, caused by breaking changes in HIP. * Replaced `#pragma unroll` with `static for` to fix a bug caused by the upgraded compiler which no longer supports using `#pragma unroll` with template parameter indices. * Corrected test predicates for `BLK` and `VW` cooperative kernels. * Modified `compute_utils.sh` in `build-infra` to ensure rocWMMA is built with gfx1151 target for ROCm 7.0 and beyond. ## rocWMMA 2.0.0 for ROCm 7.0.0 ### Added * Added internal register layout transforms to support interleaved MMA layouts * Added support for the gfx950 target * Added mixed input `bf8` / `fp8` types for MMA support * Added fragment scheduler API objects to embed thread block cooperation properties in fragments ### Changed * Augmented load / store / MMA internals with static loop unrolling * rocWMMA mma_sync API now supports `wave tile` fragment sizes * rocWMMA cooperative fragments are now expressed with fragment scheduler template arguments * rocWMMA cooperative fragments now use the same base API as non-cooperative fragments * rocWMMA cooperative fragments register usage footprint has been reduced * rocWMMA fragments now support partial tile sizes with padding ### Optimized * Added internal flow control barriers to improve assembly code generation and overall performance * Enabled interleaved layouts by default in MMA to improve overall performance ### Removed * Removed support for the gfx940 and gfx941 targets * Removed the rocWMMA cooperative API * Removed wave count template parameters from transforms APIs ### Resolved issues * Fixed a validation issue for small 
precision compute types `< B32` on gfx9 * Fixed CMake validation of compiler support for `bf8` / `fp8` types * Fixed linkage of rocwmma::synchronize_workgroup to inline ## rocWMMA 1.7.0 for ROCm 6.4.0 ### Added * Added interleaved layouts that enhance the performance of GEMM operations * Added emulation test suites. These suites are lightweight and well-suited for execution on emulator platforms ### Changed * Used GPU_TARGETS instead of AMDGPU_TARGETS in `cmakelists.txt` * The binary sizes can be reduced on supported compilers by using the `--offload-compress` compiler flag ### Resolved issues * For a CMake bug workaround, set `CMAKE_NO_BUILTIN_CHRPATH` when `BUILD_OFFLOAD_COMPRESS` is unset ## rocWMMA 1.6.0 for ROCm 6.3.0 ### Added * Added OCP f8/bf8 datatype support * Added support for gfx12 architecture targets ### Changed * Optimized some aos<->soa transforms with half-rotation offsets * Refactored the rocBLAS reference entry point for validation and benchmarking * ROCWMMA_* preprocessor configurations are now all assigned values * Updated the default architecture targets for ASAN builds * Updated the actor-critic implementation ### Resolved issues * Fixed a bug in f64 validation due to faulty typecasting * Fixed a bug causing runtime compilation errors with hipRTC * Various documentation updates and fixes ### Upcoming changes * rocWMMA 2.x plans to augment fragment API objects with additional meta-properties which will improve API expressiveness and configurability of parameters including multiple-wave cooperation. As part of this change, cooperative rocWMMA API functions `load_matrix_coop_sync` and `store_matrix_coop_sync` will become deprecated in a future ROCm release. 
## rocWMMA 1.5.0 for ROCm 6.2.0 ### Additions * Added internal utilities for element-wise vector transforms * Added internal utilities for cross-lane vector transforms * Implemented internal aos<->soa transforms for block sizes of 16, 32, 64, 128 and 256 and vector widths of 2, 4, 8 and 16 * Added tests for new internal transforms ### Changes * Improved loading layouts by increasing vector width for fragments with blockDim > 32 * API applyDataLayout transform now accepts WaveCount template argument for cooperative fragments * API applyDataLayout transform now physically applies aos<->soa transform as necessary * Refactored entry-point of std library usage to improve hipRTC support * Documentation updates for installation, programmer's guide and API reference ### Fixes * Fixed some header includes ordering to improve portability ## rocWMMA 1.4.0 for ROCm 6.1.0 ### Additions * Added bf16 support for hipRTC sample ### Changes * Changed Clang C++ version to C++17 * Updated rocwmma_coop API * Linked rocWMMA to hiprtc ### Fixes * Fixed compile/runtime arch checks * Built all test in large code model * Removed inefficient branching in layout loop unrolling ## rocWMMA 1.3.0 for ROCm 6.0.0 ### Additions * Support for gfx940, gfx941, and gfx942 targets * Support for f8, bf8, and xfloat32 data types * support for `HIP_NO_HALF`, `__ HIP_NO_HALF_CONVERSIONS__`, and `__ HIP_NO_HALF_OPERATORS__` (e.g., PyTorch environment) ### Changes * rocWMMA with hipRTC now supports `bfloat16_t` data type * gfx11 WMMA now uses lane swap instead of broadcast for layout adjustment * Updated samples GEMM parameter validation on host arch ### Fixes * Disabled GoogleTest static library deployment * Extended tests now build in large code model ## rocWMMA 1.2.0 for ROCm 5.7.0 ### Changes * Fixed a synchronization bug * Updated rocWMMA CMake versioning ## rocWMMA 1.1.0 for ROCm 5.6.0 ### Additions * Cross-lane operation backends (Blend, Permute, Swizzle, and Dpp) * GPU kernels for rocWMMA unit test 
pre-process and post-process operations (fill, validation) * Performance GEMM samples for half, single, and double precision * rocWMMA CMake versioning * Vectorized support in coordinate transforms * Included ROCm SMI for runtime clock rate detection * Fragment transforms for transpose and change data layout ### Changes * Default to GPU rocBLAS validation against rocWMMA * Re-enabled int8 GEMM tests on gfx9 * Upgraded to C++17 * Restructured the unit test folder for consistency * Consolidated rocWMMA samples common code ## rocWMMA 1.0 for ROCm 5.5.0 ### Additions * Support for Wave32 on gfx11+ * Infrastructure changes to support hipRTC * Performance tracking system * Library config to support multiple architectures * Vector cross-lane operations support ### Changes * Modified the assignment of hardware information * Modified data access for unsigned data types * Refactored vector backend to be compatible with `HIP_vector_type` ## rocWMMA 0.9 for ROCm 5.4.0 ### Additions * GEMM driver APIs for flow control built-ins * benchmark logging systems * Restructured tests to follow naming convention; added macros for test generation ### Changes * Changed CMake to accommodate the modified test infrastructure * Fine-tuned the multi-block kernels with and without lDs * Adjusted maximum vector width to dWordx4 * Updated efficiencies to display as whole number percentages * Updated throughput from GFlops/s to TFlops/s * Reset the ad-hoc tests to use smaller sizes * Modified the output validation to use CPU-based implementation against rocWMMA * Modified the extended vector test to return error codes for memory allocation failures ## rocWMMA 0.8 for ROCm 5.3.0 ### Additions * Runtime checks to disable tests on non-target GPUS * Workgroup-aware GEMM kernels * Workgroup-aware validation and benchmark test suite * Warm-up run to existing tests ### Changes * Refactored `lds_mapping_util` into GEMM global, local mapping, GEMM driver, GEMM config, and scheduling classes * Modified 
resource allocation and tracking of GEMM and DLRM buffers * Improved low-level data loading patterns * Reduced branching on cooperative load and store * Updated GEMV sample * Updated GEMM sample ## rocWMMA 0.7 for ROCm 5.2.0 ### Additions * Unit tests for DLRM kernels * GEMM sample * DLRM sample * SGEMV sample * Unit tests for cooperative WMMA load and stores * Unit tests for `IOBarrier.h` * WMMA load and store tests for different matrix types (A, B, and Accumulator) * More block sizes (1, 2, 4, 8) to test `MmaSyncMultiTest` * Block sizes 4, 8 to test `MmaSynMultiLdsTest` * Support for WMMA load and store layouts with a block dimension greater than 64 * IOShape structure to define the attributes of mapping and layouts for all WMMA matrix types * CI testing for rocWMMA ### Changes * Renamed WMMA to rocWMMA in CMake, header files, and documentation * Renamed library files * Modified `Layout.h` to use different matrix offset calculations (base offset, incremental offset, and cumulative offset) * Opaque load and store continue to use incremental offsets as they fill the entire block * Cooperative load and store use cumulative offsets as they fill only small portions for the entire block * Increased max split counts to 64 for cooperative load and store * Moved all the WMMA definitions and API headers to the rocWMMA namespace * Modified WMMA fill unit tests to validate all matrix types (A, B, Accumulator) ## rocWMMA 0.6 ### Additions * Unit tests for `MappingUtil.h` * Unit tests for `Layout.h` * Unit tests for non-native vector class in `Types.h` * Unit tests for WMMA load and store contamination check * Doxygen support for rocWMMA documentation * MFMA barrier in `IOBarrier.h` * A CMake flag to support WMMA kernel assembly code generation * MMA sync test WMMA operation with LDS usage * A script to generate the plots of different WMMA benchmarks * Multi-block kernels with LDS usage * Unit tests for multi-block WMMA kernels ### Changes * Modified GLlops calculation to 
accommodate multiple devices * Removed half-types packing quirk with col major output * Moved HIP resource management to `HipResource` class * Fixed NaN errors during output comparison ## rocWMMA 0.5 ### Additions * Templatization for the `amdgcn_convert` class * WMMA load, store, and fill support for integral data types and float64 * MFMA support for i8 * Support for `bf16_1k` MFMA instructions * Code to identify the card type and its support during runtime ### Changes * Refactored and simplified `IOBroadcast.h` * Modified the fragment interface compatible with NVIDIA's definition * Modified CMake to create a lean build of the rocWMMA library ## rocWMMA 0.4 ### Additions * CMake support for the library and unit tests * Integrated unit test with GoogleTest and OpenMP * Host overload operators for `hfloat16_t` ### Fixes * Relative error calculation for non-integral data comparison * Assembly generation of cooperative load and store code * Compiler issues with new versions of ROCm ### Changes * Sped up compilation time by moving thread block sizes to function arguments instead of template parameters * Moved all the existing unit tests to a `test` folder * Moved all the header files to `library/include` * Modified `Layout.h` to use RowNT/ColNT to eliminate LDS usage in `mma_sync` * Deprecated buffer load/store and local load/store ## rocWMMA 0.3 ### Additions * support for the bfloat16 compute type ### Changes * Renamed `__half` to `hfloat_16` for consistency * Modified `Convert.h` to support native to bfloat16 conversion and vice versa * Modified `IOBroadCast.h` to incorporate bfloat16 data packing * Modified `IOTraits.h` to add bfloat16 packing traits * Modified `MFMA.h` to add MFMA invocation calls to bfloat16 data * Modified WMMA types to include `bfloat16_t` * Modified the WMMA load, store, and MMA unit tests to validate bfloat16 ## rocWMMA 0.2 ### Additions * Support for fp16 compute type * Direct MFMA support for non-native `__half` data type ### Changes * 
Adjusted the vector storage to accommodate non-native types * Fixed data comparison operators for fp16 data types * Modified `Convert.h` to support native to `__half` conversion and vice versa * Modified `IOBroadCast.h` to incorporate `__half` data packing * Modified `IOTraits.h` to add `__half` packing traits * Modified `MFMA.h` to add MFMA invocation calls to `__half` data * Modified WMMA Types to include `__half _t` * Modified the WMMA load, store, and MMA unit tests to validate `__half` ## rocWMMA 0.1 ### Additions * Defined a WMMA namespace with the supported matrix types, memory, and layouts * Defined a fragment datatype to control the data transfer between HIP and MFMA * Implemented the rocWMMA functions : `load_matrix_sync`, `load_matrix_coop_sync`, `store_matrix_sync`, `fill_fragment`, and `mma_sync` * Implemented `Types.h` to define the supported data types * Implemented the class `IOTraits` to define packing traits for the defined types as WMMA works on the packed registers * Buffer load, store to support LLVM data instructions * Opaque load, store * Cooperative load, store to optimize the memory overhead * Local load, store to perform register packing * Implemented `Convert.h` to perform non-native data type conversion to native types and vice versa * `IOBroadcast` class to perform packing for all input data (multiple registers) * Implemented `IOConfig` to set the optimal input/output configurations for rocWMMA matrix types * Implemented `IOPack` and `IOUnpack` to convert the unpacked device memory into packed registers and vice versa * `Layout` class to define the data layout in matrix space * MFMA to call the low-level MFMA hardware instructions * Implemented the `MappingUtil` class to map from workgroup configurations to functional wave units * `Performance.h` to compute GFLOPS based on hardware configurations * `Reference.h` to implement the CPU GEMM operation * `Utils.h` to implement matrix data operations * `rocBLASReference.h` to invoke the 
rocBLAS GEMM function * Unit tests to validate WMMA APIs (`load`, `store`, `fill`, and `mma`) * Makefile support to build library and tests ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2768183 rocwmma/CMakeLists.txt0000664000175100017510000002043415206065535015225 0ustar00jenkinsjenkins############################################################################### # # MIT License # # Copyright (C) 2021-2025 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # ############################################################################### cmake_minimum_required( VERSION 3.14 ) message(STATUS "CMake version: ${CMAKE_VERSION}") if(NOT WIN32) if(CMAKE_GENERATOR STREQUAL "Ninja") message(STATUS "The CMake generator is Ninja.") set(CMAKE_BUILD_WITH_INSTALL_RPATH ON) else() message(WARNING "The CMake generator is not Ninja. 
Ninja is preferred over Make.") endif() endif() # This has to be initialized before the project() command appears # Set the default of CMAKE_BUILD_TYPE to be release, unless user specifies with -D. MSVC_IDE does not use CMAKE_BUILD_TYPE if( NOT DEFINED CMAKE_CONFIGURATION_TYPES AND NOT DEFINED CMAKE_BUILD_TYPE ) set( CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." ) endif() message(STATUS "CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}") set( CMAKE_EXPORT_COMPILE_COMMANDS ON CACHE BOOL "" FORCE ) ### Project ROCWMMA project( rocwmma LANGUAGES CXX ) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) #Set Clang C++ flags. set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -O2") # clang++ crashes without -O2 set(CMAKE_CXX_FLAGS_MINSIZEREL "-O2 -DNDEBUG") # clang++ failed to build the project with the default -Os set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --driver-mode=g++ -Xclang -fallow-half-arguments-and-returns -D__HIP_HCC_COMPAT_MODE__=1 -Wno-format-nonliteral -parallel-jobs=4 -fclang-abi-compat=17") # Top level configs if( CMAKE_PROJECT_NAME STREQUAL "rocwmma" ) option( ROCWMMA_BUILD_TESTS "Build rocWMMA tests" ON ) option( ROCWMMA_BUILD_SAMPLES "Build rocWMMA samples" ON ) option( ROCWMMA_BUILD_ASSEMBLY "Output assembly files" OFF ) option( BUILD_OFFLOAD_COMPRESS "Build rocWMMA with offload compression" ON ) option( ROCWMMA_CODE_COVERAGE "Build with code coverage flags (clang only)" OFF) endif() if( CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT ) set( CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix, prepended onto install directories" FORCE ) endif() if( NOT CPACK_PACKAGING_INSTALL_PREFIX ) set(CPACK_PACKAGING_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}" CACHE PATH "Install path prefix for packages.") set(CPACK_SET_DESTDIR OFF) endif() set(BUILD_SHARED_LIBS ON) # This helps cmake properly find hip-config.cmake list( APPEND CMAKE_PREFIX_PATH 
$ENV{ROCM_PATH} ${ROCM_PATH} /opt/rocm ) # Append our library helper cmake path and the cmake path for hip (for convenience). # Users may override HIP path by specifying their own in CMAKE_MODULE_PATH list( APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ) find_package(ROCmCMakeBuildTools 0.7 CONFIG REQUIRED) include(ROCMSetupVersion) include(ROCMCreatePackage) include(ROCMInstallTargets) include(ROCMCheckTargetIds) include(ROCMClients) # Versioning via rocm-cmake set ( VERSION_STRING "2.2.0" ) rocm_setup_version( VERSION ${VERSION_STRING} ) # configure a header file to pass the CMake version settings to the source configure_file("${CMAKE_CURRENT_SOURCE_DIR}/library/include/rocwmma/internal/rocwmma-version.hpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/library/include/rocwmma/rocwmma-version.hpp" ) # check if asan is enabled if (NOT DEFINED ADDRESS_SANITIZER AND DEFINED ENV{ADDRESS_SANITIZER}) set(ADDRESS_SANITIZER $ENV{ADDRESS_SANITIZER}) endif() if (ADDRESS_SANITIZER OR CMAKE_CXX_FLAGS MATCHES "-fsanitize=address") set(ADDRESS_SANITIZER_ENABLED ON) else() set(ADDRESS_SANITIZER_ENABLED OFF) endif() if (ADDRESS_SANITIZER_ENABLED) #TODO: Remove next line when rocm-cmake fix is available rocm_check_target_ids(DEFAULT_GPU_TARGETS TARGETS "gfx90a:xnack+;gfx942:xnack+;gfx950:xnack+" ) else() rocm_check_target_ids(DEFAULT_GPU_TARGETS TARGETS "gfx908;gfx90a;gfx942;gfx950;gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1200;gfx1201" ) endif() # check if ROCm supports `__hip_fp8_e5m2` and `__hip_fp8_e4m3` include(cmake/Macros/CheckF8.cmake) check_f8(F8_EXISTS) if(F8_EXISTS) message( STATUS "Performing Test `__hip_fp8_e5m2` and `__hip_fp8_e4m3` - Success" ) else() message(FATAL_ERROR "The detected ROCm does not support data type `__hip_fp8_e5m2` or `__hip_fp8_e4m3`.") endif() # Check if offload compression is supported include(CheckCXXCompilerFlag) if (BUILD_OFFLOAD_COMPRESS) check_cxx_compiler_flag("--offload-compress" CXX_COMPILER_SUPPORTS_OFFLOAD_COMPRESS) endif() # TODO: 
Remove next line when rocm-cmake fix is available # Currently fixes linking issues with large executables set(CMAKE_NO_BUILTIN_CHRPATH ON) # Variable GPU_TARGET must be a cached variable and must be specified before calling find_package(hip) # This is because hip-config.cmake sets --offload-arch via GPU_TARGET cached variable __after__ setting # default cached variable GPU_TARGET to DEFAULT_GPU_TARGETS, where not all archs are compatible with MFMA instructions # # By rule, once cached variable is set, it cannot be overridden unless we use the FORCE option if(GPU_TARGETS) set(GPU_TARGETS "${GPU_TARGETS}" CACHE STRING "List of specific machine types for library to target") elseif(AMDGPU_TARGETS) set(GPU_TARGETS "${AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target") message(WARNING "AMDGPU_TARGETS use is deprecated. Use GPU_TARGETS.") else() set(GPU_TARGETS "${DEFAULT_GPU_TARGETS}" CACHE STRING "List of specific machine types for library to target") endif() message(STATUS "GPU_TARGETS=${GPU_TARGETS}") find_package( hip REQUIRED ) find_package( hiprtc REQUIRED ) find_package( OpenMP REQUIRED ) if (ROCWMMA_BUILD_BENCHMARK_TESTS) ## Check for ROCM-smi find_package(rocm_smi PATHS ${ROCM_PATH}/lib/cmake/rocm_smi) if (rocm_smi_FOUND) message(STATUS "Found rocm_smi at ${ROCM_SMI_INCLUDE_DIR}") else() set(ROCM_SMI_INCLUDE_DIR "${ROCM_PATH}/rocm_smi/include") set(ROCM_SMI_LIB_DIR "${ROCM_PATH}/rocm_smi/lib") set(ROCM_SMI_LIBRARY rocm_smi64) endif() endif() add_library(rocwmma INTERFACE) target_link_libraries(rocwmma INTERFACE hip::device hip::host OpenMP::OpenMP_CXX ${ROCM_SMI_LIBRARY}) rocm_install_targets( TARGETS rocwmma INCLUDE library/include ) if(ROCWMMA_BUILD_SAMPLES OR ROCWMMA_BUILD_TESTS) enable_testing() rocm_package_setup_component(clients) endif() if(ROCWMMA_BUILD_SAMPLES) rocm_package_setup_component(samples PARENT clients) add_subdirectory(samples) endif() if(ROCWMMA_BUILD_TESTS) rocm_package_setup_component(tests PARENT clients) 
add_subdirectory(test) endif() # Package if(BUILD_ADDRESS_SANITIZER) set(DEPENDS_HIP_RUNTIME "hip-runtime-amd-asan" ) else() set(DEPENDS_HIP_RUNTIME "hip-runtime-amd" ) endif() rocm_package_add_dependencies("${DEPENDS_HIP_RUNTIME} >= 4.5.0") rocm_package_add_deb_dependencies("libomp-dev") rocm_package_add_rpm_dependencies("libomp-devel") set(CPACK_RPM_PACKAGE_LICENSE "MIT") rocm_export_targets( TARGETS roc::rocwmma NAMESPACE roc:: ) rocm_create_package( NAME rocwmma DESCRIPTION "AMD GPU C++ library for GEMM primitives using MFMA and WMMA matrix instructions" MAINTAINER "rocWMMA Maintainer " HEADER_ONLY ) ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2768183 rocwmma/CONTRIBUTING.md0000664000175100017510000003613415206065535014722 0ustar00jenkinsjenkins # Contributing to rocWMMA # External contributions and feedback are welcome for rocWMMA. Please see the following details to help maximize the likelihood your contributions will be accepted. ## Issue Discussion ## Please use the [GitHub Issues](https://github.com/ROCm/rocm-libraries/issues) tab to notify us of issues. * Use your best judgement for issue creation. If your issue is already listed, upvote the existing issue and comment or post to provide additional details, such as how you reproduced this issue. * If you're not sure if your issue is the same, err on the side of caution and file your issue. You can add a comment to include the issue number (and link) for the similar/related issue. If we evaluate your issue as being the same as the existing issue, we'll close the duplicate. * If your issue doesn't exist, use the issue template to file a new issue. * When filing an issue, be sure to provide as much information as possible, including script output so we can collect information about your configuration. This helps reduce the time required to reproduce your issue. 
* Check on updates to your issue regularly, as we may require additional information to successfully reproduce the issue. * You may also open an issue to ask the maintainers questions about whether a proposed change meets the acceptance criteria, or to discuss an idea pertaining to the library. New issues should use the following templates: * **Bugs** + Any unintended functionality within the rocWMMA library should be thoroughly documented with the template below. 1. Description: ***Please be clear and concise*** 2. Steps for Reproduction: - Hardware Information: - Docker Environment or Software Versions: - Expected Behavior: - Actual Behavior: 3. Any additional information: 4. Add the `project: rocWMMA` tag 5. Specify 'rocWMMA' in the 'ROCm Component' field, if available. * **Enhancement Requests** + Any proposed enhancements to rocWMMA should be thoroughly documented with the template below. 1. Description: ***Please be clear and concise*** 2. Value and Motivation - Feature and/or Functionalities Enabled: - Any Alternatives: 3. Any additional information: 4. Add the `project: rocWMMA` tag 5. Specify 'rocWMMA' in the 'ROCm Component' field, if available. ## Acceptance Criteria ## The goal of rocWMMA is to provide a C++ API for facilitating block-wise decomposition of matrix multiply accumulate (MMA) workflows while leveraging specialized AMD GPU hardware. rocWMMA also facilitates migration of nvcuda::wmma users to AMD's HIP environment on AMD GPUs and provides a level of equivalent functionality. Contributors that wish to help optimize and expand the capabilities of rocWMMA in pursuit of this goal should adhere to the following guidelines for all features and fixes. Detailed coding style and pull request guidelines are covered in later sections. 
Contributors wishing to submit new features for rocWMMA should follow the guidelines outlined below: - Performance Improvements * Features targeting performance improvements for any aspect of rocWMMA are generally permitted. Any optimizations must be both notable and repeatable in order to avoid unnecessary code maintenance. Documentation regarding performance improvements such as benchmark test comparisons must also be provided. - Bug Fixes * Any observed unintended behavior within the rocWMMA library should first be documented by filing a bug report issue using the above template. Non-critical issues may be deferred until future releases. - WMMA Porting * Developers wishing to implement gap closures with nvcuda::wmma may suggest additional features to do so. All new features and fixes should also tie into the rocWMMA [GitHub Issues](https://github.com/ROCm/rocm-libraries/issues) tab: - **Enhancements** + Any implementations of pre-filed enhancement requests should clearly link to the original issue. + Any new enhancements should be documented as if filing a new enhancement request using the template above. - **Bug Fixes** + Any fixes for pre-filed issues should clearly link to the original issue. + Any newly found issues should be documented as if filing a new issue using the template above. ### Exceptions ### Exceptions to these criteria will be handled on a case-by-case basis, and should be discussed via the Issues tab. ## Code Structure ## The organization of the rocWMMA library is explained in detail in the [Programmer's Guide](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocwmma/docs/conceptual/programmers-guide.rst). ## Coding Style ## This project follows the [CPP Core Guidelines](https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md) with a few modifications or additions noted below. All pull-requests should in good faith attempt to follow the guidelines stated therein, but we recognize that the content is lengthy. 
Below we list our primary concerns when reviewing pull-requests. ### Interface ### - Library code should use C++17. - Our minimum supported compiler is hipcc 4.4. - Avoid CamelCase. * This rule applies specifically to publicly visible APIs, but is also encouraged (not mandated) for internal code. * If you are unsure, inspect surrounding code for consistency and don't be afraid to pose questions for clarification on the PRs. ### Philosophy ### - [P.2](https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Rp-Cplusplus) Write in ISO Standard C++17 (especially to support Windows, Linux and MacOS platforms) - [P.5](https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Rp-compile-time) Prefer compile-time checking to run-time checking ### Implementation ### - [SF.1](https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Rs-file-suffix) Use a ``.cpp`` suffix for code files and an ``.hpp`` suffix for interface files if your project doesn't already follow another convention - [SF.5](https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Rs-consistency) A ``.cpp`` file must include the ``.hpp`` file(s) that defines its interface - [SF.7](https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Rs-using-directive) Don't put a global ``using``-directive in a header file - [SF.8](https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Rs-guards) Use ``#include`` guards for all ``.hpp`` files - [SF.21](https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Rs-unnamed) Don't use an unnamed (anonymous) ``namespace`` in a header - [SL.10](https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Rsl-arrays)* Prefer using ``std::array`` or ``std::vector`` instead of a C array. 
- [C.9](https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Rc-private) Minimize the exposure of class members - [F.3](https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Rf-single) Keep functions short and simple - [F.21](https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Rf-out-multi)* To return multiple 'out' values, prefer returning a ``std::tuple`` - [R.1](https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Rr-raii)* Manage resources automatically using RAII (this includes ``std::unique_ptr`` & ``std::shared_ptr``) - [ES.11](https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Res-auto) Use ``auto`` to avoid redundant repetition of type names - [ES.20](https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Res-always) Always initialize an object - [ES.23](https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Res-list) Prefer the ``{}`` initializer syntax - [CP.1](https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#S-concurrency) Assume that your code will run as part of a multi-threaded program - [I.2](https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Ri-global) Avoid global variables | *std functions are not recommended for any ``rocwmma/internal`` code to maintain hipRTC compatibility. | |--------------------------------------------------------------------------------------------------------| ## Pull Request Guidelines ## Our code contribution guidelines closely follows the model of [GitHub Pull-Requests](https://help.github.com/articles/using-pull-requests). The rocWMMA repository follows a workflow which dictates a ``/master`` branch where releases are cut, and a ``/develop`` branch which serves as an integration branch for new code. No changes are allowed to be directly committed to the develop branch of the rocWMMA repository. 
All authors are required to develop their change sets on a separate branch (preferably in your own fork), and then create a pull request targeting the develop branch of the upstream repository. When you create a pull request, you should target the **develop** branch for integration. The typical workflow for creating a rocWMMA pull request is as follows: 1. Create and track a rocWMMA fork, if you haven't already done so. 2. Clone your fork: ```bash git clone -b develop https://github.com//rocWMMA.git . .githooks/install git checkout -b ... git add git commit -m "What was changed" git push origin ... ``` 3. Create a pull request to the ROCmSoftwarePlatform/rocWMMA develop branch. 4. Await CI and approval feedback. 5. Once approved, merge. | You must install GitHooks every time you clone a rocWMMA repository to ensure automated triggers for Clang formatting are executed upon making commits. Instructions for formatting rocWMMA are included in [Formatting](#formatting). | |----------------------------------------------------------------------------------------------------------------------------------------------------------------------| ### Deliverables ### rocWMMA has a set of required deliverables for every pull request that are as follows. 1. **Test Integration**: - All new functionality introduced to rocWMMA must be accompanied by unit tests. Unit tests should integrate within the existing googletest framework and must have good code coverage. Existing unit tests should be used as a guide and are found in ``test/unit``. Be sure to consider rocWMMA's support matrix for datatypes, block sizes and architectures. - New features that aim to optimize rocWMMA must have benchmark and validation tests, and performance must approach the compute bound limit or memory bound limit. These tests should follow the same googletest framework laid out in the rocWMMA GEMM tests found in ``test/gemm``. 
Features that impact the performance of existing rocWMMA kernels must be accompanied with a performance analysis against the pre-existing kernels. 2. **API Documentation**: - Any new outward facing rocWMMA API functions must be properly documented and included in the [API Reference Guide](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocwmma/docs/api-reference/api-reference-guide.rst). 3. **Type Support**: - All features introduced to rocWMMA must maintain support for the following types: - **Supported Datatypes (gfx9)** - Native Data Types: int8, f16, f32, f64* - Non-Native Data Types: h16 (__half), bf16, f8**, bf8** - **Supported Datatypes (gfx11)** - Native Data Types: int8, f16 - Non-Native Data Types: h16, bf16 - **Supported Datatypes (gfx12)** - Native Data Types: int8, f16 - Non-Native Data Types: h16, bf16, f8**, bf8** | *Only on gfx90a, gfx942 & gfx950. | |-----------------------------------| | **Only on gfx942, gfx950 and gfx12. | |-------------------------------------| - Support for the other rocWMMA fragment parameters as described in ``library/include/rocwmma/rocwmma.hpp`` must also be maintained. 4. **Licensing**: - All code submitted to rocWMMA must be original, no AI generated code is currently being accepted. - The code you are contributing is your own, and you have the right to license it. - No code found under other licenses is permitted. - Any submitted code will subsequently be covered under the MIT License. - For each new file introduced in your pull request, please include the licensing header: ``` /******************************************************************************* * * MIT License * * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * *******************************************************************************/ ``` ### Process ### Reviewers for rocWMMA pull requests are listed under ``.github/CODEOWNERS``. Pull requests should be properly documented with comments and linked to their corresponding issues. Pull request reviews should include insightful comments where changes are requested. #### Formatting #### rocWMMA C++ code is formatted using ``clang-format``. - To manually format using clang-format use the version in the ``/opt/rocm/llvm/bin`` directory. Please do not use your system's built-in ``clang-format``, as this may be an older version that will result in different results. 
To format a file, use: ``` /opt/rocm/llvm/bin/clang-format -style=file -i ``` To format all files, run the following script in rocWMMA directory: ``` #!/bin/bash git ls-files -z *.cc *.cpp *.h *.hpp *.cl *.h.in *.hpp.in *.cpp.in | xargs -0 /opt/rocm/llvm/bin/clang-format -style=file -i ``` - Alternatively, githooks can be installed to format the code per-commit: ``` /.githooks/install ``` ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2768183 rocwmma/LICENSE.md0000664000175100017510000000211215206065535014062 0ustar00jenkinsjenkinsCopyright (C) 2016-2025 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2768183 rocwmma/README.md0000664000175100017510000001343115206065535013743 0ustar00jenkinsjenkins# rocWMMA Welcome! 
rocWMMA is a C++ library for accelerating mixed-precision matrix multiply-accumulate (MMA) operations leveraging AMD GPU hardware. rocWMMA makes it easier to break down MMA problems into fragments and distribute block-wise MMA operations in parallel across GPU wavefronts. The API consists of a header library that can be used to compile MMA acceleration directly into GPU kernel device code. This can benefit from compiler optimization in the generation of kernel assembly, and doesn't incur additional overhead costs of linking to external runtime libraries or having to launch separate kernels. rocWMMA includes sample projects to validate and demonstrate API usage. These include simple GEMMs, performant GEMMs, DLRM, GEMV and hipRTC integration. The test suite includes validation and benchmarking projects that focus on unit testing, GEMMs and DLRM. > [!NOTE] > The published rocWMMA documentation is available at [rocWMMA](https://rocm.docs.amd.com/projects/rocWMMA/en/latest/index.html) in an organized, easy-to-read format, with search and a table of contents. The documentation source files reside in the `projects/rocwmma/docs` folder of this repository. As with all ROCm projects, the documentation is open source. For more information, see [Contribute to ROCm documentation](https://rocm.docs.amd.com/en/latest/contribute/contributing.html). ## Requirements rocWMMA currently supports the following AMD GPU architectures: * CDNA class GPU featuring matrix core support: gfx908, gfx90a, gfx942, gfx950 as 'gfx9' * RDNA class GPU featuring AI acceleration support: gfx1100, gfx1101, gfx1102, gfx1151 as 'gfx11'; gfx1200, gfx1201 as 'gfx12' Dependencies: * Minimum ROCm version support is 6.4. * Minimum CMake version support is 3.14. * Minimum ROCm-cmake version support is 0.8.0. * Minimum rocBLAS version support is rocBLAS 4.0.0 for ROCm 6.0* (or ROCm packages rocblas and rocblas-dev). 
* Minimum ROCm SMI version support is 7.6.0** (or ROCm packages rocm-smi-lib and librocm-smi-dev). * Minimum HIP runtime version support is 4.3.0 (or ROCm package hip-runtime-amd). * Minimum LLVM OpenMP runtime dev package version support is 10.0 (available as ROCm package rocm-llvm-dev). ```note:: * = if using rocBLAS for validation. ** = if building benchmark tests (configuring with ROCWMMA_BUILD_BENCHMARK_TESTS=ON). It is best to use available ROCm packages from the same release where applicable. ``` ## Build with CMake For more detailed information, please refer to the [rocWMMA installation guide](https://rocm.docs.amd.com/projects/rocWMMA/en/latest/install/installation.html). ### Project options |Option|Description|Default value| |---|---|---| |GPU_TARGETS|Build code for specific GPU target(s)|gfx908;gfx90a;gfx942;gfx950;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201| |AMDGPU_TARGETS|(Deprecated) Build code for specific GPU target(s)|gfx908;gfx90a;gfx942;gfx950;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201| |ROCWMMA_BUILD_TESTS|Build Tests|ON| |ROCWMMA_BUILD_SAMPLES|Build Samples|ON| |ROCWMMA_BUILD_DOCS|Build doxygen documentation from code|OFF| |ROCWMMA_BUILD_ASSEMBLY|Generate assembly files|OFF| |ROCWMMA_BUILD_VALIDATION_TESTS|Build validation tests |ON (requires ROCWMMA_BUILD_TESTS=ON)| |ROCWMMA_BUILD_BENCHMARK_TESTS|Build benchmark tests |OFF (requires ROCWMMA_BUILD_TESTS=ON)| |ROCWMMA_BUILD_EXTENDED_TESTS|Build extended testing coverage |OFF (requires ROCWMMA_BUILD_TESTS=ON)| |ROCWMMA_VALIDATE_WITH_ROCBLAS|Use rocBLAS for validation tests|ON (requires ROCWMMA_BUILD_VALIDATION_TESTS=ON)| |ROCWMMA_BENCHMARK_WITH_ROCBLAS|Include rocBLAS benchmarking data|OFF (requires ROCWMMA_BUILD_BENCHMARK_TESTS=ON)| |ROCWMMA_USE_SYSTEM_GOOGLETEST|Use system Google Test library instead of downloading and building it|OFF (requires ROCWMMA_BUILD_TESTS=ON)| ### Example configurations By default, the project is configured in release mode and is linked against rocBLAS for 
validating results. Here are some configuration examples: |Configuration|Command| |---|---| |Basic|`CC=/opt/rocm/bin/amdclang CXX=/opt/rocm/bin/amdclang++ cmake -B<build_dir> .`| |Targeting gfx908|`CC=/opt/rocm/bin/amdclang CXX=/opt/rocm/bin/amdclang++ cmake -B<build_dir> . -DGPU_TARGETS=gfx908:xnack-` | |Debug build|`CC=/opt/rocm/bin/amdclang CXX=/opt/rocm/bin/amdclang++ cmake -B<build_dir> . -DCMAKE_BUILD_TYPE=Debug` | |Build without rocBLAS (default on)|`CC=/opt/rocm/bin/amdclang CXX=/opt/rocm/bin/amdclang++ cmake -B<build_dir> . -DROCWMMA_VALIDATE_WITH_ROCBLAS=OFF -DROCWMMA_BENCHMARK_WITH_ROCBLAS=OFF` | After configuration, build with `cmake --build <build_dir> -- -j<nproc>` ## Documentation For more comprehensive documentation on installation, samples and test contents, API reference, and programmer's guide, you can build the documentation locally in different ways. ### Html ```bash cd docs pip3 install -r sphinx/requirements.txt python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html ``` The HTML documentation can be viewed in your browser by opening the `docs/_build/html/index.html` result. ### Pdf ```bash cd docs sudo apt-get update sudo apt-get install doxygen sudo apt-get install texlive-latex-base texlive-latex-extra pip3 install -r sphinx/requirements.txt python3 -m sphinx -T -E -b latex -d _build/doctrees -D language=en . _build/latex cd _build/latex pdflatex rocwmma.tex ``` Running the above commands generates `rocwmma.pdf`. The latest official documentation for rocWMMA is available at: [https://rocm.docs.amd.com/projects/rocWMMA/en/latest/index.html](https://rocm.docs.amd.com/projects/rocWMMA/en/latest/index.html). ## Contributing to the rocWMMA Library Community collaboration is encouraged! If you are considering contributing, please follow the [rocWMMA Contribution Guide](https://github.com/ROCm/rocm-libraries/blob/develop/projects/rocwmma/CONTRIBUTING.md) to get started.
././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2768183 rocwmma/cmake/0000775000175100017510000000000015206065535013542 5ustar00jenkinsjenkins././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2768183 rocwmma/cmake/Macros/0000775000175100017510000000000015206065535014766 5ustar00jenkinsjenkins././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2768183 rocwmma/cmake/Macros/CheckF8.cmake0000664000175100017510000000125315206065535017204 0ustar00jenkinsjenkins
# check_f8(RESULT_VAR)
#
# Define a macro to check for the HIP FP8 structs: it probes whether the
# active compiler can build a HIP translation unit that declares
# __hip_fp8_e5m2 and __hip_fp8_e4m3.
#
#   RESULT_VAR - name of the variable that receives TRUE when the probe
#                compiles and FALSE otherwise.
#
# This is a macro, not a function, so it does not open a new variable scope:
# both ${RESULT_VAR} and the intermediate HAS_F8 land in the caller's scope.
macro(check_f8 RESULT_VAR)

    # Create a temporary source file.
    # The header must be named explicitly: a bare `#include` is a preprocessor
    # error, which would make the probe fail on every toolchain and force
    # RESULT_VAR to FALSE regardless of actual FP8 support.
    file(WRITE ${CMAKE_BINARY_DIR}/CheckF8.cxx
    "
        #include <hip/hip_fp8.h>
        struct __hip_fp8_e5m2 e5m2;
        struct __hip_fp8_e4m3 e4m3;
        int main() { return 0; }
    "
    )

    # Try to compile the test program. -xhip asks the compiler to treat the
    # .cxx file as HIP source.
    try_compile(HAS_F8
        ${CMAKE_BINARY_DIR}
        SOURCES ${CMAKE_BINARY_DIR}/CheckF8.cxx
        COMPILE_DEFINITIONS -xhip
    )

    # Set the result variable
    if(HAS_F8)
        set(${RESULT_VAR} TRUE)
    else()
        set(${RESULT_VAR} FALSE)
    endif()

    # Clean up the temporary file (optional but recommended)
    file(REMOVE ${CMAKE_BINARY_DIR}/CheckF8.cxx)
endmacro()
././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2768183 rocwmma/docker/0000775000175100017510000000000015206065535013731 5ustar00jenkinsjenkins././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2768183 rocwmma/docker/ROCmUbuntuDev0000664000175100017510000000455515206065535016327 0ustar00jenkinsjenkins
################################################################################
# Copyright (C) 2019-2024 Advanced Micro Devices, Inc. All rights reserved.
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- # ies of the Software, and to permit persons to whom the Software is furnished # to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- # PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- # CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ################################################################################ # This Dockerfile provides a starting point for a ROCm installation of rocWMMA.
# Build parameters: base_image names the parent image this image is layered on
ARG base_image
FROM ${base_image}

USER root
# uid assigned to the jenkins user created by the last RUN step below
ARG user_uid

# Install dependent packages, then drop the apt caches in the same layer
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    cmake \
    ca-certificates \
    doxygen \
    pkg-config \
    python3 \
    python3-dev \
    python3-pip \
    python3-pytest \
    python3-setuptools \
    libnuma1 \
    zlib1g-dev \
    libomp-dev \
    && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Python packages: upgrade setuptools, then add wheel, pandas and matplotlib
RUN pip3 install setuptools --upgrade && \
    pip3 install wheel && \
    pip3 install pandas && \
    pip3 install matplotlib

# docker pipeline runs containers with particular uid
# create a jenkins user with this specific uid so it can use sudo privileges
# The user is added to the video group, and any member of the video group
# (not the sudo group) is granted password-less sudo privileges
RUN useradd --create-home -u ${user_uid} -o -G video --shell /bin/bash jenkins && \
    echo '%video ALL=(ALL) NOPASSWD:ALL' | tee /etc/sudoers.d/sudo-nopasswd && \
    chmod 400 /etc/sudoers.d/sudo-nopasswd
././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2788184 rocwmma/docs/0000775000175100017510000000000015206065535013412 5ustar00jenkinsjenkins././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2778184 rocwmma/docs/.gitignore0000664000175100017510000000013515206065535015401 0ustar00jenkinsjenkins# documentation artifacts _build/ _doxygen/ sphinx/_toc.yml doxygen/html/ doxygen/xml/ ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2778184 rocwmma/docs/api-reference/0000775000175100017510000000000015206065535016117 5ustar00jenkinsjenkins././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2778184 rocwmma/docs/api-reference/api-reference-guide.rst0000664000175100017510000005020415206065535022452 0ustar00jenkinsjenkins.. 
meta:: :description: C++ library for accelerating mixed precision matrix multiply-accumulate operations leveraging specialized GPU matrix cores on AMD's latest discrete GPUs :keywords: rocWMMA, ROCm, library, API, tool .. _api-reference-guide: ==================== API reference guide ==================== This document provides information about rocWMMA functions, data types, and other programming constructs. Synchronous API --------------- rocWMMA API functions such as ``load_matrix_sync``, ``store_matrix_sync``, and ``mma_sync`` are synchronous when used with global memory. However, when you use these functions with shared memory, for example, LDS memory, explicit workgroup synchronization (``synchronize_workgroup``) might be required. Supported GPU architectures ---------------------------- Supported CDNA architectures (wave64): * gfx908 * gfx90a * gfx942 * gfx950 .. note:: gfx9 refers to gfx908, gfx90a, gfx942, and gfx950. Supported RDNA architectures (wave32): * gfx1100 * gfx1101 * gfx1102 * gfx1200 * gfx1201 .. note:: gfx11 refers to gfx1100, gfx1101, and gfx1102. gfx12 refers to gfx1200 and gfx1201. .. _rocwmma-supported-data-types: Supported data types -------------------- rocWMMA mixed precision multiply-accumulate operations support the following data type combinations. Data Types **<Ti / To / Tc>** = <Input Type / Output Type / Compute Type>, where: * Input Type = Matrix A / B * Output Type = Matrix C / D * Compute Type = Math / Accumulation type Supported data types: * i8: 8-bit precision integer * f8: 8-bit precision floating point * bf8: 8-bit precision brain floating point * f16: half-precision floating point * bf16: half-precision brain floating point * f32: single-precision floating point * i32: 32-bit precision integer * xf32: single-precision tensor floating point * f64: double-precision floating point .. note:: f16 includes support for both _Float16 and __half types. f8 NANOO (optimized) format is only supported on gfx942; otherwise, f8 OCP is assumed on targets that support f8 datatypes. .. 
tabularcolumns:: |C|C|C|C|C| +------------------------------+------------+-----------+---------------+----------------------------+--------------------+ |Ti / To / Tc |BlockM |BlockN |BlockK Range* | CDNA Support | RDNA Support | | | | |(Powers of 2) | | | +==============================+============+===========+===============+============================+====================+ | |16 |16 | 32+ | | gfx12 | | bf8 / f32 / f32 +------------+-----------+---------------+ gfx940, gfx950 +--------------------+ | |32 |32 | 16+ | | \- | +------------------------------+------------+-----------+---------------+----------------------------+--------------------+ | |16 |16 | 32+ | | gfx12 | | f8 / f32 / f32 +------------+-----------+---------------+ gfx940, gfx950 +--------------------+ | |32 |32 | 16+ | | \- | +------------------------------+------------+-----------+---------------+----------------------------+--------------------+ | | | | 16 | gfx908, gfx90a | gfx11, gfx12 | | | 16 | 16 +---------------+----------------------------+--------------------+ | | | | 32 | gfx940, gfx950 | \- | | | | +---------------+----------------------------+--------------------+ | | | | 64+ | gfx950 | \- | | i8 / i32 / i32 +------------+-----------+---------------+----------------------------+--------------------+ | | | | 8 | gfx908, gfx90a | \- | | | 32 | 32 +---------------+----------------------------+--------------------+ | | | | 16 | gfx940, gfx950 | \- | | | | +---------------+----------------------------+--------------------+ | | | | 32+ | gfx950 | \- | +------------------------------+------------+-----------+---------------+----------------------------+--------------------+ | | | | 16 | gfx908, gfx90a | gfx11, gfx12 | | | 16 | 16 +---------------+----------------------------+--------------------+ | | | | 32 | gfx940, gfx950 | \- | | | | +---------------+----------------------------+--------------------+ | | | | 64+ | gfx950 | \- | | i8 / i8 / i32 
+------------+-----------+---------------+----------------------------+--------------------+ | | | | 8 | gfx908, gfx90a | \- | | | 32 | 32 +---------------+----------------------------+--------------------+ | | | | 16 | gfx940, gfx950 | \- | | | | +---------------+----------------------------+--------------------+ | | | | 32+ | gfx950 | \- | +------------------------------+------------+-----------+---------------+----------------------------+--------------------+ | | | | 16 | gfx9 | gfx11, gfx12 | | f16 / f32 / f32 | 16 | 16 +---------------+----------------------------+--------------------+ | | | | 32+ | gfx950 | \- | | +------------+-----------+---------------+----------------------------+--------------------+ | | | | 8 | gfx9 | \- | | | 32 | 32 +---------------+----------------------------+--------------------+ | | | | 16+ | gfx950 | \- | +------------------------------+------------+-----------+---------------+----------------------------+--------------------+ | | | | 16 | gfx9 | gfx11, gfx12 | | f16 / f16 / f32 | 16 | 16 +---------------+----------------------------+--------------------+ | | | | 32+ | gfx950 | \- | | +------------+-----------+---------------+----------------------------+--------------------+ | | | | 8 | gfx9 | \- | | | 32 | 32 +---------------+----------------------------+--------------------+ | | | | 16+ | gfx950 | \- | +------------------------------+------------+-----------+---------------+----------------------------+--------------------+ | | | | 16 | gfx9 | gfx11, gfx12 | | f16 / f16 / f16** | 16 | 16 +---------------+----------------------------+--------------------+ | | | | 32+ | gfx950 | \- | | +------------+-----------+---------------+----------------------------+--------------------+ | | | | 8 | gfx9 | \- | | | 32 | 32 +---------------+----------------------------+--------------------+ | | | | 16+ | gfx950 | \- | +------------------------------+------------+-----------+---------------+----------------------------+--------------------+ 
| | | | 8 | gfx908 | \- | | | | +---------------+----------------------------+--------------------+ | | 16 | 16 | 16 | gfx90a, gfx942, gfx950 | gfx11, gfx12 | | | | +---------------+----------------------------+--------------------+ | | | | 32+ | gfx950 | \- | | bf16 / f32 / f32 +------------+-----------+---------------+----------------------------+--------------------+ | | | | 4+ | gfx908 | \- | | | | +---------------+----------------------------+--------------------+ | | 32 | 32 | 8 | gfx90a, gfx942, gfx950 | \- | | | | +---------------+----------------------------+--------------------+ | | | | 16+ | gfx950 | \- | +------------------------------+------------+-----------+---------------+----------------------------+--------------------+ | | | | 8 | gfx908 | \- | | | | +---------------+----------------------------+--------------------+ | | 16 | 16 | 16 | gfx90a, gfx942, gfx950 | gfx11, gfx12 | | | | +---------------+----------------------------+--------------------+ | | | | 32+ | gfx950 | \- | | bf16 / bf16 / f32 +------------+-----------+---------------+----------------------------+--------------------+ | | | | 4+ | gfx908 | \- | | | | +---------------+----------------------------+--------------------+ | | 32 | 32 | 8 | gfx90a, gfx942, gfx950 | \- | | | | +---------------+----------------------------+--------------------+ | | | | 16+ | gfx950 | \- | +------------------------------+------------+-----------+---------------+----------------------------+--------------------+ | | | | 8 | gfx908 | \- | | | | +---------------+----------------------------+--------------------+ | | 16 | 16 | 16 | gfx90a, gfx942, gfx950 | gfx11, gfx12 | | | | +---------------+----------------------------+--------------------+ | | | | 32+ | gfx950 | \- | | bf16 / bf16 / bf16** +------------+-----------+---------------+----------------------------+--------------------+ | | | | 4+ | gfx908 | \- | | | | +---------------+----------------------------+--------------------+ | | 32 | 32 | 8 | 
gfx90a, gfx942, gfx950 | \- | | | | +---------------+----------------------------+--------------------+ | | | | 16+ | gfx950 | \- | +------------------------------+------------+-----------+---------------+----------------------------+--------------------+ | |16 |16 | 4+ | gfx9 | \- | | f32 / f32 / f32 +------------+-----------+---------------+----------------------------+--------------------+ | |32 |32 | 2+ | gfx9 | \- | +------------------------------+------------+-----------+---------------+----------------------------+--------------------+ | |16 |16 | 8+ | | | | xf32 / xf32 / xf32 +------------+-----------+---------------+ gfx942 | \- | | |32 |32 | 4+ | | | +------------------------------+------------+-----------+---------------+----------------------------+--------------------+ | f64 / f64 / f64 |16 |16 | 4+ | gfx90a, gfx942, gfx950 | \- | +------------------------------+------------+-----------+---------------+----------------------------+--------------------+ .. note:: BlockM/N values are minimum recommended values. Below these values padding is used which may impact performance. Above this value powers of 2 are acceptable. \* BlockK range specifies the minimum recommended value. Below this value padding is used which may impact performance. Above this value powers of 2 are acceptable. In practice, BlockK values are typically 32 or less. \*\* On CDNA architectures, matrix unit accumulation is performed in natively 32-bit precision and then converted to the target data type. .. note:: rocWMMA supports partial fragment sizes where ``FragMNK`` may be smaller than the ``BlockMNK`` sizes listed in the table above. These fragments are internally padded to nearest supported ``BlockMNK`` sizes. Supported matrix layouts ------------------------ (N = col major, T = row major) .. 
tabularcolumns:: |C|C|C|C| +---------+--------+---------+--------+ |LayoutA |LayoutB |Layout C |LayoutD | +=========+========+=========+========+ |N |N |N |N | +---------+--------+---------+--------+ |N |N |T |T | +---------+--------+---------+--------+ |N |T |N |N | +---------+--------+---------+--------+ |N |T |T |T | +---------+--------+---------+--------+ |T |N |N |N | +---------+--------+---------+--------+ |T |N |T |T | +---------+--------+---------+--------+ |T |T |N |N | +---------+--------+---------+--------+ |T |T |T |T | +---------+--------+---------+--------+ Supported thread block sizes ---------------------------- rocWMMA supports up to four wavefronts per thread block. The X dimension should be a multiple of the wave size and is scaled accordingly. .. tabularcolumns:: |C|C| +------------+------------+ |TBlock_X |TBlock_Y | +============+============+ |WaveSize |1 | +------------+------------+ |WaveSize |2 | +------------+------------+ |WaveSize |4 | +------------+------------+ |WaveSize*2 |1 | +------------+------------+ |WaveSize*2 |2 | +------------+------------+ |WaveSize*4 |1 | +------------+------------+ .. note:: WaveSize (RDNA) = 32 WaveSize (CDNA) = 64 Using rocWMMA API ----------------- This section describes how to use the rocWMMA library API. rocWMMA datatypes ----------------- matrix_a ^^^^^^^^ .. doxygenstruct:: rocwmma::matrix_a matrix_b ^^^^^^^^ .. doxygenstruct:: rocwmma::matrix_b accumulator ^^^^^^^^^^^ .. doxygenstruct:: rocwmma::accumulator row_major ^^^^^^^^^ .. doxygenstruct:: rocwmma::row_major col_major ^^^^^^^^^ .. doxygenstruct:: rocwmma::col_major default_schedule ^^^^^^^^^^^^^^^^ .. doxygentypedef:: rocwmma::fragment_scheduler::default_schedule coop_row_major_2d ^^^^^^^^^^^^^^^^^ .. doxygentypedef:: rocwmma::fragment_scheduler::coop_row_major_2d coop_col_major_2d ^^^^^^^^^^^^^^^^^ .. doxygentypedef:: rocwmma::fragment_scheduler::coop_col_major_2d coop_row_slice_2d ^^^^^^^^^^^^^^^^^ .. 
doxygentypedef:: rocwmma::fragment_scheduler::coop_row_slice_2d coop_col_slice_2d ^^^^^^^^^^^^^^^^^ .. doxygentypedef:: rocwmma::fragment_scheduler::coop_col_slice_2d single ^^^^^^^^^ .. doxygentypedef:: rocwmma::fragment_scheduler::single fragment ^^^^^^^^ .. doxygenclass:: rocwmma::fragment :members: rocWMMA enumeration ------------------- layout_t ^^^^^^^^ .. doxygenenum:: rocwmma::layout_t rocWMMA API functions ---------------------- .. doxygenfunction:: rocwmma::fill_fragment .. doxygenfunction:: rocwmma::load_matrix_sync(FragT &frag, const DataT* data, uint32_t ldm) .. doxygenfunction:: rocwmma::load_matrix_sync(FragT &frag, const DataT* data, uint32_t ldm, layout_t layout) .. doxygenfunction:: rocwmma::store_matrix_sync(DataT* data, FragT const& frag, uint32_t ldm) .. doxygenfunction:: rocwmma::store_matrix_sync(DataT* data, FragT const& frag, uint32_t ldm, layout_t layout) .. doxygenfunction:: rocwmma::mma_sync .. doxygenfunction:: rocwmma::synchronize_workgroup rocWMMA transforms API functions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. doxygenfunction:: rocwmma::apply_transpose(FragT &&frag) .. doxygenfunction:: rocwmma::apply_data_layout(FragT &&frag) .. doxygenfunction:: rocwmma::apply_fragment(FragT &&frag) .. doxygenfunction:: rocwmma::to_register_file(FragT &&frag) .. doxygenfunction:: rocwmma::from_register_file(FragT &&frag) Sample programs ---------------- A sample demonstrating the use of rocWMMA functions ``load_matrix_sync``, ``store_matrix_sync``, ``fill_fragment``, and ``mma_sync`` is available `here `_. For more sample programs, refer to the `samples directory `_. Emulation tests --------------- The emulation test is a smaller test suite designed for emulators. It includes a subset of ROCWMMA test cases for faster execution on emulated platforms. It supports ``smoke``, ``regression``, and ``extended`` modes. For example, to run a smoke test: .. 
code-block:: bash rtest.py --install_dir --emulation smoke ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2778184 rocwmma/docs/conceptual/0000775000175100017510000000000015206065535015547 5ustar00jenkinsjenkins././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2778184 rocwmma/docs/conceptual/migration-guide.rst0000664000175100017510000002417015206065535021371 0ustar00jenkinsjenkins.. meta:: :description: C++ library for accelerating mixed precision matrix multiply-accumulate operations leveraging specialized GPU matrix cores on AMD's latest discrete GPUs :keywords: rocWMMA, ROCm, library, API, tool .. _migration-guide: =============================== Migration guide for rocWMMA 2.0 =============================== This document outlines the key API changes and new features introduced in rocWMMA 2.0, with examples to help you migrate from earlier versions. Starting with version 2.0, rocWMMA introduces significant changes to its API, including: - Removal of the cooperative API - Transforms API no longer requires wave count template parameters - rocWMMA fragments now have a fragment scheduler template argument - rocWMMA fragments now support partial fragment sizes ----------------------- Cooperative API changes ----------------------- Previous releases began deprecating cooperative API functions such as those defined in ``rocwmma/rocwmma_coop.hpp``: .. code-block:: c++ template ROCWMMA_DEVICE void load_matrix_coop_sync(fragment& frag, const DataT* data, uint32_t ldm, uint32_t waveIndex); template ROCWMMA_DEVICE void store_matrix_coop_sync(DataT* data, fragment const& frag, uint32_t ldm, uint32_t waveIndex); These functions previously required ``WaveCount`` as a template parameter and passed ``waveIndex`` as an argument to the API calls. 
This information was used to distribute data responsibilities across participating waves, aiming to balance and optimize data transactions within a thread block. Cooperation between wavefronts in a thread block requires the use of a separate cooperative API, along with propagation of wave count and wave index values. Example of deprecated cooperative API: .. code-block:: c++ // Global read (macro tile) using GRBuffA = fragment; // Local warp coordinate relative to current thread block (wg). constexpr auto warpDims = make_coord2d(WARPS_X, WARPS_Y); auto localWarpCoord = make_coord2d(threadIdx.x / WARP_SIZE, threadIdx.y); // WorkItems will be split up by minimum IOCount to perform either global read or local write. // These are inputs to cooperative functions. constexpr auto warpCount = get<0>(warpDims) * get<1>(warpDims); // Scheduling warp order is analogous to row major priority. // E.g. Wg = (128, 2) = 2x2 warps // (0, 0) (0, 1) Share Schedule: w0 = (0, 0), w1 = (0, 1), // (1, 0) (1, 1) w2 = (1, 0), w3 = (1, 1), count = 4 const auto warpIndex = get<0>(localWarpCoord) * get<1>(warpDims) + get<1>(localWarpCoord); // Transfer data from global memory to local memory GRBuffA grBuffA; load_matrix_coop_sync(grBuffA, gAddrA, lda, warpIndex); store_matrix_coop_sync(ldsAddr, applyDataLayout(applyTranspose(grBuffA)), ldsld, warpIndex); Calculating the warp count and warp index requires extra boilerplate code. It is important to supply the same warp count and warp index values to matching pairs of load, store, and transform APIs. Providing mismatched values to APIs that depend on matching warp count and index poses a risk of incorrect behavior. Embedding the warp count and index into the fragment object helps mitigate the risk. As a result, fragments in rocWMMA 2.0 are augmented with an additional fragment scheduler template parameter. Fragment schedulers are classes that represent thread block scheduling models. 
These models provide static values for both the wave count and wave order (wave index). Fragment schedulers are classified as either non-cooperative (the default, where waves act independently) or cooperative (where waves collaborate within a thread block). Their names reflect their ordering scheme. Example: .. code-block:: c++ namespace fragment_scheduler { //! @struct default //! @brief The default fragment scheduler; each wave operates independently. using default_schedule = IOScheduler::Default; //! @struct coop_row_major_2d //! @brief A cooperative scheduling strategy where each wave in the 2d thread block //! will contribute to the fragment operation in row_major grid order. //! All waves are scheduled in row_major order. //! E.g. (TBlockX, TBlockY) => 2x2 waves //! w0 = (0, 0), w1 = (0, 1), //! w2 = (1, 0), w3 = (1, 1) //! @tparam TBlockX the size of the thread-block in the X dimension //! @tparam TBlockY the size of the thread-block in the Y dimension template using coop_row_major_2d = IOScheduler::RowMajor2d; ... } Here is the simplified usage with new cooperative fragment changes: .. code-block:: c++ // Global read (macro tile) // Distribute segments of macro tile data between waves of the thread block in // row major order. using CoopScheduler = fragment_scheduler::coop_row_major_2d; using GRBuffA = fragment; // Transfer data from global memory to local memory GRBuffA grBuffA; load_matrix_sync(grBuffA, gAddrA, lda); store_matrix_sync(ldsAddr, apply_data_layout(apply_transpose(grBuffA)), ldsld); To summarize, the ``CoopScheduler`` template parameter allows you to express the required cooperative behavior with the fragment class declaration. Boilerplate code for calculating wave count and wave indices is wrapped into the ``CoopScheduler`` class. You can use fragments with the standard rocWMMA API without the need to externally propagate matching wave counts or wave indices, making rocWMMA more compact and expressive than previous versions. .. 
note:: Cooperative fragment schedulers require template parameters for ``TBLOCK_X`` and ``TBLOCK_Y`` dimensions. This design enables various optimizations by allowing the schedulers to provide a static wave count at compile time. As a result, rocWMMA no longer supports run-time wave count calculations in favor of better performance. ------------------------ Partial fragment support ------------------------ In previous rocWMMA versions, fragment sizes were required to be a multiple of the minimum block sizes, as described in the :doc:`programmers-guide`. This was a function of the MMA implementation of hardware acceleration. Thus, rocWMMA serves as a direct hardware enablement to employ block-wise decomposition of matrix-multiply problems. In absence of perfect block-wise decompositions, there is a need to accommodate odd-sized blocks or partials. To increase the utility of rocWMMA to more applications, rocWMMA was extended to include support for partial tile sizes, allowing fragment dimensions (FragMNK) to differ from the minimum block-wise dimensions required for MMA (BlockMNK). rocWMMA now pads FragMNK dimensions to meet the minimal BlockMNK dimensions, ensuring compatibility with MMA hardware. .. code-block:: c++ // Fragment types, assuming ROCWMMA_MNK are minimum block sizes. // These fragments will not use any padding. using FragA = fragment; using FragB = fragment; using Accum = fragment; FragA fragA; FragB fragB; Accum accum; fill_fragment(accum, 0); load_matrix_sync(fragA, gAddrA, lda); load_matrix_sync(fragB, gAddrB, ldb); mma_sync(accum, fragA, fragB, accum); store_matrix_sync(gResC, accum, ldc, layout_t::mem_row_major); // Now also supported // Fragment types, which are partial fragments. // These fragments will use padding to minimum block sizes internally. 
// Note: The dimensions (2, 3, 1) are smaller than BlockMNK, creating partial fragments using FragA = fragment; using FragB = fragment; using Accum = fragment; FragA fragA; FragB fragB; Accum accum; fill_fragment(accum, 0); load_matrix_sync(fragA, gAddrA, lda); load_matrix_sync(fragB, gAddrB, ldb); mma_sync(accum, fragA, fragB, accum); store_matrix_sync(gResC, accum, ldc, layout_t::mem_row_major); In summary, partial tiles are padded to the minimum MMA block dimensions to accommodate a wider range of fragment sizes. However, this added flexibility comes at a cost: extra registers used for padding might increase kernel register pressure for small tiles and incur extra overhead for checking boundary conditions. Padded fragments are logically restricted to writing in FragMNK dimensions and zeroing boundary conditions. .. note:: Padded fragment internals always use padded-sized resources instead of fragment-sized resources. However, fragment element-wise accesses, such as uniform FMA, should continue to use ``fragment.num_elements``, assuming that any padded elements will be zero. Example: .. code-block:: c++ // Fused multiply-add still valid for partials as padded elements are 0 for(int i = 0; i < frag.num_elements; i++) { frag.x[i] = frag.x[i] * (alpha + 1); } ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2778184 rocwmma/docs/conceptual/programmers-guide.rst0000664000175100017510000004112715206065535021737 0ustar00jenkinsjenkins.. meta:: :description: C++ library for accelerating mixed precision matrix multiply-accumulate operations leveraging specialized GPU matrix cores on AMD's latest discrete GPUs :keywords: rocWMMA, ROCm, library, API, tool .. _programmers-guide: =================== Programming guide =================== This document outlines the library design choices, source code organization and helpful information for new development. 
-------------------------------- Infrastructure -------------------------------- - Doxygen and Sphinx are used to generate the project's documentation. - Jenkins is used to automate Continuous Integration (CI) testing, with configurations stored in the ``.jenkins`` folder. - rocWMMA is hosted and maintained by AMD on `GitHub `_. .. note:: The rocWMMA repository for ROCm 7.1.1 and earlier is located at ``_. - The rocWMMA project is organized and configured using ``CMake``, with ``CMakeLists.txt`` files in the root of each directory. - ``clang-format`` is used to format C++ code. ``.githooks/install`` ensures that a clang-format pass will run on each committed file. - ``GTest`` is used to implement test suite organization and execution. - ``CTest`` is used to consolidate and invoke multiple test targets. The ``/bin/rocWMMA/CTestTestfile.cmake`` file lists the testing targets executed when ``ctest`` is invoked. - The preferred compiler for rocWMMA is ``CC=/bin/amdclang and CXX=/bin/amdclang++``. ``hipcc`` is also supported, however may be deprecated in future ROCm releases. -------------------------------- hipRTC support -------------------------------- The HIP runtime compilation (hipRTC) environment enables on-the-fly runtime compilation, loading, and execution of device code on AMD GPUs. The rocWMMA library is compatible with hipRTC, so it can be used for runtime-generated kernels. A simple GEMM sample is included to demonstrate compatibility. For more information, refer to the `HIP API Reference `_ -------------------------------- Design concepts -------------------------------- rocWMMA is a header-only library written in C++17 and contained in the ``rocwmma`` namespace. It leverages template meta-programming to optimize code at compile time and generate efficient GPU kernels. rocWMMA offers full implementation visibility, allowing developers to integrate rocWMMA API calls directly into their own kernels. 
The integrated code is more visible to compiler optimizations for generating efficient device code. The API also avoids expensive host-device transfers or external kernel invocations. The programming model best suited for the rocWMMA API is wavefront-centric. Data loading, storing, and MMA functions are assumed to involve the entire wavefront (or warp). Undefined behaviour is expected if not all threads in each wavefront are active while using rocWMMA. Small block sizes representing edge cases will be automatically padded and will not affect thread masking. The data of collaborative fragments is distributed across participating waves in the same thread block. Collaborative fragments optimize collective data movement between memory locations, such as data movement from global memory to LDS, to balance shared data responsibilities across wavefronts. However, collaborative fragments are not supported in MMA functions. Fragment instances express wavefront collaboration with fragment scheduler meta-data which is specified by the developer. In general, larger fragment sizes are better able to maximize memory bandwidth when transferring data between memory spaces, as well as ordering MMA functions. Beginning with rocWMMA 2.0.0 for ROCm 7.0, the mma_sync API can handle partial or large tiles in a piece-wise manner automatically. This enhanced functionality is demonstrated in the performance samples, which feature a simplified workflow with larger tile sizes and good performance. The rocWMMA API reduces the boiler-plate code by providing tools to decompose matrix multiply-accumulate based problems into blocks, or fragments of data that individual wavefronts can efficiently process. Wavefronts abstract blocks of data into ``fragments``, which encapsulate meta-data properties about the blocks in different contexts and the data itself, including: - a general geometric shape, for example, the BlockM/N/K dimensions.
- a context of the provenance of the data, for example, ``matrix_a`` or ``matrix_b``. - a context of how the data is stored, for example, row-major or column-major. - a data type, for example, single or half-precision floating point. - a fragment scheduler, for example, a class that specifies thread block collaboration, the number of participating waves, and their execution order. These basic traits determine storage requirements and how rocWMMA organizes individual threads in a layout to fetch or store data. ``Fragments`` are powerful objects because they are versatile in configuring and representing data. Their meta-properties propagate easily via Argument Dependent Lookup (ADL) and other deduction techniques. In practice, users configure fragments for their use case, and rocWMMA handles the underlying details. The implementation code is encapsulated into layered objects, fulfilling specific interface communications from low level functions to high-level API interactions: - **Unit backend operations:** Act as wrappers around device-specific functions, such as intrinsics typically prefixed with ``amdgcn_*``. These functions translate inputs into raw vector forms and addresses required by the low-level intrinsics, and handle architecture or device-specific behavior. - **Vector operations:** This level of objects such as ``OpaqueLoad`` or ``OpaqueStore`` handle variable-sized vector inputs and marshall them into unrolled unit backend operations. They encompass thread-wise details of vector operations. These classes provide a consistent functional interface for input vectors of all sizes, independent of device architecture, whose details are handled at a lower level. - **Fragment operations:** At the API level, user data is stored and managed in ``fragment`` objects. Fragment objects can be visualized as geometric blocks of data from the perspective of a wavefront and stored as vectors. 
rocWMMA's load, store, and MMA operations operate at the wavefront level, assuming that all threads in the wavefront are participating under the hood. This layer's implementation translates wavefront fragment operations into vector operations. rocWMMA's layered design enables a seamless API experience across diverse device architectures and platforms. -------------------------------- Nomenclature -------------------------------- GEMM ^^^^^ Generalized Matrix-Matrix multiplication (GEMM) is a fundamental application for rocWMMA, solving the equation ``D = alpha * A x B + beta * C``, where ``A``, ``B``, ``C``, and ``D`` are matrices, and ``alpha`` and ``beta`` are scalars. Matrices are sized by ``M x N x K``, where ``A = M x K``, ``B = K x N`` and ``C, D = M x N``. rocWMMA includes a range of test and sample kernels covering various parameters. Test kernels are grouped into executables named using parameter strings that describe their specific implementations. .. code-block:: bash PGR# - Prefetch Global Read lookup stages. PGR0 = no global read prefetch. PGR1 = 1 stage global read prefetch. LB# - LDS buffer count. LB0 = no LDS usage, LB2 = 2 LDS buffers used for swap. MP# - MFMA instruction priority. MP0 = default MFMA instruction priority of 0. MP1 = raise MFMA instruction priority to 1. MB - Multiple output blocks targeted per wave SB - Single output block target per wave NC - Non-Cooperative load / store CP - Cooperative load / store BLK - Cooperative load / store per block tile WV - Cooperative load / store per wave tile WG - Cooperative load / store per macro tile * ``gemm_PGR0_LB0_MP0_SB_NC``: The simplest blocked GEMM example, which targets one output block of matrix multiplication per wave. No prefetch, no LDS usage, default MFMA prioritization, single block output and non-collaborative. * ``gemm_PGR0_LB0_MP0_MB_NC``: Implements a multi-block GEMM where each wave is responsible for a BlocksX x BlocksY grid of output blocks.
No prefetch, no LDS usage, default MFMA prioritization, multiple blocks output, and non-collaborative. * ``gemm_PGR1_LB2_MP0_MB_CP_BLK``: Implements a multi-block GEMM where each wave is responsible for a BlocksX x BlocksY grid of output blocks. This kernel leverages shared memory to implement a data prefetching pipeline and collaborates with other waves to improve performance. Implements single stage prefetch, double LDS buffer, default MFMA prioritization, multiple blocks output, and is block-tile collaborative in global read and local write. * ``gemm_PGR1_LB2_MP0_MB_CP_WV``: Implements a multi-block GEMM where each wave is responsible for a BlocksX x BlocksY grid of output blocks. This kernel leverages shared memory to implement a data prefetching pipeline and collaborates with other waves to improve performance. Implements single stage prefetch, double LDS buffer, default MFMA prioritization, multiple blocks output, and is wave-tile collaborative in global read and local write. * ``gemm_PGR1_LB2_MP0_MB_CP_WG``: Implements a multi-block GEMM where each wave is responsible for a BlocksX x BlocksY grid of output blocks. This kernel leverages shared memory to implement a data prefetching pipeline and collaborates with other waves to improve performance. Implements single stage prefetch, double LDS buffer, default MFMA prioritization, multiple blocks output and is macro-tile collaborative in global read and local write. * ``Ad Hoc Test``: An executable targeting a specific set of kernel parameters. This is used as a quick mock-up for investigating a particular GEMM kernel scenario. Validation tests are postfixed with ``-validate``. Benchmark tests are postfixed with ``-bench``. Sample kernels are built with as little infrastructure as possible and use more approachable names to appeal to a broader audience. * ``simple_sgemm``: a simple GEMM kernel with ``s`` denoting single-precision floating-point data type.
* ``simple_dgemm``: a simple GEMM kernel with ``d`` denoting double-precision floating-point data type. * ``simple_hgemm``: a simple GEMM kernel with ``h`` denoting half-precision floating-point data type. * ``perf_sgemm``: a performant GEMM kernel with ``s`` denoting single-precision floating-point data type. * ``perf_dgemm``: a performant GEMM kernel with ``d`` denoting double-precision floating-point data type. * ``perf_hgemm``: a performant GEMM kernel with ``h`` denoting half-precision floating-point data type. GEMV ^^^^^ Generalized Matrix-Vector multiplication (GEMV) is another application for rocWMMA that solves the equation ``y = alpha * A * x + beta * y``, where ``A`` is a matrix, ``x`` and ``y`` are vectors, and ``alpha`` and ``beta`` are scalars. Matrix ``A`` is sized as ``M x K``, vector ``x`` is ``K x 1``, and vector ``y`` is ``M x 1``. rocWMMA implements the following simple GEMV demonstration samples: * ``simple_sgemv``: Simple GEMV kernel with ``s`` denoting single-precision floating-point data type. * ``simple_dgemv``: Simple GEMV kernel with ``d`` denoting double-precision floating-point data type. DLRM ^^^^ rocWMMA implements a simple component of Deep Learning Recommendation Model (DLRM) for machine learning. Both forward and backward passes using half-precision inputs and outputs are demonstrated. * ``simple_dlrm``: Simple DLRM kernel demonstrating the forward and backward passes with half-precision floating-point inputs and outputs. -------------------------------- Library source code organization -------------------------------- The rocWMMA code consists of four major parts: - The ``library`` directory contains the header library API and its implementation. - The ``samples`` directory contains real-world sample use-cases using the rocWMMA API. - The ``test`` directory contains testing infrastructure for rocWMMA. - The ``docs`` directory contains sources for documentation generation.
``library`` directory ^^^^^^^^^^^^^^^^^^^^^^^ The ``library`` directory is structured as follows: - ``library/include/rocwmma/``: C++ include files for the rocWMMA API. These files also contain Doxygen content that documents the API. - ``rocwmma.hpp``: The main API for rocWMMA, defining fragment data abstractions, wave-wise storing, loading, matrix multiply-accumulate (mma) and thread block synchronization. This API offers function signatures highly compatible with common CUDA WMMA interfaces. - ``rocwmma_transforms.hpp``: A complementary API for rocWMMA, defining functionality to manipulate fragment data, for example, transpose and data layout changes. These are unique to rocWMMA. - ``library/include/internal``: Internal include files which define the main infrastructure driving the rocWMMA API: - Configuration of platforms and architectures. - Type support. - Input and output configuration, shapes and traits. - Loading and storing utilities. - Layouts of memory and registers. - Mapping utilities. - Intrinsic wrappers and hardware abstraction. - Vector class implementations. - Vector conversion, permutation, and transform utilities. - Vector packing and unpacking. - Matrix multiply-accumulate. - Cooperative loading and storing. - Thread block synchronization and flow control. - Utility code. - Data layout transformation utilities. ``samples`` directory ^^^^^^^^^^^^^^^^^^^^^^^ The ``samples`` directory contains the sample codes for the following use cases: - ``samples/hipRTC_gemm.cpp``: Simple General Matrix Multiply (GEMM) algorithm demonstration without LDS memory usage or transposition, running within the hipRTC environment. - ``samples/simple_sgemv.cpp``: Simple matrix multiply-accumulate with a vector demonstration without LDS or transposition for single-precision floating-point types. - ``samples/simple_dgemv.cpp``: Simple matrix multiply-accumulate with a vector demonstration without LDS or transposition for double-precision floating-point types.
- ``samples/simple_sgemm.cpp``: Simple GEMM algorithm demonstration without LDS memory usage or transposition for single-precision floating-point types. - ``samples/simple_dgemm.cpp``: Simple GEMM algorithm demonstration without LDS memory usage or transposition for double-precision floating-point types. - ``samples/simple_hgemm.cpp``: Simple GEMM algorithm demonstration without LDS memory usage or transposition for half-precision floating-point types. - ``samples/perf_sgemm.cpp``: High performing multi-block GEMM algorithm demonstration with LDS memory, macro tile collaboration, data reuse and optimized pipeline for single-precision floating-point types. - ``samples/perf_dgemm.cpp``: High performing multi-block GEMM algorithm demonstration with LDS memory, macro tile collaboration, data reuse and optimized pipeline for double-precision floating-point types. - ``samples/perf_hgemm.cpp``: High performing multi-block GEMM algorithm demonstration with LDS memory, macro tile collaboration, data reuse and optimized pipeline for half-precision floating-point types. - ``samples/simple_dlrm.cpp``: Simple Deep Learning Recommendation Model (DLRM) for machine learning. - ``samples/common.hpp``: Common code used by all the above rocWMMA sample files. ``test`` directory ^^^^^^^^^^^^^^^^^^^^^^^ The ``test`` directory contains the following test code support: - ``test/bin``: To generate benchmark plots from the ``gtest`` output dumps of rocWMMA's benchmark tests. - ``test/device``: Device utility kernels to support test setup and validation on GPU. - ``test/dlrm``: For various strategies of DLRM application. This test is used to validate DLRM functions using rocWMMA API. - ``test/gemm``: For various strategies of GEMM application. This test is used to validate and benchmark GEMM functions using rocWMMA API. - ``test/unit``: For testing the basic functional units of rocWMMA library.
``docs`` directory ^^^^^^^^^^^^^^^^^^^ - Sphinx and Doxygen are used to generate the project's documentation. - ``api-reference-guide.rst`` pulls from Doxygen documentation to format the API documentation. - ``installation.rst`` builds installation and build instructions for rocWMMA. - ``license.rst`` includes information about rocWMMA licensing. - ``programmers-guide.rst`` includes information about project organization and expectations. - ``what-is-rocwmma.rst`` includes a description of rocWMMA. Contributing ^^^^^^^^^^^^ To contribute to the project, see `Contributing to rocWMMA `_. ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2778184 rocwmma/docs/conf.py0000664000175100017510000000214615206065535014714 0ustar00jenkinsjenkins# Configuration file for the Sphinx documentation builder. # # This file only contains a selection of the most common options. For a full # list see the documentation: # https://www.sphinx-doc.org/en/master/usage/configuration.html import re from rocm_docs import ROCmDocs with open('../CMakeLists.txt', encoding='utf-8') as f: match = re.search(r'set \( VERSION_STRING\s+\"?([0-9.]+)[^0-9.]+', f.read()) if not match: raise ValueError("VERSION not found!") version_number = match[1] left_nav_title = f"rocWMMA {version_number} Documentation" # for PDF output on Read the Docs project = "rocWMMA Documentation" author = "Advanced Micro Devices, Inc." copyright = "Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved." 
version = version_number release = version_number external_toc_path = "./sphinx/_toc.yml" docs_core = ROCmDocs(left_nav_title) docs_core.run_doxygen(doxygen_root="doxygen", doxygen_path="doxygen/xml") docs_core.setup() external_projects_current_project = "rocwmma" for sphinx_var in ROCmDocs.SPHINX_VARS: globals()[sphinx_var] = getattr(docs_core, sphinx_var) ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2778184 rocwmma/docs/doxygen/0000775000175100017510000000000015206065535015067 5ustar00jenkinsjenkins././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1779985245.2778184 rocwmma/docs/doxygen/Doxyfile0000664000175100017510000033120115206065535016575 0ustar00jenkinsjenkins# Doxyfile 1.8.17 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. # # All text after a double hash (##) is considered a comment and is placed in # front of the TAG it is preceding. # # All text after a single hash (#) is considered a comment and will be ignored. # The format is: # TAG = value [value, ...] # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the configuration # file that follow. The default is UTF-8 which is also the encoding used for all # text before the first occurrence of this tag. Doxygen uses libiconv (or the # iconv built into libc) for the transcoding. See # https://www.gnu.org/software/libiconv/ for the list of possible encodings. # The default value is: UTF-8. 
DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded by # double-quotes, unless you are using Doxywizard) that should identify the # project for which the documentation is generated. This name is used in the # title of most generated pages and in a few other places. # The default value is: My Project. PROJECT_NAME = "rocwmma" # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version # control system is used. PROJECT_NUMBER = v0.7 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = "Prototype facilitating GEMM or GEMM-like matrix multiplications compatible with ROCm platform and HiP." # With the PROJECT_LOGO tag one can specify a logo or an icon that is included # in the documentation. The maximum height of the logo should not exceed 55 # pixels and the maximum width should not exceed 200 pixels. Doxygen will copy # the logo to the output directory. PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. OUTPUT_DIRECTORY = . # If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and # will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise causes # performance problems for the file system. # The default value is: NO. 
CREATE_SUBDIRS = NO # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode # U+3044. # The default value is: NO. ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, # Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), # Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, # Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), # Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, # Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, # Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, # Ukrainian and Vietnamese. # The default value is: English. OUTPUT_LANGUAGE = English # The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all generated output in the proper direction. # Possible values are: None, LTR, RTL and Context. # The default value is: None. OUTPUT_TEXT_DIRECTION = None # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. # The default value is: YES. 
BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief # description of a member or function before the detailed description # # Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. # The default value is: YES. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator that is # used to form the text in various listings. Each string in this list, if found # as the leading text of the brief description, will be stripped from the text # and the result, after processing the whole list, is used as the annotated # text. Otherwise, the brief description is used as-is. If left blank, the # following values are used ($name is automatically replaced with the name of # the entity):The $name class, The $name widget, The $name file, is, provides, # specifies, contains, represents, a, an and the. ABBREVIATE_BRIEF = "The $name class" \ "The $name widget" \ "The $name file" \ is \ provides \ specifies \ contains \ represents \ a \ an \ the # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # doxygen will generate a detailed section even if there is only a brief # description. # The default value is: NO. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. # The default value is: NO. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path # before files name in the file list and in the header files. If set to NO the # shortest path that makes the file name unique will be used # The default value is: YES. FULL_PATH_NAMES = YES # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. 
# Stripping is only done if one of the specified strings matches the left-hand # part of the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the path to # strip. # # Note that you can specify absolute paths here, but also relative paths, which # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. STRIP_FROM_PATH = # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which # header file to include in order to use a class. If left blank only the name of # the header file containing the class definition is used. Otherwise one should # specify the list of include paths that are normally passed to the compiler # using the -I flag. STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful is your file systems doesn't # support long names like on DOS, Mac, or CD-ROM. # The default value is: NO. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the # first line (until the first dot) of a Javadoc-style comment as the brief # description. If set to NO, the Javadoc-style will behave just like regular Qt- # style comments (thus requiring an explicit @brief command for a brief # description.) # The default value is: NO. JAVADOC_AUTOBRIEF = NO # If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line # such as # /*************** # as being the beginning of a Javadoc-style comment "banner". If set to NO, the # Javadoc-style will behave just like regular comments and it will not be # interpreted by doxygen. # The default value is: NO. 
JAVADOC_BANNER = NO # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description. If # set to NO, the Qt-style will behave just like regular Qt-style comments (thus # requiring an explicit \brief command for a brief description.) # The default value is: NO. QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a # multi-line C++ special comment block (i.e. a block of //! or /// comments) as # a brief description. This used to be the default behavior. The new default is # to treat a multi-line C++ comment block as a detailed description. Set this # tag to YES if you prefer the old behavior instead. # # Note that setting this tag to YES also means that rational rose comments are # not recognized any more. # The default value is: NO. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new # page for each member. If set to NO, the documentation of a member will be part # of the file/class/namespace that contains it. # The default value is: NO. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen # uses this value to replace tabs by spaces in code fragments. # Minimum value: 1, maximum value: 16, default value: 4. TAB_SIZE = 4 # This tag can be used to specify a number of aliases that act as commands in # the documentation. An alias has the form: # name=value # For example adding # "sideeffect=@par Side Effects:\n" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading # "Side Effects:". 
You can put \n's in the value part of an alias to insert # newlines (in the resulting output). You can put ^^ in the value part of an # alias to insert a newline as if a physical newline was in the original file. # When you need a literal { or } or , in the value part of an alias you have to # escape them by means of a backslash (\), this can lead to conflicts with the # commands \{ and \} for these it is advised to use the version @{ and @} or use # a double escape (\\{ and \\}) ALIASES = # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding "class=itcl::class" # will allow you to use the command class in the itcl::class meaning. TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all # members will be omitted, etc. # The default value is: NO. OPTIMIZE_OUTPUT_FOR_C = NO # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or # Python sources only. Doxygen will then generate output that is more tailored # for that language. For instance, namespaces will be presented as packages, # qualified scopes will look different, etc. # The default value is: NO. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources. Doxygen will then generate output that is tailored for Fortran. # The default value is: NO. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for VHDL. # The default value is: NO. OPTIMIZE_OUTPUT_VHDL = NO # Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice # sources only. Doxygen will then generate output that is more tailored for that # language. 
For instance, namespaces will be presented as modules, types will be # separated into more groups, etc. # The default value is: NO. OPTIMIZE_OUTPUT_SLICE = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, JavaScript, # Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, # Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: # FortranFree, unknown formatted Fortran: Fortran. In the latter case the parser # tries to guess whether the code is fixed or free formatted code, this is the # default for Fortran type files), VHDL, tcl. For instance to make doxygen treat # .inc files as Fortran files (default is PHP), and .f files as C (default is # Fortran), use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise # the files are not read by doxygen. EXTENSION_MAPPING = # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable # documentation. See https://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you can # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in # case of backward compatibility issues. # The default value is: YES. MARKDOWN_SUPPORT = YES # When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up # to that level are automatically included in the table of contents, even if # they do not have an id attribute.
# Note: This feature currently applies only to Markdown headings. # Minimum value: 0, maximum value: 99, default value: 5. # This tag requires that the tag MARKDOWN_SUPPORT is set to YES. TOC_INCLUDE_HEADINGS = 5 # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can # be prevented in individual cases by putting a % sign in front of the word or # globally by setting AUTOLINK_SUPPORT to NO. # The default value is: YES. AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should set this # tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); # versus func(std::string) {}). This also makes the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. # The default value is: NO. BUILTIN_STL_SUPPORT = YES # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. # The default value is: NO. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: # https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen # will parse them like normal C++ but will assume all classes use public instead # of private inheritance when no explicit protection keyword is present. # The default value is: NO. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES will make # doxygen replace the get and set methods by a property in the documentation. # This will only work if the methods are indeed getting or setting a simple # type. If this is not the case, or you want to show the methods anyway, you # should set this option to NO.
# The default value is: YES. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. # The default value is: NO. DISTRIBUTE_GROUP_DOC = YES # If one adds a struct or class to a group and this option is enabled, then also # any nested class or struct is added to the same group. By default this option # is disabled and one has to add nested compounds explicitly via \ingroup. # The default value is: NO. GROUP_NESTED_COMPOUNDS = NO # Set the SUBGROUPING tag to YES to allow class member groups of the same type # (for instance a group of public functions) to be put as a subgroup of that # type (e.g. under the Public Functions section). Set it to NO to prevent # subgrouping. Alternatively, this can be done per class using the # \nosubgrouping command. # The default value is: YES. SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions # are shown inside the group in which they are included (e.g. using \ingroup) # instead of on a separate page (for HTML and Man pages) or section (for LaTeX # and RTF). # # Note that this feature does not work in combination with # SEPARATE_MEMBER_PAGES. # The default value is: NO. INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions # with only public data fields or simple typedef fields will be shown inline in # the documentation of the scope in which they are defined (i.e. file, # namespace, or group documentation), provided this scope is documented. If set # to NO, structs, classes, and unions are shown on a separate page (for HTML and # Man pages) or section (for LaTeX and RTF). # The default value is: NO. 
INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or # enum is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically be # useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. # The default value is: NO. TYPEDEF_HIDES_STRUCT = YES # The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This # cache is used to resolve symbols given their name and scope. Since this can be # an expensive process and often the same symbol appears multiple times in the # code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small # doxygen will become slower. If the cache is too large, memory is wasted. The # cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range # is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 # symbols. At the end of a run doxygen will report the cache usage and suggest # the optimal cache size from a speed point of view. # Minimum value: 0, maximum value: 9, default value: 0. LOOKUP_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in # documentation are documented, even if no documentation was available. Private # class members and static file members will be hidden unless the # EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. 
# Note: This will also disable the warnings about undocumented members that are # normally produced when WARNINGS is set to YES. # The default value is: NO. EXTRACT_ALL = YES # If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will # be included in the documentation. # The default value is: NO. EXTRACT_PRIVATE = NO # If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual # methods of a class will be included in the documentation. # The default value is: NO. EXTRACT_PRIV_VIRTUAL = NO # If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal # scope will be included in the documentation. # The default value is: NO. EXTRACT_PACKAGE = NO # If the EXTRACT_STATIC tag is set to YES, all static members of a file will be # included in the documentation. # The default value is: NO. EXTRACT_STATIC = NO # If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined # locally in source files will be included in the documentation. If set to NO, # only classes defined in header files are included. Does not have any effect # for Java sources. # The default value is: YES. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. If set to YES, local methods, # which are defined in the implementation section but not in the interface are # included in the documentation. If set to NO, only methods in the interface are # included. # The default value is: NO. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base name of # the file that contains the anonymous namespace. By default anonymous namespace # are hidden. # The default value is: NO. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. 
If set to NO these # members will be included in the various overviews, but no documentation # section is generated. This option has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO, these classes will be included in the various overviews. This option # has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend # declarations. If set to NO, these declarations will be included in the # documentation. # The default value is: NO. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any # documentation blocks found inside the body of a function. If set to NO, these # blocks will be appended to the function's detailed documentation block. # The default value is: NO. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation that is typed after a # \internal command is included. If the tag is set to NO then the documentation # will be excluded. Set it to YES to include the internal documentation. # The default value is: NO. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file # names in lower-case letters. If set to YES, upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # (including Cygwin) and Mac users are advised to set this option to NO. # The default value is: system dependent. CASE_SENSE_NAMES = NO # If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with # their full class and namespace scopes in the documentation. If set to YES, the # scope will be hidden. # The default value is: NO.
HIDE_SCOPE_NAMES = NO # If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will # append additional text to a page's title, such as Class Reference. If set to # YES the compound reference will be hidden. # The default value is: NO. HIDE_COMPOUND_REFERENCE= NO # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. SHOW_INCLUDE_FILES = YES # If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each # grouped member an include statement to the documentation, telling the reader # which file to include in order to use the member. # The default value is: NO. SHOW_GROUPED_MEMB_INC = NO # If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include # files with double quotes in the documentation rather than with sharp brackets. # The default value is: NO. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the # documentation for inline members. # The default value is: YES. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the # (detailed) documentation of file and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. # The default value is: YES. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief # descriptions of file, namespace and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. Note that # this will also influence the order of the classes in the class list. # The default value is: NO. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the # (brief and detailed) documentation of class members so that constructors and # destructors are listed first. 
If set to NO the constructors will appear in the # respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. # Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief # member documentation. # Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting # detailed member documentation. # The default value is: NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy # of group names into alphabetical order. If set to NO the group names will # appear in their defined order. # The default value is: NO. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by # fully-qualified names, including namespaces. If set to NO, the class list will # be sorted only by class name, not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the alphabetical # list. # The default value is: NO. SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper # type resolution of all parameters of a function it will reject a match between # the prototype and the implementation of a member function even if there is # only one candidate or it is obvious which candidate to choose by doing a # simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still # accept a match between prototype and implementation in such cases. # The default value is: NO. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo # list. This list is created by putting \todo commands in the documentation. # The default value is: YES. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test # list. This list is created by putting \test commands in the documentation. # The default value is: YES. 
GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug # list. This list is created by putting \bug commands in the documentation. # The default value is: YES. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) # the deprecated list. This list is created by putting \deprecated commands in # the documentation. # The default value is: YES. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional documentation # sections, marked by \if ... \endif and \cond # ... \endcond blocks. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the # initial value of a variable or macro / define can have for it to appear in the # documentation. If the initializer consists of more lines than specified here # it will be hidden. Use a value of 0 to hide initializers completely. The # appearance of the value of individual variables and macros / defines can be # controlled using \showinitializer or \hideinitializer command in the # documentation regardless of this setting. # Minimum value: 0, maximum value: 10000, default value: 30. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated at # the bottom of the documentation of classes and structs. If set to YES, the # list will mention the files that were used to generate the documentation. # The default value is: YES. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. This # will remove the Files entry from the Quick Index and from the Folder Tree View # (if specified). # The default value is: YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces # page. This will remove the Namespaces entry from the Quick Index and from the # Folder Tree View (if specified). # The default value is: YES. 
SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command command input-file, where command is the value of the # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided # by doxygen. Whatever the program writes to standard output is used as the file # version. For an example see the documentation. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml # will be used as the name of the layout file. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE # tag is left empty. LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib # extension is automatically appended if omitted. This requires the bibtex tool # to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info. # For LaTeX the style of the bibliography can be controlled using # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. See also \cite for info how to create references. 
CITE_BIB_FILES = #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated to # standard output by doxygen. If QUIET is set to YES this implies that the # messages are off. # The default value is: NO. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated to standard error (stderr) by doxygen. If WARNINGS is set to YES # this implies that the warnings are on. # # Tip: Turn warnings on while writing the documentation. # The default value is: YES. WARNINGS = YES # If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate # warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag # will automatically be disabled. # The default value is: YES. WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some parameters # in a documented function, or documenting parameters that don't exist or using # markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = NO # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return # value. If set to NO, doxygen will only warn about wrong or incomplete # parameter documentation, but not about the absence of documentation. If # EXTRACT_ALL is set to YES then this flag will automatically be disabled. # The default value is: NO. WARN_NO_PARAMDOC = NO # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when # a warning is encountered. # The default value is: NO. WARN_AS_ERROR = YES # The WARN_FORMAT tag determines the format of the warning messages that doxygen # can produce. 
The string should contain the $file, $line, and $text tags, which # will be replaced by the file and line number from which the warning originated # and the warning text. Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard # error (stderr). WARN_LOGFILE = #--------------------------------------------------------------------------- # Configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag is used to specify the files and/or directories that contain # documented source files. You may enter file names like myfile.cpp or # directories like /usr/src/myproject. Separate the files or directories with # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. INPUT = ../../library/include/rocwmma \ ../../library/include/rocwmma/internal # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: https://www.gnu.org/software/libiconv/) for the list of # possible encodings. # The default value is: UTF-8. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # read by doxygen. 
# # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, # *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, # *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment), # *.doc (to be provided as doxygen C comment), *.txt (to be provided as doxygen # C comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f, *.for, *.tcl, *.vhd, # *.vhdl, *.ucf, *.qsf and *.ice. FILE_PATTERNS = *.c \ *.cc \ *.cxx \ *.cpp \ *.c++ \ *.java \ *.ii \ *.ixx \ *.ipp \ *.i++ \ *.inl \ *.idl \ *.ddl \ *.odl \ *.h \ *.hh \ *.hxx \ *.hpp \ *.h++ \ *.cs \ *.d \ *.php \ *.php4 \ *.php5 \ *.phtml \ *.inc \ *.m \ *.markdown \ *.md \ *.mm \ *.dox \ *.py \ *.tcl \ *.vhd \ *.vhdl \ *.ucf \ *.qsf \ *.as \ *.js # The RECURSIVE tag can be used to specify whether or not subdirectories should # be searched for input files as well. # The default value is: NO. RECURSIVE = NO # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. # The default value is: NO. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. 
# # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include # command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and # *.h) to filter out the source-files in the directories. If left blank all # files are included. EXAMPLE_PATTERNS = * # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude commands # irrespective of the value of the RECURSIVE tag. # The default value is: NO. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or directories # that contain images that are to be included in the documentation (see the # \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command: # # <filter> <input-file> # # where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the # name of an input file. Doxygen will then use the output that the filter # program writes to standard output. If FILTER_PATTERNS is specified, this tag # will be ignored.
# # Note that the filter must not add or remove lines; it is applied before the # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: pattern=filter # (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how # filters are used. If the FILTER_PATTERNS tag is empty or if none of the # patterns match the file name, INPUT_FILTER is applied. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will also be used to filter the input files that are used for # producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). # The default value is: NO. FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern. A pattern will override the setting for FILTER_PATTERN (if any) and # it is also possible to disable source filtering for a specific pattern using # *.ext= (so without naming a filter). # This tag requires that the tag FILTER_SOURCE_FILES is set to YES. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page # (index.html). 
This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. USE_MDFILE_AS_MAINPAGE = #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will be # generated. Documented entities will be cross-referenced with these sources. # # Note: To get rid of all source code in the generated output, make sure that # also VERBATIM_HEADERS is set to NO. # The default value is: NO. SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body of functions, # classes and enums directly into the documentation. # The default value is: NO. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any # special comment blocks from generated source code fragments. Normal C, C++ and # Fortran comments will always remain visible. # The default value is: YES. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES then for each documented # entity all documented functions referencing it will be listed. # The default value is: NO. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES then for each documented function # all documented entities called/used by that function will be listed. # The default value is: NO. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set # to YES then the hyperlinks from functions in REFERENCES_RELATION and # REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will # link to the documentation. # The default value is: YES. 
REFERENCES_LINK_SOURCE = YES # If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the # source code will show a tooltip with additional information such as prototype, # brief description and links to the definition and documentation. Since this # will make the HTML file larger and loading of large files a bit slower, you # can opt to disable this feature. # The default value is: YES. # This tag requires that the tag SOURCE_BROWSER is set to YES. SOURCE_TOOLTIPS = YES # If the USE_HTAGS tag is set to YES then the references to source code will # point to the HTML generated by the htags(1) tool instead of doxygen built-in # source browser. The htags tool is part of GNU's global source tagging system # (see https://www.gnu.org/software/global/global.html). You will need version # 4.8.6 or higher. # # To use it do the following: # - Install the latest version of global # - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file # - Make sure the INPUT points to the root of the source tree # - Run doxygen as normal # # Doxygen will invoke htags (and that will in turn invoke gtags), so these # tools must be available from the command line (i.e. in the search path). # # The result: instead of the source browser generated by doxygen, the links to # source code will now point to the output of htags. # The default value is: NO. # This tag requires that the tag SOURCE_BROWSER is set to YES. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a # verbatim copy of the header file for each class for which an include is # specified. Set to NO to disable this. # See also: Section \class. # The default value is: YES.
VERBATIM_HEADERS = YES #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all # compounds will be generated. Enable this if the project contains a lot of # classes, structs, unions or interfaces. # The default value is: YES. ALPHABETICAL_INDEX = YES # The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in # which the alphabetical index list will be split. # Minimum value: 1, maximum value: 20, default value: 5. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag # can be used to specify a prefix (or a list of prefixes) that should be ignored # while generating the index headers. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output # The default value is: YES. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of # it. # The default directory is: html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for each # generated HTML page (for example: .htm, .php, .asp). # The default value is: .html. # This tag requires that the tag GENERATE_HTML is set to YES. 
HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a user-defined HTML header file for # each generated HTML page. If the tag is left blank doxygen will generate a # standard header. # # To get valid HTML, the header file must include any scripts and style sheets # that doxygen needs, which depend on the configuration options used (e.g. # the setting GENERATE_TREEVIEW). It is highly recommended to start with a # default header using # doxygen -w html new_header.html new_footer.html new_stylesheet.css # YourConfigFile # and then modify the file new_header.html. See also section "Doxygen usage" # for information on how to generate the default header that doxygen normally # uses. # Note: The header is subject to change so you typically have to regenerate the # default header when upgrading to a newer version of doxygen. For a description # of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each # generated HTML page. If the tag is left blank doxygen will generate a standard # footer. See HTML_HEADER for more information on how to generate a default # footer and what special commands can be used inside the footer. See also # section "Doxygen usage" for information on how to generate the default footer # that doxygen normally uses. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style # sheet that is used by each HTML page. It can be used to fine-tune the look of # the HTML output. If left blank doxygen will generate a default style sheet. # See also section "Doxygen usage" for information on how to generate the style # sheet that doxygen normally uses.
# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as # it is more robust and this tag (HTML_STYLESHEET) will in the future become # obsolete. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined # cascading style sheets that are included after the standard style sheets # created by doxygen. Using this option one can overrule certain style aspects. # This is preferred over using HTML_STYLESHEET since it does not replace the # standard style sheet and is therefore more robust against future updates. # Doxygen will copy the style sheet files to the output directory. # Note: The order of the extra style sheet files is of importance (e.g. the last # style sheet in the list overrules the setting of the previous ones in the # list). For an example see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. Use the # $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that the # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to # this color. Hue is specified as an angle on a colorwheel, see # https://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. 
# Minimum value: 0, maximum value: 359, default value: 220. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors # in the HTML output. For a value of 0 the output will use grayscales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the # luminance component of the colors in the HTML output. Values below 100 # gradually make the output lighter, whereas values above 100 make the output # darker. The value divided by 100 is the actual gamma applied, so 80 represents # a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not # change the gamma. # Minimum value: 40, maximum value: 240, default value: 80. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting this # to YES can help to show when doxygen was last run and thus if the # documentation is up to date. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_TIMESTAMP = NO # If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML # documentation will contain a main index with vertical navigation menus that # are dynamically created via JavaScript. If disabled, the navigation index will # consist of multiple levels of tabs that are statically embedded in every HTML # page. Disable this option to support browsers that do not have JavaScript, # like the Qt help browser. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES.
HTML_DYNAMIC_MENUS = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_DYNAMIC_SECTIONS = NO # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries # shown in the various tree structured indices initially; the user can expand # and collapse entries dynamically later on. Doxygen will expand the tree to # such a level that at most the specified number of entries are visible (unless # a fully collapsed tree already exceeds this amount). So setting the number of # entries to 1 will produce a fully collapsed tree by default. 0 is a special value # representing an infinite number of entries and will result in a fully expanded # tree by default. # Minimum value: 0, maximum value: 9999, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development # environment (see: https://developer.apple.com/xcode/), introduced with OSX # 10.5 (Leopard). To create a documentation set, doxygen will generate a # Makefile in the HTML output directory. Running make will produce the docset in # that directory and running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at # startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy # genXcode/_index.html for more information. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_DOCSET = NO # This tag determines the name of the docset feed.
A documentation feed provides # an umbrella under which multiple documentation sets from a single provider # (such as a company or product suite) can be grouped. # The default value is: Doxygen generated docs. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_FEEDNAME = "Doxygen generated docs" # This tag specifies a string that should uniquely identify the documentation # set bundle. This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_BUNDLE_ID = org.doxygen.Project # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify # the documentation publisher. This should be a reverse domain-name style # string, e.g. com.mycompany.MyDocSet.documentation. # The default value is: org.doxygen.Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. # The default value is: Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop # (see: https://www.microsoft.com/en-us/download/details.aspx?id=21138) on # Windows. # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML # files are now used as the Windows 98 help format, and will replace the old # Windows help format (.hlp) on all Windows platforms in the future. Compressed # HTML files also contain an index, a table of contents, and you can search for # words in the documentation. 
The HTML workshop also contains a viewer for # compressed HTML files. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_HTMLHELP = NO # The CHM_FILE tag can be used to specify the file name of the resulting .chm # file. You can add a path in front of the file if the result should not be # written to the html output directory. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_FILE = # The HHC_LOCATION tag can be used to specify the location (absolute path # including file name) of the HTML help compiler (hhc.exe). If non-empty, # doxygen will try to run the HTML help compiler on the generated index.hhp. # The file has to be specified with full path. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated # (YES) or that it should be included in the master .chm file (NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. GENERATE_CHI = NO # The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) # and project file content. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_INDEX_ENCODING = # The BINARY_TOC flag controls whether a binary table of contents is generated # (YES) or a normal table of contents (NO) in the .chm file. Furthermore it # enables the Previous and Next buttons. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members to # the table of contents of the HTML help documentation and to the tree view. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that # can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help # (.qch) of the generated HTML documentation. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify # the file name of the resulting .qch file. The path specified is relative to # the HTML output folder. # This tag requires that the tag GENERATE_QHP is set to YES. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help # Project output. For more information please see Qt Help Project / Namespace # (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace). # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt # Help Project output. For more information please see Qt Help Project / Virtual # Folders (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual- # folders). # The default value is: doc. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_VIRTUAL_FOLDER = doc # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom # filter to add. For more information please see Qt Help Project / Custom # Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom- # filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see Qt Help Project / Custom # Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom- # filters). 
# This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's filter section matches. Qt Help Project / Filter Attributes (see: # https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_SECT_FILTER_ATTRS = # The QHG_LOCATION tag can be used to specify the location of Qt's # qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the # generated .qhp file. # This tag requires that the tag GENERATE_QHP is set to YES. QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be # generated, together with the HTML files, they form an Eclipse help plugin. To # install this plugin and make it available under the help contents menu in # Eclipse, the contents of the directory containing the HTML and XML files needs # to be copied into the plugins directory of eclipse. The name of the directory # within the plugins directory should be the same as the ECLIPSE_DOC_ID value. # After copying Eclipse needs to be restarted before the help appears. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_ECLIPSEHELP = NO # A unique identifier for the Eclipse help plugin. When installing the plugin # the directory name containing the HTML and XML files should also have this # name. Each documentation set should have its own identifier. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. ECLIPSE_DOC_ID = org.doxygen.Project # If you want full control over the layout of the generated HTML pages it might # be necessary to disable the index and replace it with your own. The # DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top # of each HTML page. A value of NO enables the index and the value YES disables # it. 
Since the tabs in the index contain the same information as the navigation # tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. DISABLE_INDEX = NO # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. If the tag # value is set to YES, a side panel will be generated containing a tree-like # index structure (just like the one that is generated for HTML Help). For this # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can # further fine-tune the look of the index. As an example, the default style # sheet generated by doxygen has an example that shows how to put an image at # the root of the tree instead of the PROJECT_NAME. Since the tree basically has # the same information as the tab index, you could consider setting # DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_TREEVIEW = NO # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # # Note that a value of 0 will completely suppress the enum values from appearing # in the overview section. # Minimum value: 0, maximum value: 20, default value: 4. # This tag requires that the tag GENERATE_HTML is set to YES. ENUM_VALUES_PER_LINE = 1 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used # to set the initial width (in pixels) of the frame in which the tree is shown. # Minimum value: 0, maximum value: 1500, default value: 250. # This tag requires that the tag GENERATE_HTML is set to YES. 
TREEVIEW_WIDTH = 250 # If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to # external symbols imported via tag files in a separate window. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. EXT_LINKS_IN_WINDOW = NO # Use this tag to change the font size of LaTeX formulas included as images in # the HTML documentation. When you change the font size after a successful # doxygen run you need to manually remove any form_*.png images from the HTML # output directory to force them to be regenerated. # Minimum value: 8, maximum value: 50, default value: 10. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_FONTSIZE = 10 # Use the FORMULA_TRANSPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are not # supported properly for IE 6.0, but are supported on all modern browsers. # # Note that when changing this option you need to delete any form_*.png files in # the HTML output directory before the changes take effect. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_TRANSPARENT = YES # The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands # to create new LaTeX commands to be used in formulas as building blocks. See # the section "Including formulas" for details. FORMULA_MACROFILE = # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see # https://www.mathjax.org) which uses client side JavaScript for the rendering # instead of using pre-rendered bitmaps. Use this if you do not have LaTeX # installed or if you want formulas to look prettier in the HTML output. When # enabled you may also need to install MathJax separately and configure the path # to it using the MATHJAX_RELPATH option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES.
USE_MATHJAX = YES # When MathJax is enabled you can set the default output format to be used for # the MathJax output. See the MathJax site (see: # http://docs.mathjax.org/en/latest/output.html) for more details. # Possible values are: HTML-CSS (which is slower, but has the best # compatibility), NativeMML (i.e. MathML) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_FORMAT = HTML-CSS # When MathJax is enabled you need to specify the location relative to the HTML # output directory using the MATHJAX_RELPATH option. The destination directory # should contain the MathJax.js script. For instance, if the mathjax directory # is located at the same level as the HTML output directory, then # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of # MathJax from https://www.mathjax.org before deployment. # The default value is: https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/. # This tag requires that the tag USE_MATHJAX is set to YES. # NOTE: the former cdn.mathjax.org host has been retired by the MathJax project, # and a plain http:// script URL is blocked as mixed content when the generated # docs are served over HTTPS, so use the documented HTTPS default CDN instead. MATHJAX_RELPATH = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/ # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering. For example # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code. See the MathJax site # (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an # example see the documentation. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_CODEFILE = # When the SEARCHENGINE tag is enabled doxygen will generate a search box for # the HTML output.
The underlying search engine uses javascript and DHTML and # should work on any modern browser. Note that when using HTML help # (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) # there is already a search function so this one should typically be disabled. # For large projects the javascript based search engine can be slow, then # enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to # search using the keyboard; to jump to the search box use + S # (what the is depends on the OS and browser, but it is typically # , /